In [None]:
!pip install sagemaker==2.88.0 s3fs joblib scikit-learn==1.0.2 xgboost

In [None]:
import sagemaker
from sagemaker.session import Session
from sagemaker.feature_store.feature_group import FeatureGroup
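# Optional: uncomment and fill in to supply AWS credentials through environment
# variables when the runtime has none configured. Never commit real keys.
# import os
# os.environ["AWS_ACCESS_KEY_ID"] = "<aws_key_id>"
# os.environ["AWS_SECRET_ACCESS_KEY"] = "<aws_secret>"
# os.environ["AWS_DEFAULT_REGION"] = "us-east-1"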

# --- Notebook configuration ---------------------------------------------------
# IAM role ARN; the <account_number> placeholder must be filled in before use.
role = "arn:aws:iam::<account_number>:role/sagemaker-iam-role"
# Name of the feature group that holds the customer features.
FEATURE_GROUP_NAME = "telcom-customer-features"
# Bucket used by the cells below for query output, the stored model and results.
s3_bucket_name = "feast-demo-mar-2022"

# --- SageMaker handles --------------------------------------------------------
# Default-constructed session; the region is read back from it.
sagemaker_session = Session()
region = sagemaker_session.boto_region_name

# Handle to the feature group, bound to the session above.
customers_feature_group = FeatureGroup(
    name=FEATURE_GROUP_NAME,
    sagemaker_session=sagemaker_session,
)

In [None]:
# Build the Athena query that keeps only the newest, non-deleted row per customer.
get_latest_snapshot_query = customers_feature_group.athena_query()
snapshot_table = get_latest_snapshot_query.table_name
# Rows are ranked per customerid, newest first (event time, then API invocation
# time, then write time); only rank 1 survives, and deleted records are excluded.
query = f"""SELECT *
FROM
    (SELECT *,
         row_number()
        OVER (PARTITION BY customerid
    ORDER BY  event_timestamp desc, Api_Invocation_Time DESC, write_time DESC) AS row_num
    FROM "{snapshot_table}")
WHERE row_num = 1 and 
NOT is_deleted;"""


In [None]:
# Submit the snapshot query to Athena, writing its results under the bucket's
# "output" prefix, then wait for the query to complete before moving on.
get_latest_snapshot_query.run(
    query_string=query,
    output_location=f"s3://{s3_bucket_name}/output",
)
get_latest_snapshot_query.wait()

In [None]:
# Pull the query result into a DataFrame and strip the columns that are not
# model inputs: the record bookkeeping fields and the ranking helper `row_num`
# that the snapshot query added.
churn_data = (
    get_latest_snapshot_query
    .as_dataframe()
    .drop(
        columns=[
            "event_timestamp",
            "write_time",
            "api_invocation_time",
            "is_deleted",
            "row_num",
        ]
    )
)

In [None]:
import boto3
from datetime import date
# Download the serialized churn model from the bucket's model-repo prefix.
# The destination is a relative path, so the file lands in the current
# working directory.
s3 = boto3.client("s3")
s3.download_file(
    Bucket=s3_bucket_name,
    Key="model-repo/customer-churn-v0.0",
    Filename="customer-churn-v0.0",
)

In [None]:
# joblib is imported here because no earlier cell brings it into scope; without
# this the cell fails with NameError on a fresh kernel.
import joblib

# Model inputs: everything except the customer identifier and the label.
# `predicted_churn` is also removed when present, because the final cell appends
# it to `churn_data` in place; this keeps the cell re-runnable afterwards.
features = (
    churn_data
    .drop(['customerid', 'churn'], axis=1)
    .drop(columns=['predicted_churn'], errors='ignore')
)

# Load from the same relative path the download cell wrote to, instead of a
# hard-coded "/content/..." location that only resolves when the working
# directory happens to be /content.
# NOTE: joblib.load unpickles the file and can execute arbitrary code; only load
# model artifacts from a trusted bucket.
loaded_model = joblib.load('customer-churn-v0.0')
prediction = loaded_model.predict(features)
prediction.tolist()

In [None]:
# Attach the predictions to the customer frame and persist the result as a
# parquet file, named with today's date, under the bucket's
# prediction_results prefix.
churn_data["predicted_churn"] = prediction.tolist()

file_name = f"customer_churn_prediction_{date.today()}.parquet"
s3_url = f's3://{s3_bucket_name}/prediction_results/{file_name}'
churn_data.to_parquet(path=s3_url)