In [None]:
!pip install sagemaker==2.88.0 s3fs pandas

In [None]:
import sagemaker
from sagemaker.session import Session
from sagemaker.feature_store.feature_group import FeatureGroup
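# Optional: uncomment the lines below to supply AWS credentials and the region
# through environment variables. Keep real keys out of the saved notebook.
# import os
# os.environ["AWS_ACCESS_KEY_ID"] = "<aws_key_id>"
# os.environ["AWS_SECRET_ACCESS_KEY"] = "<aws_secret>"
# os.environ["AWS_DEFAULT_REGION"] = "us-east-1"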

# --- Notebook configuration (replace the placeholders before running) ---
FEATURE_GROUP_NAME = "telcom-customer-features"
s3_bucket_name = "feast-demo-mar-2022"
role = "arn:aws:iam::<account_number>:role/sagemaker-iam-role"

# SageMaker session handed to the feature group below; `Session` is the same
# class that is exposed as `sagemaker.Session`.
sagemaker_session = Session()
region = sagemaker_session.boto_region_name

# Handle to the feature group named above, bound to that session.
customers_feature_group = FeatureGroup(name=FEATURE_GROUP_NAME, sagemaker_session=sagemaker_session)

In [None]:
# Athena query helper for the feature group; its `table_name` is interpolated
# into the SQL text below.
get_latest_snapshot_query = customers_feature_group.athena_query()
# Latest-snapshot query: number each customer's records newest-first (ordered by
# event_timestamp, then Api_Invocation_Time, then write_time, all descending)
# and keep only the first one. The is_deleted filter is applied after the
# ranking, so a customer whose newest record is flagged deleted is left out
# entirely instead of falling back to an older record.
query = f"""SELECT *
FROM
    (SELECT *,
         row_number()
        OVER (PARTITION BY customerid
    ORDER BY  event_timestamp desc, Api_Invocation_Time DESC, write_time DESC) AS row_num
    FROM "{get_latest_snapshot_query.table_name}")
WHERE row_num = 1 and 
NOT is_deleted;"""

In [None]:
# Submit the snapshot query; Athena is told to write its results under the
# "output" prefix of the configured bucket.
athena_output_location = f"s3://{s3_bucket_name}/output"
get_latest_snapshot_query.run(
    query_string=query,
    output_location=athena_output_location,
)
# Wait for the submitted query to finish before its results are read.
get_latest_snapshot_query.wait()

In [None]:
# Pull the query result into a DataFrame and, in the same chain, remove the
# timestamp / deletion-flag columns and the row_num helper column produced by
# the query, leaving the remaining columns untouched.
churn_data = (
    get_latest_snapshot_query.as_dataframe()
    .drop(
        columns=[
            "event_timestamp",
            "write_time",
            "api_invocation_time",
            "is_deleted",
            "row_num",
        ]
    )
)

In [None]:
from datetime import date, timedelta
import pandas as pd

# The prediction file name embeds the date from exactly four weeks (28 days) ago.
pred_date = date.today() - timedelta(days=28)
file_name = f"customer_churn_prediction_{pred_date}.parquet"
prediction_data = pd.read_parquet(
    f"s3://{s3_bucket_name}/prediction_results/{file_name}"
)

# Reduce both frames to the join key plus the single column being compared.
prediction_y = prediction_data.loc[:, ["customerid", "predicted_churn"]]
actual_y = churn_data.loc[:, ["customerid", "churn"]]

# Default (inner) merge: only customers present in both frames are kept.
merged_data = pd.merge(prediction_y, actual_y, on="customerid")

In [None]:
# Metric helpers from scikit-learn. No visible cell imports them, so without
# this import the cell fails with NameError on a fresh kernel.
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    roc_auc_score,
)

# Actual and predicted values, taken from the same rows of the merged frame.
testing_y = merged_data["churn"]
predictions = merged_data["predicted_churn"]
print("\n Classification report : \n", classification_report(testing_y, predictions))
print("Accuracy   Score : ", accuracy_score(testing_y, predictions))
# confusion matrix (previously computed but never displayed)
conf_matrix = confusion_matrix(testing_y, predictions)
print("Confusion matrix : \n", conf_matrix)
# roc_auc_score
# NOTE(review): this is computed from the values stored in predicted_churn; if
# those are hard class labels rather than scores, the AUC reflects a single
# operating point only - confirm what the prediction file contains.
model_roc_auc = roc_auc_score(testing_y, predictions)
print("Area under curve : ", model_roc_auc, "\n")