In [None]:
!pip install sagemaker==2.88.0 s3fs plotly

In [None]:
import sagemaker
from sagemaker.session import Session
from sagemaker.feature_store.feature_group import FeatureGroup
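# Optional: if the environment does not already provide AWS credentials,
# uncomment the lines below and fill in the placeholders.
# Never commit real keys to the notebook.
# import os
# os.environ["AWS_ACCESS_KEY_ID"] = "<aws_key_id>"
# os.environ["AWS_SECRET_ACCESS_KEY"] = "<aws_secret>"
# os.environ["AWS_DEFAULT_REGION"] = "us-east-1"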

# --- Notebook configuration -------------------------------------------------
# IAM role ARN; the "<account_number>" placeholder must be replaced before use.
role = "arn:aws:iam::<account_number>:role/sagemaker-iam-role"
# Name of the feature group to query.
FEATURE_GROUP_NAME = "telcom-customer-features"
# Bucket that receives the Athena query output (see the cell that runs the query).
s3_bucket_name = "feast-demo-mar-2022"

# --- SageMaker handles ------------------------------------------------------
# One session is shared by everything below; the region is read from it.
sagemaker_session = Session()
region = sagemaker_session.boto_region_name

# Handle to the feature group, bound to the session created above.
customers_feature_group = FeatureGroup(
    sagemaker_session=sagemaker_session,
    name=FEATURE_GROUP_NAME,
)

In [None]:
# Athena query object for the feature group; its `table_name` is interpolated
# into the SQL below.
get_latest_snapshot_query = customers_feature_group.athena_query()
# Latest-snapshot query:
#   * the inner SELECT numbers each customerid's rows, ordered newest-first by
#     event_timestamp, then Api_Invocation_Time, then write_time;
#   * the outer SELECT keeps only the first-ranked row per customerid
#     (row_num = 1) and excludes rows where is_deleted is true.
# The result therefore carries an extra `row_num` column in addition to the
# table's own columns.
query = f"""SELECT *
FROM
    (SELECT *,
         row_number()
        OVER (PARTITION BY customerid
    ORDER BY  event_timestamp desc, Api_Invocation_Time DESC, write_time DESC) AS row_num
    FROM "{get_latest_snapshot_query.table_name}")
WHERE row_num = 1 and 
NOT is_deleted;"""

In [None]:
# Submit the snapshot SQL to Athena, directing the results to the "output"
# prefix of the configured bucket, then wait on the query before moving on.
athena_output_location = f"s3://{s3_bucket_name}/output"
get_latest_snapshot_query.run(
    query_string=query,
    output_location=athena_output_location,
)
get_latest_snapshot_query.wait()

In [None]:
# Load the query result into a DataFrame and drop the bookkeeping columns
# (timestamps, deletion flag, and the row_number() rank added by the query),
# leaving the remaining columns for analysis.
bookkeeping_columns = [
    "event_timestamp",
    "write_time",
    "api_invocation_time",
    "is_deleted",
    "row_num",
]
churn_data = get_latest_snapshot_query.as_dataframe().drop(columns=bookkeeping_columns)

In [None]:
# Summary statistics for every column (include="all" covers non-numeric ones
# too), transposed so each column of churn_data becomes one row of the output.
churn_data.describe(include="all").transpose()

In [None]:
import numpy as np
import warnings
warnings.filterwarnings("ignore")
import plotly.offline as py
import plotly.graph_objs as go
# Correlation heatmap of the numeric features in churn_data.
#
# Only numeric/boolean columns are correlated. pandas >= 2.0 no longer drops
# non-numeric columns implicitly in DataFrame.corr() and raises on them
# instead, so the selection is made explicit here (this also works on older
# pandas, where corr() ignored such columns silently).
numeric_features = churn_data.select_dtypes(include=["number", "bool"])
correlation = numeric_features.corr()
matrix_cols = correlation.columns.tolist()
corr_array = np.array(correlation)

# The colorbar title uses the nested title=dict(text=..., side=...) form; the
# flat `titleside` attribute is deprecated and rejected by newer plotly
# releases.
trace = go.Heatmap(
    z=corr_array,
    x=matrix_cols,
    y=matrix_cols,
    colorscale="Viridis",
    colorbar=dict(
        title=dict(text="Pearson Correlation coefficient", side="right"),
    ),
)
layout = go.Layout(
    dict(
        title="Correlation Matrix for variables",
        autosize=False,
        height=720,
        width=800,
        # Large left/bottom margins — presumably to leave room for the
        # feature-name tick labels.
        margin=dict(r=0, l=210, t=25, b=210),
        yaxis=dict(tickfont=dict(size=9)),
        xaxis=dict(tickfont=dict(size=9)),
    )
)
fig = go.Figure(data=[trace], layout=layout)
py.iplot(fig)
