In [6]:
# Upgrade pip, then install sagemaker-core, which supplies the
# sagemaker_core.helper.session_helper module imported in the next cell.
# -q keeps pip's output quiet.
# NOTE(review): neither install is version-pinned, so a later re-run may pull a
# different release than the one this notebook was last executed with.
%pip install --upgrade pip -q
%pip install sagemaker-core -q

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [8]:
import time
import sagemaker

from sagemaker_core.helper.session_helper import Session, get_execution_role

# Set up region, role and bucket parameters used throughout the notebook.
sagemaker_session = Session()
region = sagemaker_session.boto_region_name
role = get_execution_role()

# Build the SageMaker Python SDK session once and read both bucket settings
# from it, instead of constructing a separate sagemaker.Session() per lookup.
sess = sagemaker.Session()
bucket = sess.default_bucket()
default_bucket_prefix = sess.default_bucket_prefix

# Echo the resolved settings so a reader can confirm the environment.
print(f"AWS region: {region}")
print(f"Execution role: {role}")
print(f"Default S3 bucket: {bucket}")
print(f"Default S3 bucket prefix: {default_bucket_prefix}")

AWS region: us-east-1
Execution role: arn:aws:iam::975050210426:role/cfst-4216-d8276b3f555b1be591-SageMakerExecutionRole-LmCb9apjk8dX
Default S3 bucket: sagemaker-us-east-1-975050210426
Default S3 bucket prefix: None


In [9]:
import pandas as pd

# Load the dataset from S3
# The object key is joined directly onto the default bucket resolved earlier.
s3_key = 'telco-customer-churn.csv'
dataset_url = f's3://{bucket}/{s3_key}'

# pd.read_csv is handed the s3:// URL directly.
df = pd.read_csv(dataset_url)

# Preview the first rows as the cell's last expression so the notebook renders
# the frame as a rich table rather than wrapped plain text from print().
df.head()

In [14]:
# Preview the current state of `df` (rich display instead of print()).
# NOTE(review): the saved output for this cell shows already-encoded/scaled
# values (gender as 0/1, standardised tenure) and its execution count (14) is
# higher than the preprocessing cell's (10), so it was last run out of order.
# On a fresh Restart & Run All it will show the frame as loaded instead.
df.head()

   customerID  gender  SeniorCitizen  Partner  Dependents    tenure  \
0  7590-VHVEG       0              0        1           0 -1.280248   
1  5575-GNVDE       1              0        0           0  0.064303   
2  3668-QPYBK       1              0        0           0 -1.239504   
3  7795-CFOCW       1              0        0           0  0.512486   
4  9237-HQITU       0              0        0           0 -1.239504   

   PhoneService     MultipleLines InternetService OnlineSecurity  ...  \
0             0  No phone service             DSL             No  ...   
1             1                No             DSL            Yes  ...   
2             1                No             DSL            Yes  ...   
3             0  No phone service             DSL            Yes  ...   
4             1                No     Fiber optic             No  ...   

  DeviceProtection TechSupport StreamingTV StreamingMovies        Contract  \
0               No          No          No              

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Preprocess the churn frame, build a fully numeric feature matrix, and split it.

# Handle missing values. .copy() yields an independent frame so the column
# assignments below cannot trigger pandas' SettingWithCopyWarning.
df = df.dropna().copy()

# Encode categorical variables: each listed column (including the Churn target)
# is label-encoded with its own freshly fitted LabelEncoder.
label_encoded_columns = [
    'gender',
    'Partner',
    'Dependents',
    'PhoneService',
    'PaperlessBilling',
    'Churn',
]
for column in label_encoded_columns:
    df[column] = LabelEncoder().fit_transform(df[column])

# TotalCharges is parsed with errors='coerce', so entries that cannot be parsed
# become NaN and those rows are removed.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df = df.dropna(subset=['TotalCharges']).copy()  # Remove rows where conversion failed

# Scale numerical features
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so held-out rows contribute to the scaling statistics.
numeric_columns = ['tenure', 'MonthlyCharges', 'TotalCharges']
scaler = StandardScaler()
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

# Build the target and the feature matrix.
# Previously X kept the customerID identifier and every string-valued column
# that was not label-encoded above (e.g. MultipleLines, InternetService), so it
# was not numeric. Drop the identifier and one-hot encode what remains;
# dtype=int makes the indicator columns 0/1 rather than booleans.
y = df['Churn']
X = df.drop(columns=['Churn']).drop(columns=['customerID'], errors='ignore')
X = pd.get_dummies(X, dtype=int)
assert X.select_dtypes(exclude='number').empty, "feature matrix still has non-numeric columns"

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [17]:
# Configure the XGBoost estimator: container image, output location, instance
# settings and the fixed hyperparameters.

# Resolve the XGBoost 1.5-1 image for the region captured during setup.
container = sagemaker.image_uris.retrieve("xgboost", region, "1.5-1")

# Honour the session's default bucket prefix when one is configured, so training
# output lands under it; with no prefix this is s3://<bucket>/output as before.
output_prefix = f"{default_bucket_prefix}/output" if default_bucket_prefix else "output"
op_url = f"s3://{bucket}/{output_prefix}"

sess = sagemaker.Session()

xgb = sagemaker.estimator.Estimator(
    container,
    role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    output_path=op_url,
    sagemaker_session=sess,
)

# Fixed hyperparameters; eval_metric="auc" matches the "validation:auc"
# objective metric name used for tuning.
xgb.set_hyperparameters(
    eval_metric="auc",
    max_depth=5,
    objective="binary:logistic",
    num_round=10,
)

from sagemaker.tuner import (
    ContinuousParameter,
    HyperparameterTuner,
)

# Setting up hyperparameters and job.
# XGBoost hyperparameter reference:
# https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost_hyperparameters.html

# Metric name the tuning job is configured to optimise.
objective_metric_name = "validation:auc"

# alpha, lambda and gamma are each searched over [0.01, 10] with logarithmic
# scaling; every name gets its own ContinuousParameter instance.
hyperparameter_ranges = {
    name: ContinuousParameter(0.01, 10, scaling_type="Logarithmic")
    for name in ("alpha", "lambda", "gamma")
}