# XGBoost Base Model

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
import mlflow
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# from xgboost import XGBClassifier
# from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
# from hyperopt.pyll import scope


# Silence every Python warning for the rest of the session.
# NOTE(review): a blanket "ignore" also hides library deprecation warnings;
# consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

# Data Setup

In [None]:
# Let pandas display up to 500 columns before truncating the output
pd.options.display.max_columns = 500

# Load the raw churn dataset
churn = pd.read_csv("../data/churn.txt")

In [None]:
# Remove the phone number and the four per-period charge columns
churn = churn.drop(
    columns=[
        "Phone",
        "Day Charge",
        "Eve Charge",
        "Night Charge",
        "Intl Charge",
    ]
)

# Store the area code with object dtype rather than its inferred dtype
churn = churn.astype({"Area Code": object})

# Train Model

In [None]:
# Features: every column except the churn flag
X = churn.drop(columns='Churn?')

# Target: the churn flag, kept as a one-column DataFrame
y = churn[['Churn?']]

In [None]:
# Names of all non-numeric feature columns
cats = list(X.select_dtypes(exclude=np.number).columns)

# Cast each of those columns to the pandas 'category' dtype in one call
X = X.astype({col: 'category' for col in cats})

In [None]:
# Encode the churn flag as integers (1 = churned, 0 = stayed).
# Work on an explicit copy so the assignment never targets a view of `churn`.
y = y.copy()
# `map` + `astype(int)` yields an integer column without relying on
# `replace`'s implicit object->int downcasting, and raises if a label other
# than 'True.' / 'False.' is present instead of silently leaving strings behind.
y['Churn?'] = y['Churn?'].map({'True.': 1, 'False.': 0}).astype(int)

In [None]:
# Hold out part of the data for testing; the fixed seed keeps the split reproducible
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=1)

In [None]:
# Display the dimensions of the held-out feature set
X_test.shape

In [None]:
# Preview the first rows of the held-out feature set
X_test.head()

In [None]:
# Take the first held-out row (features and matching label) as a sample customer
X_test_customer, y_test_customer = X_test.iloc[0], y_test.iloc[0]

In [None]:
# Show the sample customer's features followed by its label, one per line
print(X_test_customer, y_test_customer, sep="\n")

In [None]:
import pandas as pd

# Object under inspection: the held-out feature set
my_object = X_test

# Report whether it is a DataFrame
print(
    "It's a DataFrame!"
    if isinstance(my_object, pd.DataFrame)
    else "It's not a DataFrame."
)


In [None]:
import pandas as pd

# Object under inspection: the sample customer
my_object = X_test_customer

# Report whether it is one of the pandas containers (DataFrame or Series)
print(
    "It's a pandas data structure!"
    if isinstance(my_object, (pd.DataFrame, pd.Series))
    else "It's not a pandas data structure."
)


In [None]:
import pandas as pd

my_object = X_test_customer  # object under inspection
obj_type = type(my_object)   # its concrete class
print(obj_type)


In [None]:
# Display the dtype information pandas reports for the sample customer
X_test_customer.dtypes

In [None]:
# Display the shape of the sample-customer object
X_test_customer.shape

In [None]:
# Serialize the sample customer to a JSON string and print it
customer = X_test_customer.to_json()
print(customer)

In [None]:
# Training matrix: every training row together with its churn label.
dtrain = xgb.DMatrix(X_train, label=y_train, enable_categorical=True)

# Single-customer matrix for the first held-out row.
# Slice with a list (`iloc[[0]]`) so the row stays a one-row DataFrame:
# `iloc[0]` would return a mixed-type object Series, which drops the
# category dtypes and is not a (1 x n_features) table XGBoost can ingest.
dtest = xgb.DMatrix(
    X_test.iloc[[0]],
    label=y_test.iloc[[0]],
    enable_categorical=True,
)

# Train the best model

In [None]:
# Hyperparameters for the final model.
# NOTE(review): these look like the output of an earlier tuning run -- confirm
# their origin before changing them.
best_params = {
    'learning_rate': 0.2611886716276454,
    'max_depth': 39,
    'min_child_weight': 4.490391995734931,
    'objective': 'binary:logistic',
    'reg_alpha': 0.044567672488398144,
    'reg_lambda': 0.11968534468462336,
    'seed': 42,
}

# Monitor early stopping on the full held-out split. A one-row evaluation
# matrix makes the monitored metric far too noisy to decide when to stop.
dvalid = xgb.DMatrix(X_test, label=y_test, enable_categorical=True)

# Train for at most 1000 rounds, stopping once the held-out metric has not
# improved for 50 consecutive rounds.
booster = xgb.train(
    params=best_params,
    dtrain=dtrain,
    num_boost_round=1000,
    evals=[(dvalid, "test")],
    early_stopping_rounds=50,
)

# Make Single Prediction

In [None]:
# Score the data in dtest, then threshold the probabilities at 0.5 to get 0/1 labels
y_pred_prob = booster.predict(dtest)
y_pred = np.where(y_pred_prob >= 0.5, 1, 0)


In [None]:
def predict_single(dtest, model):
    """Return the 0/1 class predicted by ``model`` for the first row of ``dtest``.

    Args:
        dtest: Data to score, in whatever form ``model.predict`` accepts
            (the notebook passes an ``xgb.DMatrix``).
        model: Fitted model exposing ``predict(dtest)`` that returns an array
            of probabilities (the notebook passes an ``xgb.Booster``).

    Returns:
        1 if the first predicted probability is at least 0.5, otherwise 0.
    """
    # Use the model that was passed in rather than the notebook-global booster,
    # so the function works with any model and outside this notebook.
    y_pred_prob = model.predict(dtest)
    y_pred = (y_pred_prob >= 0.5).astype(int)

    return y_pred[0]

In [None]:
# Classify the data held in dtest with the trained booster
predict_single(dtest, booster)

In [None]:
import pickle

# Pickle the trained booster and write the bytes to the models directory
with open('../models/churn-model.bin', 'wb') as model_file:
    model_file.write(pickle.dumps(booster))

In [None]:
import requests

# Send the sample customer to the locally running prediction service.
customer = X_test_customer.to_json()
url = 'http://localhost:9696/predict'

# NOTE(review): `customer` is already a JSON string, so `json=` encodes it a
# second time and the service receives a JSON string rather than an object.
# Left unchanged because the service's expected payload is not visible here;
# confirm against the server code.
# The timeout keeps the cell from hanging indefinitely if the service is down.
response = requests.post(url, json=customer, timeout=10)

# Surface HTTP errors directly instead of failing later while parsing the body.
response.raise_for_status()

result = response.json()
result