# Telco Customer Churn Prediction Results

## Introduction and Context

## Libraries and Configurations

In [1]:
import sys
from pathlib import Path

# Path to the project root: the parent of the current working directory
# (this assumes the notebook is run from a sub-folder of the project).
project_root = Path().resolve().parent

# Make the project's `src` package importable. The membership check keeps
# sys.path free of duplicate entries when this cell is executed again.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

In [None]:
import src.utils as ut
import src.preprocess as pp
import src.config as cf
import src.train as tn
import src.evaluate as et

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

# Remove the cap on displayed columns so DataFrame previews show every column.
pd.set_option('display.max_columns', None)

## Data Loading

In [None]:
# Load the raw dataset from the location registered under 'data_raw'
# in the project configuration, then preview the first rows.
raw_data_path = cf.paths['data_raw']
raw_data = ut.load_data(raw_data_path)
raw_data.head()

## Data Splitting and Preprocessing

### Feature/Target Separation and Train/Test Split

In [4]:
# Separate the target column ('Churn') from the predictor columns.
y = raw_data['Churn']
X = raw_data.drop(columns='Churn')

# Hold out a test set; the split size and the seed come from the project config.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=cf.test_size,
    random_state=cf.random_state,
)

### Pipeline Definition

In [5]:
# Transformer for the target variable, kept separate from the feature pipeline.
target_preprocessing_pipeline = pp.TargetPreprocess()

# Feature preprocessing steps, listed in the order the pipeline applies them.
feature_preprocessing_steps = [
    ('feature_selection', pp.FeatureSelector()),
    ('data_cleaning', pp.DataCleaning()),
    ('feature_engineering', pp.FeatureEngineering()),
    ('outlier_detection', pp.OutlierDetector()),
    ('missing_values', pp.MissingValuesHandler()),
    ('encoding', pp.CategoricalEncoder()),
    ('scaling', pp.Scaling()),
]
feature_preprocessing_pipeline = Pipeline(steps=feature_preprocessing_steps)

### Target Variable Preprocessing

In [6]:
# Fit the target transformer on the training labels only, then reuse the
# fitted transformer on the test labels so both splits share one mapping.
# (Assumes pp.TargetPreprocess follows the scikit-learn transformer API and
# exposes `transform` alongside `fit_transform`.)
y_train = target_preprocessing_pipeline.fit_transform(y_train)
y_test = target_preprocessing_pipeline.transform(y_test)


### Feature Variable Preprocessing

In [None]:
# Fit every step of the feature pipeline on the training features and
# replace X_train with the transformed result.
X_train = feature_preprocessing_pipeline.fit_transform(X_train)

In [None]:
# Preview the first rows of the transformed training features.
X_train.head()

In [None]:
# Apply the pipeline that was already fitted on the training features.
# Refitting on the test set would leak test-set statistics into preprocessing
# and could yield a different encoding/scaling than the model was trained on.
X_test = feature_preprocessing_pipeline.transform(X_test)

In [None]:
# Preview the first rows of the transformed test features.
X_test.head()

## Modeling

### Random Forest Classifier

#### Training

In [None]:
# Record the start of this training run through the project logging helper.
ut.write_log('Training Random Forest Classifier')

# Train the model on the preprocessed training data; the 'grid_search' mode
# is forwarded to the project's training helper.
rf_results = tn.train_model(
    model_name='RandomForest',
    X_train=X_train,
    y_train=y_train,
    mode='grid_search',
)

# Keep the estimator stored under 'best_model' in the training results.
best_rf_model = rf_results['best_model']

#### Testing

##### Predictions

In [12]:
# Predicted class labels for the test set.
y_pred = best_rf_model.predict(X_test)

# Predicted probabilities; only column index 1 is kept
# (presumably the positive/churn class; confirm against the model's class order).
test_probabilities = best_rf_model.predict_proba(X_test)
y_pred_proba = test_probabilities[:, 1]

##### Classification Report

In [None]:
# Pass the true and predicted test labels to the project's
# classification-report helper.
et.generate_classification_report(y_test, y_pred)

##### Confusion Matrix

In [None]:
# Pass the true and predicted test labels to the project's
# confusion-matrix plotting helper.
et.plot_confusion_matrix(y_test, y_pred)

##### AUC-ROC Curve

In [None]:
# Pass the true test labels and the predicted probabilities (not the hard
# class predictions) to the project's ROC-curve plotting helper.
et.plot_roc_curve(y_test, y_pred_proba)

## Results