---
# Support Vector Classifier (SVC) - Model 4
---
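In this notebook, a Support Vector Classifier (SVC) is tuned with a cross-validated grid search to predict customer churn (`Churn Value`), and the best model is then evaluated on the training and testing datasets.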


## Results

---

### Import necessary libraries

In [None]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline

from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, ConfusionMatrixDisplay, balanced_accuracy_score, precision_recall_curve
from sklearn.svm import SVC 
from features import features

### Read data file

In [None]:
# Location of the cleaned, feature-engineered churn dataset (relative to this notebook)
DATA_PATH = '../data/churn_cleaned_featEng.xlsx'

# Load the Excel workbook into a DataFrame and display it
churn_df = pd.read_excel(DATA_PATH)
churn_df

---

<center>
    
## Preparing data

</center>

---

### Separate X and y features

In [None]:
# Separate the predictors (X) from the target (y)
# Earlier candidate feature set, kept for reference:
# ['Tenure Months', 'Dependents_Yes', 'Internet Service_Fiber optic',
#  'Payment Method_Electronic check', 'Contract_Two year', 'Contract_One year']
feat_list = [
    'Dependents_Yes',
    'Contract_Two year',
    'Contract_One year',
    'Internet Service_No',
]

# The target column is dropped before selecting, so listing it in feat_list
# would raise a KeyError instead of leaking the label into X
predictors = churn_df.drop(columns=['Churn Value'])
X = predictors[feat_list]
y = churn_df['Churn Value']

### Split dataset (training/validation/testing)

In [None]:
# First split: 70% of the rows for training, the remaining 30% held out
X_train, X_test_validation, y_train, y_test_validation = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=5,
)

# Second split: halve the held-out 30% into a validation (15%) and a testing (15%) dataset
X_val, X_test, y_val, y_test = train_test_split(
    X_test_validation,
    y_test_validation,
    test_size=0.5,
    random_state=5,
)

### Convert data subsets to DataFrames

In [None]:
# Rebuild the training and testing predictors as DataFrames labelled with X's column names
feature_columns = X.columns
X_train = pd.DataFrame(data=X_train, columns=feature_columns)
X_test = pd.DataFrame(data=X_test, columns=feature_columns)

### Scale X features

In [None]:
# Learn the scaling statistics from the training predictors only
scaler = StandardScaler()
scaler.fit(X_train)

# Apply that same fitted scaling to the training and testing predictors
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

---

<center>
    
## Support Vector Classifiers Model

</center>

---

### Find best parameters for model
---

**Create instance of model**

In [None]:
# Base support vector classifier; all other hyper-parameters keep their defaults here
SVC_RANDOM_STATE = 25
svm = SVC(random_state=SVC_RANDOM_STATE)

**Setting parameters for the GridSearch**

In [None]:
# NOTE(review): the commented-out grid below uses decision-tree parameter names
# (criterion/splitter/max_depth) and is not used by the SVC search.
# param_grid = { 'criterion' : ['gini', 'entropy', 'log_loss'],
#                'splitter' : ['best','random'],
#                'max_depth' : [3,5,8,10,15,20,40],
#                'class_weight': [{0: 0.1, 1: 1.9}],
#              }

# Search history for C (as recorded by the author):
#   first tried [0.01,0.1, 1]      -> best C was 0.01
#   then tried  [0.001,0.01, .01]  -> best C was 0.001
c_values = [0.001, 0.01, .1]

# Class-weight candidates, from sklearn's 'balanced' heuristic to increasingly
# heavier weights on class 1
class_weight_options = [
    'balanced',
    {0: 1, 1: 1},
    {0: 0.7, 1: 1.3},
    {0: 0.5, 1: 1.5},
    {0: 0.3, 1: 1.7},
    {0: 0.1, 1: 1.9},
]

# NOTE(review): gamma has no effect on the 'linear' kernel, so each linear
# candidate is evaluated once per gamma value - confirm whether that is intended.
param_grid = dict(
    C=c_values,
    kernel=['linear', 'rbf'],
    gamma=['scale', 'auto'],
    class_weight=class_weight_options,
)

# Metrics recorded for every candidate; `refit` names the one used to pick the best
refit = 'balanced_accuracy'
scoring = [refit, 'recall', 'f1_macro', 'roc_auc']

**Setting instance of GridSearchCV**

In [None]:
# Exhaustive search over param_grid: every metric in `scoring` is recorded and the
# candidate that is best on the `refit` metric is refitted as the final model
grid = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    scoring=scoring,
    refit=refit,
)

**Train the models to find best parameters**

In [None]:
# Run the cross-validated search on the scaled training data
grid.fit(X=X_train_scaled, y=y_train)

---
### Investigate best model's predictive features

---

In [None]:
# Parameter combination selected by the search (best on the `refit` metric)
grid.best_params_

In [None]:
# Mean cross-validated score of the selected candidate on the `refit` metric
grid.best_score_

In [None]:
# Number of input features seen when the search was fitted
grid.n_features_in_

In [None]:
# Keep a handle on the refitted best estimator (the SVC trained with the selected parameters).
# This assignment previously appeared in two consecutive identical cells; one is enough.
model = grid.best_estimator_

**Plot Decision Tree Arborescence (not applicable to the SVC model; code kept commented out for reference)**

In [None]:
# NOTE(review): plot_tree only works with decision-tree estimators, while `model` in this
# notebook is the best SVC from the grid search, so this cell is intentionally disabled.

# # Create figure and adjust size
# plt.figure(figsize=(50,50))

# # Plot decision tree
# plot_tree(model, feature_names=X_train.columns)

# # Save decision tree
# plt.savefig('../graph/DecisionTreeClassifier.png')

# # Show model
# plt.show()

---
### Get scores for the training dataset
---

**Make prediction on training dataset**

In [None]:
# Predict the training labels with the refitted best estimator held by the grid search
y_pred_train = grid.predict(X_train_scaled)

**Create Confusion Matrix**

In [None]:
# Confusion matrix for the training dataset (rows: true classes, columns: predicted classes)
cm = confusion_matrix(y_true=y_train, y_pred=y_pred_train)
cm

**Plot confusion matrix**

In [None]:
# Create the figure and axes with the intended size.
# ConfusionMatrixDisplay.plot() opens its own figure unless an Axes is passed in,
# so a bare plt.figure(figsize=...) leaves an empty figure behind and the size is ignored.
fig, ax = plt.subplots(figsize=(4, 4))

# Create plot for Confusion Matrix, labelled with the classes known to the grid search
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=grid.classes_)

# Draw onto the prepared axes so the figure size above is actually used
disp.plot(ax=ax)
ax.set_title('Confusion matrix - training dataset')

# Show plot
plt.show()

**Print classification Report**

In [None]:
# Per-class precision / recall / F1 on the training dataset
train_report = classification_report(y_true=y_train, y_pred=y_pred_train)
print(train_report)

---

<center>
    
## Validation

</center>

---

**Run model on testing dataset**

In [None]:
# Predict the testing labels with the refitted best estimator held by the grid search
y_pred_test = grid.predict(X_test_scaled)

**Create Confusion Matrix**

In [None]:
# Confusion matrix for the testing dataset (rows: true classes, columns: predicted classes)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_test)
cm

**Plot confusion matrix**

In [None]:
# Create the figure and axes with the intended size.
# ConfusionMatrixDisplay.plot() opens its own figure unless an Axes is passed in,
# so a bare plt.figure(figsize=...) leaves an empty figure behind and the size is ignored.
fig, ax = plt.subplots(figsize=(4, 4))

# Create plot for Confusion Matrix, labelled with the classes known to the grid search
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=grid.classes_)

# Draw onto the prepared axes so the figure size above is actually used
disp.plot(ax=ax)
ax.set_title('Confusion matrix - testing dataset')

# Show plot
plt.show()

**Print classification Report**

In [None]:
# Per-class precision / recall / F1 on the testing dataset
test_report = classification_report(y_true=y_test, y_pred=y_pred_test)
print(test_report)

In [None]:
# Balanced accuracy on the testing dataset
balanced_accuracy_score(y_true=y_test, y_pred=y_pred_test)

In [None]:
# precision_recall_curve needs continuous scores to sweep the decision threshold;
# hard 0/1 predictions reduce the curve to a couple of points. Use the signed
# distance to the separating boundary from the refitted best SVC instead.
y_score_test = grid.decision_function(X_test_scaled)

# Returns (precision, recall, thresholds) for the testing dataset
precision_recall_curve(y_test, y_score_test)

---

<center>
    
## Results

</center>

---

### Results

**all** = the following columns: `Senior Citizen`, `Partner`, `Dependents`, `Tenure Months`, `Internet Service`, `Online Security`, `Online Backup`, `Device Protection`, `Tech Support`, `Contract`, `Paperless Billing`, `Payment Method`, `Monthly Charges`, `Churn Value`

**Training dataset (all + lat/lon)**

| Metric       | Precision | Recall | F1-Score | Support |
|-------------|-----------|-------|----------|--------|
| Class 0     | 0.89     | 0.82  | 0.86    | 4132   |
| Class 1     | 0.60     | 0.72  | 0.66    | 1502   |
| Accuracy    |          |       | 0.80    | 5634   |
| Macro Avg   | 0.74     | 0.77  | 0.76    | 5634   |
| Weighted Avg| 0.81     | 0.80  | 0.80    | 5634   |

**Testing dataset (all + lat/lon)**

| Metric       | Precision | Recall | F1-Score | Support |
|-------------|-----------|-------|----------|--------|
| Class 0     | 0.88     | 0.79  | 0.84    | 504    |
| Class 1     | 0.59     | 0.74  | 0.65    | 201    |
| Accuracy    |          |       | 0.78    | 705    |
| Macro Avg   | 0.74     | 0.77  | 0.75    | 705    |
| Weighted Avg| 0.80     | 0.78  | 0.78    | 705    |