<img src="http://bigdataexperience.org/BX/wp-content/uploads/2016/05/BX-FULL.png" width="200" height="200" alt="Big Data Experience Center, King Mongkut's University of Technology Thonburi">

# <center>Machine Learning</center>
# <center>Module 2 - More Classifications</center>
---

# 1. Load Data

In [1]:
import pandas as pd

# Let wide frames show up to 500 columns when displayed.
# The fully qualified key is required: option names are matched as patterns,
# and the bare 'max_columns' is ambiguous on recent pandas releases (it also
# matches 'styler.render.max_columns'), which raises an OptionError.
pd.set_option('display.max_columns', 500)

# Read the train and test frames from their pickle files in the working
# directory.
# NOTE(review): unpickling can execute arbitrary code -- only load these files
# if they come from a trusted source.
telcoData_train, telcoData_test = (
    pd.read_pickle('telcoData_train.data'),
    pd.read_pickle('telcoData_test.data'),
)

# 2. SVM - Training

In [104]:
from sklearn.svm import SVC

# Support-vector classifier with a small C (strong regularisation).
# probability=True enables predict_proba(); those probability estimates are
# calibrated with an internal, randomly shuffled cross-validation, so the seed
# is pinned to make the results repeatable from run to run.
model = SVC(C=0.001, probability=True, random_state=1)

# Every column except the 'Churn_Yes' target is used as a predictor.
model.fit(X=telcoData_train.drop(columns='Churn_Yes'),
          y=telcoData_train['Churn_Yes'])

SVC(C=0.001, probability=True)

# 3. SVM - Prediction

In [111]:
# Class-probability estimates for the test rows; only column 1 is kept.
# NOTE(review): column 1 is presumably the positive class (Churn_Yes == 1) --
# confirm against model.classes_.
res_svm_p = model.predict_proba(
    X=telcoData_test.drop(columns='Churn_Yes'),
)[:, 1]

In [112]:
# Display the per-row probabilities selected from predict_proba() above.
res_svm_p

array([0.31715464, 0.36980819, 0.4656694 , ..., 0.12051746, 0.26196836,
       0.38692825])

In [113]:
# Hard class labels straight from the SVM's own decision rule.
# NOTE(review): res_svm is not referenced by any later cell visible in this
# notebook; the report below is built from the thresholded probabilities.
res_svm = model.predict(
    X=telcoData_test.drop(columns='Churn_Yes'),
)

In [120]:
import numpy as np

# Turn probabilities into 0/1 labels: rows whose probability is strictly
# greater than 0.3 become 1, everything else 0.
# NOTE(review): the 0.3 cut-off is hard-coded; how it was chosen is not shown.
res_svm_class = (res_svm_p > 0.3).astype(int)

In [121]:
from sklearn.metrics import classification_report

In [122]:
# Precision / recall / F1 per class for the thresholded SVM labels.
# classification_report returns plain text, hence the print().
print(
    classification_report(
        telcoData_test['Churn_Yes'],
        res_svm_class,
    )
)

              precision    recall  f1-score   support

           0       0.83      0.78      0.80      1560
           1       0.47      0.56      0.51       553

    accuracy                           0.72      2113
   macro avg       0.65      0.67      0.66      2113
weighted avg       0.74      0.72      0.73      2113



# 4. Random Forest

In [83]:
from sklearn.ensemble  import RandomForestClassifier

# Forest of 500 trees. Each tree is grown on a bootstrap sample drawn from
# half of the training rows (max_samples=0.5) and must keep at least 20
# samples per leaf. class_weight='balanced' re-weights classes inversely to
# their frequency. Bootstrap and feature sampling are random, so the seed is
# pinned to make the fitted forest (and its importances) reproducible.
rf = RandomForestClassifier(
    n_estimators=500,
    class_weight='balanced',
    n_jobs=-1,
    min_samples_leaf=20,
    bootstrap=True,
    max_samples=0.5,
    random_state=1,
)

In [84]:
# Train the forest on every column except the 'Churn_Yes' target.
# fit() is the last expression, so the cell displays the estimator it returns.
rf.fit(
    telcoData_train.drop(columns='Churn_Yes'),
    telcoData_train['Churn_Yes'],
)

RandomForestClassifier(class_weight='balanced', max_samples=0.5,
                       min_samples_leaf=20, n_estimators=500, n_jobs=-1)

# 5. Random Forest - Variable Importance

In [85]:
# Importance table, most important feature first.
# The feature names are derived with the same drop(columns='Churn_Yes')
# expression that was passed to rf.fit(), so each score is paired with the
# column it was actually computed for. Slicing columns[:-1] instead would only
# line up if 'Churn_Yes' happened to be the last column of the frame.
(
    pd.DataFrame({
        'Feature': telcoData_train.drop(columns='Churn_Yes').columns,
        'Value': rf.feature_importances_,
    })
    .sort_values(by='Value', ascending=False)
)

Unnamed: 0,Feature,Value
27,SeniorCitizen,0.201205
29,MonthlyCharges,0.131217
21,Contract_Two year,0.126588
6,InternetService_Fiber optic,0.091076
28,tenure,0.072859
24,PaymentMethod_Electronic check,0.066564
20,Contract_One year,0.035599
15,TechSupport_Yes,0.029673
9,OnlineSecurity_Yes,0.025005
18,StreamingMovies_No internet service,0.024118


# 6. Random Forest - Prediction

In [86]:
# Predict labels for the test rows with the fitted forest, then print the
# per-class precision / recall / F1 against the true 'Churn_Yes' values.
res_rf = rf.predict(
    X=telcoData_test.drop(columns='Churn_Yes'),
)
print(
    classification_report(
        telcoData_test['Churn_Yes'].values,
        res_rf,
    )
)

              precision    recall  f1-score   support

           0       0.91      0.76      0.83      1560
           1       0.53      0.79      0.64       553

    accuracy                           0.76      2113
   macro avg       0.72      0.77      0.73      2113
weighted avg       0.81      0.76      0.78      2113



# 7. Neural Network

In [87]:
# (rows, columns) of the training frame; the column count includes 'Churn_Yes'.
telcoData_train.shape

(4930, 31)

In [100]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with two hidden layers of 20 units each, optimised
# with the L-BFGS solver. random_state is fixed so weight initialisation is
# repeatable.
clf = MLPClassifier(
    hidden_layer_sizes=(20, 20),
    solver='lbfgs',
    alpha=0.001,
    max_iter=10000,
    random_state=1,
)

In [101]:
# Train the network on every column except the 'Churn_Yes' target.
# fit() is the last expression, so the cell displays the estimator it returns.
clf.fit(
    telcoData_train.drop(columns='Churn_Yes'),
    telcoData_train['Churn_Yes'],
)

MLPClassifier(alpha=0.001, hidden_layer_sizes=(20, 20), max_iter=10000,
              random_state=1, solver='lbfgs')

In [102]:
# Predicted labels for the test rows from the fitted network.
res_nn = clf.predict(
    X=telcoData_test.drop(columns='Churn_Yes'),
)

In [103]:
# Per-class precision / recall / F1 for the network's predictions.
print(
    classification_report(
        telcoData_test['Churn_Yes'].values,
        res_nn,
    )
)

              precision    recall  f1-score   support

           0       0.85      0.91      0.88      1560
           1       0.68      0.55      0.61       553

    accuracy                           0.81      2113
   macro avg       0.76      0.73      0.74      2113
weighted avg       0.81      0.81      0.81      2113

