<a href="https://colab.research.google.com/github/Mrkomiljon/Datascience/blob/main/CompareMLmodels.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import libs and dependencies

In [None]:
import numpy as np
import seaborn as sns
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import plotly.express as px

## Load the data

In [None]:
# Load the already-preprocessed churn dataset (path is relative to the
# notebook's working directory) and print its column/dtype summary.
churn_csv = 'processed_churn.csv'
df = pd.read_csv(churn_csv)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 31 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   gender                                 7043 non-null   int64  
 1   SeniorCitizen                          7043 non-null   int64  
 2   Partner                                7043 non-null   int64  
 3   Dependents                             7043 non-null   int64  
 4   tenure                                 7043 non-null   int64  
 5   PhoneService                           7043 non-null   int64  
 6   PaperlessBilling                       7043 non-null   int64  
 7   MonthlyCharges                         7043 non-null   float64
 8   TotalCharges                           7043 non-null   float64
 9   Churn                                  7043 non-null   int64  
 10  MultipleLines_No_phone_service         7043 non-null   int64  
 11  Mult

## Feature scaling

In [None]:
# Feature scaling: rescale each continuous column to the [0, 1] range.
from sklearn.preprocessing import MinMaxScaler

sc = MinMaxScaler()

# The same scaler object is re-fitted for every column in turn, so after this
# cell `sc` only holds the statistics of the last column ('TotalCharges').
# NOTE(review): the scaler is fitted on the full frame, i.e. before the
# train/test split performed later in the notebook, so test-set min/max
# values influence the training features.
for column in ('tenure', 'MonthlyCharges', 'TotalCharges'):
    df[column] = sc.fit_transform(df[[column]])

## Model preparation

In [None]:
# Import ML models
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier

# Import metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Import data split
from sklearn.model_selection import train_test_split

## Split data

In [None]:
# Target is the 'Churn' column; every remaining column is used as a feature.
y = df['Churn']
X = df.drop(columns='Churn')

# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=50,
)

In [None]:
#Defining the modelling function
def modeling(alg, alg_name, params=None):
    """Fit a classifier on the training split and print its test-set metrics.

    The function reads ``X_train``, ``y_train``, ``X_test`` and ``y_test``
    from the notebook's global namespace, so the split cell must have been
    run first.

    Parameters
    ----------
    alg : callable
        Estimator class (or any factory) that is instantiated as
        ``alg(**params)`` and exposes ``fit`` and ``predict``.
    alg_name : str
        Label printed as the first line of the report.
    params : dict, optional
        Keyword arguments forwarded to ``alg``. ``None`` (the default) means
        the estimator is built with its own defaults.

    Returns
    -------
    The fitted estimator instance.
    """
    # ``None`` replaces the former mutable ``{}`` default so no dict object is
    # shared between calls.
    model = alg(**(params or {}))
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # AUROC is a ranking metric and needs continuous scores. Feeding it hard
    # 0/1 predictions collapses the ROC curve to a single operating point.
    # Prefer positive-class probabilities, then the decision function, and
    # only fall back to the hard labels when the estimator offers neither.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, "decision_function"):
        y_score = model.decision_function(X_test)
    else:
        y_score = y_pred

    #Performance evaluation
    def print_scores():
        """Print accuracy, precision, recall, F1 and AUROC for the test split."""
        print(alg_name)
        print("accuracy: ", accuracy_score(y_test, y_pred))
        # Precision and recall use scikit-learn's default binary averaging,
        # i.e. they describe the positive class only.
        print("precision: ", precision_score(y_test, y_pred))
        print("recall: ", recall_score(y_test, y_pred))
        # NOTE: F1 is support-weighted over both classes, so it is not the
        # harmonic mean of the precision/recall values printed above.
        print("f1_score: ", f1_score(y_test, y_pred, average='weighted'))
        print('AUROC: ', roc_auc_score(y_test, y_score))

    print_scores()
    return model

In [None]:
# Baseline: logistic regression with scikit-learn's default hyper-parameters
log_model = modeling(alg=LogisticRegression, alg_name='Logistic Regression')

Logistic Regression
accuracy:  0.7983909133932797
precision:  0.6281800391389433
recall:  0.5763016157989228
f1_score:  0.7955174819104321
AUROC:  0.727096823323626


In [None]:
# Next candidate: support vector classifier with default hyper-parameters
svc_model = modeling(alg=SVC, alg_name='SVC Classification')

SVC Classification
accuracy:  0.795551348793185
precision:  0.6355748373101953
recall:  0.526032315978456
f1_score:  0.7889704158679894
AUROC:  0.7090315821537525


In [None]:
#Random forest
# The forest is randomised (bootstrap samples, feature sub-sampling), so it is
# seeded here to make the printed metrics repeatable across kernel restarts.
rf_model = modeling(RandomForestClassifier, "Random Forest Classification", params={'random_state': 50})

Random Forest Classification
accuracy:  0.7860861334595362
precision:  0.6169265033407573
recall:  0.49730700179533216
f1_score:  0.778191054792374
AUROC:  0.6933835780184887


In [None]:
#Decision tree
# Tree construction permutes features at each split, so ties can be broken
# differently between runs; seeding keeps the printed metrics repeatable.
dt_model = modeling(DecisionTreeClassifier, "Decision Tree Classification", params={'random_state': 50})

Decision Tree Classification
accuracy:  0.7198296261239944
precision:  0.47145187601957583
recall:  0.518850987432675
f1_score:  0.723967552105011
AUROC:  0.6553123831764918


In [None]:
# Gaussian naive Bayes with default settings
nb_model = modeling(alg=GaussianNB, alg_name="Naive Bayes Classification")

Naive Bayes Classification
accuracy:  0.6469474680548982
precision:  0.42011834319526625
recall:  0.8922800718132855
f1_score:  0.6660052398768987
AUROC:  0.7257030179117842


In [None]:
# Gradient-boosted trees via XGBoost, default hyper-parameters
xboost = modeling(alg=XGBClassifier, alg_name='XGBoost Classification')



XGBoost Classification
accuracy:  0.7846663511594889
precision:  0.6049382716049383
recall:  0.5278276481149012
f1_score:  0.7797387780225931
AUROC:  0.7022171659597642
