# **Evaluation Metrics for Classification**

In [1]:
# import the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import FunctionTransformer
from sklearn.feature_extraction import DictVectorizer

In [2]:
df = pd.read_csv('C:/Users/osaze/Desktop/Git Projects/ml-zoomcamp-notes/Logistics regression/Telco-Customer-Churn.csv')
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [3]:
# convert column headers to lower case and replace space with _
# convert categorical column values to lower case and replace space with _

df.columns = df.columns.str.lower().str.replace(' ', '_')

categorical_columns = list(df.dtypes[df.dtypes == 'object'].index)

for c in categorical_columns:
    df[c] = df[c].str.lower().str.replace(' ', '_')

In [4]:
# convert total charges to number and replace nulls with 0
df.totalcharges = pd.to_numeric(df.totalcharges, errors = 'coerce')
df.totalcharges = df.totalcharges.fillna(0)

# convert yes to 1 and no to 0 and convert the datatype to int
df.churn = (df.churn == 'yes').astype(int)

In [5]:
# split the dataset into train, validation and test sets
df_full_train, df_test = train_test_split(df, test_size = 0.2, random_state = 1)
df_train, df_val = train_test_split(df_full_train, test_size = 0.25, random_state = 1)

print(len(df_train), len(df_val), len(df_test))

# reset the index
df_train = df_train.reset_index(drop=True)
df_val = df_val.reset_index(drop=True)
df_test = df_test.reset_index(drop=True)

# seperate the target variables
y_train = df_train['churn'].values
y_val = df_val['churn'].values
y_test = df_test['churn'].values

# delete the target valriables in the feature matrix
del df_train['churn']
del df_val['churn']
del df_test['churn']

df_full_train = df_full_train.reset_index(drop=True)

4225 1409 1409


In [6]:
numerical = ['tenure', 'monthlycharges', 'totalcharges']
categorical = ['gender', 'seniorcitizen', 'partner', 'dependents',
        'phoneservice', 'multiplelines', 'internetservice',
       'onlinesecurity', 'onlinebackup', 'deviceprotection', 'techsupport',
       'streamingtv', 'streamingmovies', 'contract', 'paperlessbilling',
       'paymentmethod']
full_list = numerical + categorical

In [7]:
df_to_dict = FunctionTransformer(
    lambda X: X.to_dict(orient="records"),
    validate=False
)

model = make_pipeline(df_to_dict, 
                      DictVectorizer(sparse = False), 
                      LogisticRegression())
model

In [8]:
model.fit(df_train[numerical + categorical], y_train)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [9]:
y_pred = model.predict_proba(df_val)[:, 1]

churn_decision = (y_pred >= 0.5)

df = pd.DataFrame()
df['probability'] = y_pred
df['prediction'] = churn_decision.astype(int)
df['actual'] = y_val
df['correct'] = df.prediction == df.actual

# let us check the fraction of correct ones

print(f"Model Accuracy: {df.correct.mean()}")

Model Accuracy: 0.8034066713981547


### **Accuracy and dummy model**

- Evaluate the model on different thresholds
- Check the accuracy of dummy baselines

*Accuracy is the proportion of correctly predicted samples out of the total number of predictions.*

$
\text{Accuracy} = \frac{\text{Number of correct predictions}}{\text{Total number of predictions}}
$

In cases where the target variable is **class-imbalanced**, accuracy can be **misleading**, as a model may achieve a high accuracy by predicting only the majority class while performing poorly on the minority class.


In [10]:
from sklearn.metrics import accuracy_score

In [11]:
y_pred1 = model.predict(df_val)

In [12]:
score = accuracy_score(y_val, y_pred1)
print(f"Accuracy: {(score):.3f}")

Accuracy: 0.803


### **Confusion Table**

**Confusion Table** is a way of looking at different errors and correct decisions that the binary classification model makes.

- Different types of errors and correct decisions
- Arranging them in a table

In [13]:
from sklearn.metrics import confusion_matrix

In [15]:
# noramlize parameter helps express the result in percentage
confusion_matrix(y_val, y_pred1, normalize='all').round(2)

array([[0.65, 0.07],
       [0.12, 0.15]])

*Rather than summarizing model performance with a single number, the confusion table presents four values that indicate how many correct and incorrect decisions the model makes.*

### **Precision and Recall**