In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score

Read the solved data

In [12]:
# Load the full dataset from a CSV file; the relative path is resolved against
# the kernel's current working directory.
data = pd.read_csv("solved_data.csv")

Split the data into training (2,500 rows), validation (the remaining rows), and test (500 rows) sets

In [13]:

# Two-stage shuffled split with a fixed seed: first carve off 2500 training rows,
# then divide what is left so that 500 rows form the test set and the remainder
# becomes the validation set.
train_data, temp_data = train_test_split(
    data, train_size=2500, shuffle=True, random_state=42
)
val_data, test_data = train_test_split(
    temp_data, test_size=500, shuffle=True, random_state=42
)

# Predictors: every column except the target and the customer identifier.
X_train, X_val, X_test = (
    part.drop(columns=["ChurnStatus", "CustomerID"])
    for part in (train_data, val_data, test_data)
)

# Target: the churn label of each split.
y_train, y_val, y_test = (
    part["ChurnStatus"] for part in (train_data, val_data, test_data)
)


## Linear Decision Boundary

Create the model, train it, and evaluate it on the training, validation, and test sets

In [None]:
# Logistic regression fitted directly on the untransformed features.
model_1 = LogisticRegression(solver='lbfgs', max_iter=500)
model_1.fit(X_train, y_train)

# Predicted labels for each split.
y_train_pred_1, y_val_pred_1, y_test_pred_1 = (
    model_1.predict(features) for features in (X_train, X_val, X_test)
)

# Per-split metrics, always ordered [accuracy, precision, recall].
linear_metrics_train = [
    score(y_train, y_train_pred_1)
    for score in (accuracy_score, precision_score, recall_score)
]
linear_metrics_val = [
    score(y_val, y_val_pred_1)
    for score in (accuracy_score, precision_score, recall_score)
]
linear_metrics_test = [
    score(y_test, y_test_pred_1)
    for score in (accuracy_score, precision_score, recall_score)
]

# Keep the individual metric names available alongside the lists.
train_accuracy_1, train_precision_1, train_recall_1 = linear_metrics_train
val_accuracy_1, val_precision_1, val_recall_1 = linear_metrics_val
test_accuracy_1, test_precision_1, test_recall_1 = linear_metrics_test

# One line per split: train, validation, test.
print(linear_metrics_train, linear_metrics_val, linear_metrics_test, sep="\n")

[0.976, 0.7956989247311828, 0.6434782608695652]
[0.978, 0.75, 0.631578947368421]
[0.98, 0.8823529411764706, 0.6521739130434783]


## Non-linear Decision Boundary

### Degree = 2

In [27]:
# Degree-2 polynomial expansion (no bias column). The transformer is fitted on
# the training split only and then reused for the validation and test splits.
poly_2 = PolynomialFeatures(degree=2, include_bias=False)
X_train_2 = poly_2.fit_transform(X_train)
X_val_2, X_test_2 = (poly_2.transform(raw) for raw in (X_val, X_test))

# Logistic regression on the expanded features.
model_2 = LogisticRegression(solver='lbfgs', max_iter=500)
model_2.fit(X_train_2, y_train)

# Predicted labels for each split.
y_train_pred_2, y_val_pred_2, y_test_pred_2 = (
    model_2.predict(features) for features in (X_train_2, X_val_2, X_test_2)
)

# Per-split metrics, always ordered [accuracy, precision, recall].
non_linear_2_metrics_train = [
    score(y_train, y_train_pred_2)
    for score in (accuracy_score, precision_score, recall_score)
]
non_linear_2_metrics_val = [
    score(y_val, y_val_pred_2)
    for score in (accuracy_score, precision_score, recall_score)
]
non_linear_2_metrics_test = [
    score(y_test, y_test_pred_2)
    for score in (accuracy_score, precision_score, recall_score)
]

# Keep the individual metric names available alongside the lists.
train_accuracy_2, train_precision_2, train_recall_2 = non_linear_2_metrics_train
val_accuracy_2, val_precision_2, val_recall_2 = non_linear_2_metrics_val
test_accuracy_2, test_precision_2, test_recall_2 = non_linear_2_metrics_test

# One line per split: train, validation, test.
print(
    non_linear_2_metrics_train,
    non_linear_2_metrics_val,
    non_linear_2_metrics_test,
    sep="\n",
)


[0.9816, 0.8415841584158416, 0.7391304347826086]
[0.978, 0.7222222222222222, 0.6842105263157895]
[0.984, 0.8571428571428571, 0.782608695652174]


### Degree = 5

In [28]:
# Degree-5 polynomial expansion (no bias column). The transformer is fitted on
# the training split only and then reused for the validation and test splits.
poly_5 = PolynomialFeatures(degree=5, include_bias=False)
X_train_5 = poly_5.fit_transform(X_train)
X_val_5, X_test_5 = (poly_5.transform(raw) for raw in (X_val, X_test))

# Logistic regression on the expanded features.
model_5 = LogisticRegression(solver='lbfgs', max_iter=500)
model_5.fit(X_train_5, y_train)

# Predicted labels for each split.
y_train_pred_5, y_val_pred_5, y_test_pred_5 = (
    model_5.predict(features) for features in (X_train_5, X_val_5, X_test_5)
)

# Per-split metrics, always ordered [accuracy, precision, recall].
non_linear_5_metrics_train = [
    score(y_train, y_train_pred_5)
    for score in (accuracy_score, precision_score, recall_score)
]
non_linear_5_metrics_val = [
    score(y_val, y_val_pred_5)
    for score in (accuracy_score, precision_score, recall_score)
]
non_linear_5_metrics_test = [
    score(y_test, y_test_pred_5)
    for score in (accuracy_score, precision_score, recall_score)
]

# Keep the individual metric names available alongside the lists.
train_accuracy_5, train_precision_5, train_recall_5 = non_linear_5_metrics_train
val_accuracy_5, val_precision_5, val_recall_5 = non_linear_5_metrics_val
test_accuracy_5, test_precision_5, test_recall_5 = non_linear_5_metrics_test

# One line per split: train, validation, test.
print(
    non_linear_5_metrics_train,
    non_linear_5_metrics_val,
    non_linear_5_metrics_test,
    sep="\n",
)


[0.9984, 0.9911504424778761, 0.9739130434782609]
[0.99, 0.7916666666666666, 1.0]
[0.984, 0.8260869565217391, 0.8260869565217391]


### Degree = 9

In [29]:
# Degree-9 polynomial expansion (no bias column). The transformer is fitted on
# the training split only and then reused for the validation and test splits.
poly_9 = PolynomialFeatures(degree=9, include_bias=False)
X_train_9 = poly_9.fit_transform(X_train)
X_val_9, X_test_9 = (poly_9.transform(raw) for raw in (X_val, X_test))

# Logistic regression on the expanded features.
model_9 = LogisticRegression(solver='lbfgs', max_iter=500)
model_9.fit(X_train_9, y_train)

# Predicted labels for each split.
y_train_pred_9, y_val_pred_9, y_test_pred_9 = (
    model_9.predict(features) for features in (X_train_9, X_val_9, X_test_9)
)

# Per-split metrics, always ordered [accuracy, precision, recall].
non_linear_9_metrics_train = [
    score(y_train, y_train_pred_9)
    for score in (accuracy_score, precision_score, recall_score)
]
non_linear_9_metrics_val = [
    score(y_val, y_val_pred_9)
    for score in (accuracy_score, precision_score, recall_score)
]
non_linear_9_metrics_test = [
    score(y_test, y_test_pred_9)
    for score in (accuracy_score, precision_score, recall_score)
]

# Keep the individual metric names available alongside the lists.
train_accuracy_9, train_precision_9, train_recall_9 = non_linear_9_metrics_train
val_accuracy_9, val_precision_9, val_recall_9 = non_linear_9_metrics_val
test_accuracy_9, test_precision_9, test_recall_9 = non_linear_9_metrics_test

# One line per split: train, validation, test.
print(
    non_linear_9_metrics_train,
    non_linear_9_metrics_val,
    non_linear_9_metrics_test,
    sep="\n",
)

[1.0, 1.0, 1.0]
[0.988, 0.782608695652174, 0.9473684210526315]
[0.982, 0.85, 0.7391304347826086]


## Summary

In [33]:
def print_model_table(model_name, train_metrics, val_metrics, test_metrics):
    """Print a small fixed-width table of metrics for one model.

    Args:
        model_name: Label shown in the ``=== ... ===`` banner line.
        train_metrics: Indexable whose items 0, 1 and 2 are printed under the
            Accuracy, Precision and Recall columns of the "Train" row.
        val_metrics: Same layout, printed in the "Validation" row.
        test_metrics: Same layout, printed in the "Test" row.

    Returns:
        None. The table, followed by one blank line, is written to stdout.
    """
    header_row = "{:<12} {:<10} {:<10} {:<10}"
    # Values are left-aligned in 10-character columns with three decimals.
    value_row = "{:<12} {:<10.3f} {:<10.3f} {:<10.3f}"

    print("=== {} ===".format(model_name))
    print(header_row.format("Dataset", "Accuracy", "Precision", "Recall"))

    labelled_splits = (
        ("Train", train_metrics),
        ("Validation", val_metrics),
        ("Test", test_metrics),
    )
    for split_label, scores in labelled_splits:
        print(value_row.format(split_label, scores[0], scores[1], scores[2]))

    # Blank separator so consecutive tables do not run together.
    print()

# Summary tables, one per model, in order of increasing polynomial degree.

# Raw features (linear decision boundary)
print_model_table(
    model_name="Linear Logistic Regression",
    train_metrics=linear_metrics_train,
    val_metrics=linear_metrics_val,
    test_metrics=linear_metrics_test,
)

# Polynomial features, degree 2
print_model_table(
    model_name="Non-linear Logistic Regression (Degree 2)",
    train_metrics=non_linear_2_metrics_train,
    val_metrics=non_linear_2_metrics_val,
    test_metrics=non_linear_2_metrics_test,
)

# Polynomial features, degree 5
print_model_table(
    model_name="Non-linear Logistic Regression (Degree 5)",
    train_metrics=non_linear_5_metrics_train,
    val_metrics=non_linear_5_metrics_val,
    test_metrics=non_linear_5_metrics_test,
)

# Polynomial features, degree 9
print_model_table(
    model_name="Non-linear Logistic Regression (Degree 9)",
    train_metrics=non_linear_9_metrics_train,
    val_metrics=non_linear_9_metrics_val,
    test_metrics=non_linear_9_metrics_test,
)


=== Linear Logistic Regression ===
Dataset      Accuracy   Precision  Recall    
Train        0.976      0.796      0.643     
Validation   0.978      0.750      0.632     
Test         0.980      0.882      0.652     

=== Non-linear Logistic Regression (Degree 2) ===
Dataset      Accuracy   Precision  Recall    
Train        0.982      0.842      0.739     
Validation   0.978      0.722      0.684     
Test         0.984      0.857      0.783     

=== Non-linear Logistic Regression (Degree 5) ===
Dataset      Accuracy   Precision  Recall    
Train        0.998      0.991      0.974     
Validation   0.990      0.792      1.000     
Test         0.984      0.826      0.826     

=== Non-linear Logistic Regression (Degree 9) ===
Dataset      Accuracy   Precision  Recall    
Train        1.000      1.000      1.000     
Validation   0.988      0.783      0.947     
Test         0.982      0.850      0.739     

