In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from collections import Counter

In [None]:
# Path of the customer-churn CSV, resolved relative to the notebook's working directory.
file_path = "Customer-Churn.csv"

# Read the CSV into a DataFrame using pandas' default parsing options.
churn_data = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Preview only the first rows; rendering the whole frame bloats the saved notebook output.
churn_data.head()

In [None]:
# Tally the rows per 'Churn' label to see how the target classes are distributed.
# (The variable keeps its existing spelling in case other cells reference it.)
churn_distrubtion = churn_data.loc[:, "Churn"].value_counts()
print(churn_distrubtion)

In [None]:
# Compare a logistic-regression churn classifier trained three ways: on the
# training data as-is, on an up-sampled training set, and on a down-sampled
# training set.
#
# Methodology:
#   * The data is split ONCE, before any scaling or resampling, and all three
#     models are scored on that same untouched test set, so their metrics are
#     directly comparable.
#   * Each scaler is fitted on training rows only; fitting it on the full data
#     would leak test-set statistics into training.
#   * Resampling is applied to the training rows only. Resampling before the
#     split lets copies of one customer land in both train and test and makes
#     the test set artificially balanced, which inflates the scores.


def _rebalance(X_fit, y_fit, strategy, random_state=42):
    """Return a class-balanced copy of the training data.

    Parameters
    ----------
    X_fit : pandas.DataFrame
        Training features.
    y_fit : pandas.Series
        Binary (0/1) training labels sharing ``X_fit``'s index.
    strategy : {'over', 'under'}
        'over' draws the smaller class with replacement until it matches the
        larger one; 'under' draws the larger class without replacement down to
        the size of the smaller one.
    random_state : int
        Seed forwarded to ``DataFrame.sample``.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        Balanced features (class 0 rows first, then class 1) and their labels.

    Raises
    ------
    ValueError
        If ``strategy`` is neither 'over' nor 'under'.
    """
    if strategy not in ('over', 'under'):
        raise ValueError(f"strategy must be 'over' or 'under', got {strategy!r}")

    by_class = {label: X_fit[y_fit == label] for label in (0, 1)}
    # Decide which class is smaller from the data instead of assuming it.
    minority = min(by_class, key=lambda label: len(by_class[label]))
    majority = 1 - minority

    if strategy == 'over':
        by_class[minority] = by_class[minority].sample(
            len(by_class[majority]), replace=True, random_state=random_state
        )
    else:
        by_class[majority] = by_class[majority].sample(
            len(by_class[minority]), random_state=random_state
        )

    X_balanced = pd.concat([by_class[0], by_class[1]])
    y_balanced = pd.Series(
        [0] * len(by_class[0]) + [1] * len(by_class[1]),
        index=X_balanced.index,
        name=y_fit.name,
    )
    return X_balanced, y_balanced


def _scale(X_fit, X_eval):
    """Standardise both sets using statistics learned from ``X_fit`` only.

    Returns the fitted scaler, the scaled training array and the scaled
    evaluation array, in that order.
    """
    fitted_scaler = StandardScaler().fit(X_fit)
    return fitted_scaler, fitted_scaler.transform(X_fit), fitted_scaler.transform(X_eval)


def _fit_and_score(X_fit, y_fit, X_eval, y_eval):
    """Fit a default LogisticRegression and score it on the evaluation set.

    Returns ``(model, predictions, (accuracy, precision, recall, f1))``.
    """
    classifier = LogisticRegression()
    classifier.fit(X_fit, y_fit)
    predictions = classifier.predict(X_eval)
    scores = (
        accuracy_score(y_eval, predictions),
        precision_score(y_eval, predictions),
        recall_score(y_eval, predictions),
        f1_score(y_eval, predictions),
    )
    return classifier, predictions, scores


def _print_scores(title, accuracy_value, precision_value, recall_value, f1_value):
    """Print one model's metrics in the report layout used by this notebook."""
    print(title)
    print(f"Accuracy : {accuracy_value}")
    print(f"Precision : {precision_value}")
    print(f"Recall : {recall_value}")
    print(f"f1 : {f1_value}")


# Coerce 'TotalCharges' to numbers; values that cannot be parsed become NaN and
# are then replaced by the column mean. Both steps are idempotent on re-run.
# NOTE: the fill value is the mean over all rows (train and test alike); it only
# affects rows whose 'TotalCharges' could not be parsed.
churn_data['TotalCharges'] = pd.to_numeric(churn_data['TotalCharges'], errors='coerce')

churn_data['TotalCharges'] = churn_data['TotalCharges'].fillna(churn_data['TotalCharges'].mean())

features = ['tenure', 'SeniorCitizen', 'MonthlyCharges', 'TotalCharges']

X = churn_data[features]
y = churn_data['Churn'].eq('Yes').astype(int)  # 1 = 'Yes', 0 = anything else

# Single stratified split shared by every experiment below. The *_raw frames are
# unscaled; stratify=y keeps the class ratio equal in train and test.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


# Baseline: training data as-is

scaler, X_train, X_test = _scale(X_train_raw, X_test_raw)
model, y_pred, (accuracy, precision, recall, f1) = _fit_and_score(
    X_train, y_train, X_test, y_test
)

_print_scores("Unbalanced model :", accuracy, precision, recall, f1)

print("********************************************")

class_distribution = Counter(y)
print(f"Class distribution : {class_distribution}")
print("********************************************")


# Upsampling (training rows only)

X_oversampled, y_oversampled = _rebalance(X_train_raw, y_train, 'over')
scaler_os, X_train_os, X_test_os = _scale(X_oversampled, X_test_raw)
y_train_os, y_test_os = y_oversampled, y_test  # same held-out labels as the baseline

model_os, y_pred_os, (accuracy_os, precision_os, recall_os, f1_os) = _fit_and_score(
    X_train_os, y_train_os, X_test_os, y_test_os
)

_print_scores("Upsampling model:", accuracy_os, precision_os, recall_os, f1_os)
print("********************************************")


# Downsampling (training rows only)

X_undersampled, y_undersampled = _rebalance(X_train_raw, y_train, 'under')
scaler_us, X_train_us, X_test_us = _scale(X_undersampled, X_test_raw)
y_train_us, y_test_us = y_undersampled, y_test  # same held-out labels as the baseline

model_us, y_pred_us, (accuracy_us, precision_us, recall_us, f1_us) = _fit_and_score(
    X_train_us, y_train_us, X_test_us, y_test_us
)

_print_scores("downsampling model:", accuracy_us, precision_us, recall_us, f1_us)