In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("Telecom Churn Prediction.csv")

In [3]:
df.drop("customerID", axis=1, inplace=True)

In [4]:
# Define a mapping dictionary

df['gender'] = df['gender'].map({'Male': 0, 'Female': 1})
df['MultipleLines'] = df['MultipleLines'].map({'No': 0, 'Yes': 1, 'No phone service': 2})
df['Contract'] = df['Contract'].map({'Month-to-month': 0, 'One year': 1, 'Two year': 2})
df['InternetService'] = df['InternetService'].map({'No': 0, 'DSL': 1, 'Fiber optic': 2})
df['PaymentMethod'] = df['PaymentMethod'].map({'Electronic check': 0, 'Mailed check': 1, 'Bank transfer (automatic)': 2, 'Credit card (automatic)': 3})
df['TotalCharges']=df['TotalCharges'].replace({' ': 0})
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'])

for col in ['Partner', 'Dependents', 'PhoneService', 'PaperlessBilling', 'Churn']:
    df[col] = df[col].map({'No': 0, 'Yes': 1})
for col in ['OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies']:
    df[col] = df[col].map({'No': 0, 'Yes': 1, 'No internet service': 2})

In [5]:
from sklearn.preprocessing import MinMaxScaler

In [6]:
# Using min max scaler technique on tenure, monthlycharges and total charges column

scaler = MinMaxScaler()
df[['tenure', 'MonthlyCharges', 'TotalCharges']] = scaler.fit_transform(df[['tenure', 'MonthlyCharges', 'TotalCharges']])

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
# Splitting the data

X = df.drop("Churn", axis=1)
y = df['Churn']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.7, random_state=100)

In [9]:
y_train.value_counts()

Churn
0    1560
1     552
Name: count, dtype: int64

In [12]:
# Performing oversampling as the target column has not equal values
from imblearn.over_sampling import SMOTE

In [13]:
smote = SMOTE(random_state=42)
X_oversampled, y_oversampled = smote.fit_resample(X_train, y_train)
print("Shape of X_oversampled:", X_oversampled.shape)
print("Shape of y_oversampled:", y_oversampled.shape)

Shape of X_oversampled: (3120, 19)
Shape of y_oversampled: (3120,)


# Logistic Regression

In [77]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score

In [78]:
logistic_obj = LogisticRegression()

In [80]:
# Train the model with the training data
logistic_obj.fit(X_train, y_train)
# Make predictions on the test data
predict = logistic_obj.predict(X_test)

# Confusion matrix
logistic_confusion_matrix = confusion_matrix(y_test, predict)
logistic_confusion_matrix

array([[3178,  436],
       [ 579,  738]])

In [66]:
# Fetching the accuracy, precision, recall and f1 of train data
train_predictions = logistic_obj.predict(X_train)
train_accuracy = accuracy_score(y_train, train_predictions)
train_precision = precision_score(y_train, train_predictions)
train_recall = recall_score(y_train, train_predictions)
train_f1 = f1_score(y_train, train_predictions)

print(f"Train accuracy: {train_accuracy * 100:.2f}%")
print(f"Train precision: {train_precision * 100:.2f}%")
print(f"Train recall: {train_recall * 100:.2f}%")
print(f"Train f1 score: {train_f1 * 100:.2f}%")

Train accuracy: 81.49%
Train precision: 66.67%
Train recall: 58.33%
Train f1 score: 62.22%


In [72]:
# Fetching the accuracy, precision, recall and f1 of test data
test_predictions = logistic_obj.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)
test_precision = precision_score(y_test, test_predictions)
test_recall = recall_score(y_test, test_predictions)
test_f1 = f1_score(y_test, test_predictions)

print(f"Test accuracy: {test_accuracy * 100:.2f}%")
print(f"Test precision: {test_precision * 100:.2f}%")
print(f"Test recall: {test_recall * 100:.2f}%")
print(f"Test f1 score: {test_f1 * 100:.2f}%")

Test accuracy: 79.42%
Test precision: 62.86%
Test recall: 56.04%
Test f1 score: 59.25%


# Descision Tree

In [82]:
from sklearn.tree import DecisionTreeClassifier

In [83]:
# Initialize the Decision Tree model
descision_obj = DecisionTreeClassifier()

In [84]:
# Train the model with the training data
descision_obj.fit(X_train, y_train)

# Make predictions on the test data
predictions = descision_obj.predict(X_test)

# Confusion matrix
descision_tree_matrix = confusion_matrix(y_test, predict)
descision_tree_matrix

array([[3178,  436],
       [ 579,  738]])

In [85]:
# Fetching the accuracy, precision, recall and f1 of train data
train_predictions = descision_obj.predict(X_train)
train_accuracy = accuracy_score(y_train, train_predictions)
train_precision = precision_score(y_train, train_predictions)
train_recall = recall_score(y_train, train_predictions)
train_f1 = f1_score(y_train, train_predictions)

print(f"Train accuracy: {train_accuracy * 100:.2f}%")
print(f"Train precision: {train_precision * 100:.2f}%")
print(f"Train recall: {train_recall * 100:.2f}%")
print(f"Train f1 score: {train_f1 * 100:.2f}%")

Train accuracy: 99.86%
Train precision: 100.00%
Train recall: 99.46%
Train f1 score: 99.73%


In [86]:
# Fetching the accuracy, precision, recall and f1 of test data
test_predictions = descision_obj.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)
test_precision = precision_score(y_test, test_predictions)
test_recall = recall_score(y_test, test_predictions)
test_f1 = f1_score(y_test, test_predictions)

print(f"Test accuracy: {test_accuracy * 100:.2f}%")
print(f"Test precision: {test_precision * 100:.2f}%")
print(f"Test recall: {test_recall * 100:.2f}%")
print(f"Test f1 score: {test_f1 * 100:.2f}%")

Test accuracy: 71.41%
Test precision: 46.64%
Test recall: 48.97%
Test f1 score: 47.78%


# RandomForest

In [28]:
from sklearn.ensemble import RandomForestClassifier

In [87]:
# Initialize the Random Forest model
rf_clf = RandomForestClassifier(n_estimators=200, random_state=42)  # 200 trees in the forest

In [88]:
# Train the model with the training data
rf_clf.fit(X_train, y_train)

# Make predictions on the test data
predictions = rf_clf.predict(X_test)

# Confusion matrix
rf_clf_matrix = confusion_matrix(y_test, predict)
rf_clf_matrix


array([[3178,  436],
       [ 579,  738]])

In [89]:
# Fetching the accuracy, precision, recall and f1 of train data
train_predictions = rf_clf.predict(X_train)
train_accuracy = accuracy_score(y_train, train_predictions)
train_precision = precision_score(y_train, train_predictions)
train_recall = recall_score(y_train, train_predictions)
train_f1 = f1_score(y_train, train_predictions)

print(f"Train accuracy: {train_accuracy * 100:.2f}%")
print(f"Train precision: {train_precision * 100:.2f}%")
print(f"Train recall: {train_recall * 100:.2f}%")
print(f"Train f1 score: {train_f1 * 100:.2f}%")

Train accuracy: 99.86%
Train precision: 100.00%
Train recall: 99.46%
Train f1 score: 99.73%


In [90]:
# Fetching the accuracy, precision, recall and f1 of test data
test_predictions = rf_clf.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)
test_precision = precision_score(y_test, test_predictions)
test_recall = recall_score(y_test, test_predictions)
test_f1 = f1_score(y_test, test_predictions)

print(f"Test accuracy: {test_accuracy * 100:.2f}%")
print(f"Test precision: {test_precision * 100:.2f}%")
print(f"Test recall: {test_recall * 100:.2f}%")
print(f"Test f1 score: {test_f1 * 100:.2f}%")

Test accuracy: 78.18%
Test precision: 61.04%
Test recall: 50.57%
Test f1 score: 55.32%


In [101]:
from sklearn.svm import SVC

In [106]:
# Initialize the SVM model
svm_model = SVC()

In [107]:
# Train the model with the training data
svm_model.fit(X_train, y_train)

# Make predictions on the test data
predictions = svm_model.predict(X_test)

# Confusion matrix
svm_matrix = confusion_matrix(y_test, predict)
svm_matrix


array([[3178,  436],
       [ 579,  738]])

In [108]:
# Fetching the accuracy, precision, recall and f1 of train data
train_predictions = svm_model.predict(X_train)
train_accuracy = accuracy_score(y_train, train_predictions)
train_precision = precision_score(y_train, train_predictions)
train_recall = recall_score(y_train, train_predictions)
train_f1 = f1_score(y_train, train_predictions)

print(f"Train accuracy: {train_accuracy * 100:.2f}%")
print(f"Train precision: {train_precision * 100:.2f}%")
print(f"Train recall: {train_recall * 100:.2f}%")
print(f"Train f1 score: {train_f1 * 100:.2f}%")

Train accuracy: 82.62%
Train precision: 71.36%
Train recall: 55.98%
Train f1 score: 62.74%


In [109]:
# Fetching the accuracy, precision, recall and f1 of test data
test_predictions = svm_model.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)
test_precision = precision_score(y_test, test_predictions)
test_recall = recall_score(y_test, test_predictions)
test_f1 = f1_score(y_test, test_predictions)

print(f"Test accuracy: {test_accuracy * 100:.2f}%")
print(f"Test precision: {test_precision * 100:.2f}%")
print(f"Test recall: {test_recall * 100:.2f}%")
print(f"Test f1 score: {test_f1 * 100:.2f}%")

Test accuracy: 78.81%
Test precision: 62.78%
Test recall: 50.72%
Test f1 score: 56.11%


# KNN

In [110]:
from sklearn.neighbors import KNeighborsClassifier

In [111]:
knn = KNeighborsClassifier()

In [112]:
# Train the model with the training data
knn.fit(X_train, y_train)

# Make predictions on the test data
predictions = knn.predict(X_test)

# Confusion matrix
knn_matrix = confusion_matrix(y_test, predict)
knn_matrix


array([[3178,  436],
       [ 579,  738]])

In [113]:
# Fetching the accuracy, precision, recall and f1 of train data
train_predictions = knn.predict(X_train)
train_accuracy = accuracy_score(y_train, train_predictions)
train_precision = precision_score(y_train, train_predictions)
train_recall = recall_score(y_train, train_predictions)
train_f1 = f1_score(y_train, train_predictions)

print(f"Train accuracy: {train_accuracy * 100:.2f}%")
print(f"Train precision: {train_precision * 100:.2f}%")
print(f"Train recall: {train_recall * 100:.2f}%")
print(f"Train f1 score: {train_f1 * 100:.2f}%")

Train accuracy: 83.95%
Train precision: 70.21%
Train recall: 67.03%
Train f1 score: 68.58%


In [114]:
# Fetching the accuracy, precision, recall and f1 of test data
test_predictions = knn.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)
test_precision = precision_score(y_test, test_predictions)
test_recall = recall_score(y_test, test_predictions)
test_f1 = f1_score(y_test, test_predictions)

print(f"Test accuracy: {test_accuracy * 100:.2f}%")
print(f"Test precision: {test_precision * 100:.2f}%")
print(f"Test recall: {test_recall * 100:.2f}%")
print(f"Test f1 score: {test_f1 * 100:.2f}%")

Test accuracy: 74.57%
Test precision: 52.43%
Test recall: 51.56%
Test f1 score: 51.99%


# Gradient Boosting

In [117]:
from sklearn.ensemble import GradientBoostingClassifier

In [118]:
gb_classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

# Fit the model on the training data
gb_classifier.fit(X_train, y_train)

# Predict the test set
predictions = gb_classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 78.91%


# Ada Boosting

In [119]:
from sklearn.ensemble import AdaBoostClassifier

In [122]:
ada_boost = AdaBoostClassifier(n_estimators=100, random_state=42)

# Fit the model on the training data
ada_boost.fit(X_train, y_train)

# Make predictions on the test data
y_pred = ada_boost.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)

print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 79.25%
