In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load dataset
df = pd.read_csv("telecom_churn.csv")

# Encode the binary Yes/No plan columns as 0/1.
# Series.map turns every value that is missing from the mapping into NaN, so
# check the result and fail fast with a clear message instead of letting NaNs
# flow silently into scaling and model fitting.
yes_no_encoding = {'No': 0, 'Yes': 1}
for plan_column in ['International plan', 'Voice mail plan']:
    encoded_plan = df[plan_column].map(yes_no_encoding)
    if encoded_plan.isna().any():
        raise ValueError(
            "Column {!r} contains values other than 'Yes'/'No'".format(plan_column)
        )
    df[plan_column] = encoded_plan
df['Churn'] = df['Churn'].astype(int)

# Drop non-relevant categorical features
df = df.drop(columns=['State', 'Area code'])

# Split data into features and target variable
X = df.drop(columns=['Churn'])
y = df['Churn']

# Train-test split on the *unscaled* features first, so that the held-out rows
# cannot influence the statistics learned by the scaler (avoids data leakage).
# Same test_size / random_state / stratify as before, so the row assignment
# to train and test is unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize numerical features: fit on the training rows only, then apply
# the already-fitted transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept in case a later cell still refers to the fully scaled feature matrix;
# it is now scaled with the training-set mean and standard deviation.
X_scaled = scaler.transform(X)

# Initialize models
# Each classifier is used with library defaults apart from the arguments shown;
# random_state is fixed where the estimator accepts it so reruns are repeatable.
models = {
    "Logistic Regression": LogisticRegression(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "KNN": KNeighborsClassifier(n_neighbors=5),
    "SVM": SVC(kernel='linear', random_state=42)
}

# Train models and evaluate results
# Every model is fitted on the training split and scored on the test split.
# One record (dict) per model is collected and turned into a DataFrame once.
results = []
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    results.append({
        "Model": name,
        "Accuracy": accuracy_score(y_test, y_pred),
        # zero_division=0 is passed to all three ratio metrics so that an
        # undefined score (zero denominator) is consistently reported as 0
        # instead of only precision being handled that way.
        "Precision": precision_score(y_test, y_pred, zero_division=0),
        "Recall": recall_score(y_test, y_pred, zero_division=0),
        "F1 Score": f1_score(y_test, y_pred, zero_division=0)
    })

# Convert results to DataFrame
results_df = pd.DataFrame(results)
print(results_df)

# Plot results
# One line per metric, with the evaluated models along the x-axis.
fig, ax = plt.subplots(figsize=(10, 6))
metrics = ["Accuracy", "Precision", "Recall", "F1 Score"]
colors = ["blue", "green", "red", "purple"]

# Pair every metric with its colour instead of indexing by position.
for metric, color in zip(metrics, colors):
    ax.plot(
        results_df["Model"],
        results_df[metric],
        marker='o',
        label=metric,
        color=color,
    )

ax.set_xlabel("Models")
ax.set_ylabel("Score")
ax.set_title("Comparison of Classification Models")
ax.legend()
ax.set_ylim(0, 1)  # all plotted scores are in [0, 1]
ax.grid(True)
ax.tick_params(axis="x", labelrotation=20)  # tilt the model names
plt.show()

# Pros and Cons Table
# One (model, pros, cons) triple per classifier, in the order they are listed
# in the table.
model_tradeoffs = [
    (
        "Logistic Regression",
        "Interpretable, efficient, works well with linearly separable data",
        "Struggles with non-linearity, sensitive to outliers",
    ),
    (
        "Decision Tree",
        "Handles non-linearity, easy to interpret, no need for scaling",
        "Prone to overfitting, not stable to small changes",
    ),
    (
        "KNN",
        "Simple, effective in small datasets, no training time required",
        "Computationally expensive for large datasets, sensitive to noise",
    ),
    (
        "SVM",
        "Good for small datasets, effective in high-dimensional spaces",
        "Slow with large datasets, sensitive to parameter tuning",
    ),
]

# Transpose the per-model triples into the column-oriented mapping that the
# DataFrame is built from.
model_names, model_pros, model_cons = (
    list(column) for column in zip(*model_tradeoffs)
)
pros_cons_data = {
    "Model": model_names,
    "Pros": model_pros,
    "Cons": model_cons,
}

pros_cons_df = pd.DataFrame(pros_cons_data)
print(pros_cons_df)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score