<a href="https://colab.research.google.com/github/Abdullah-Tanweer/DEVELOPERS-HUB-AI-INTERNSHIP/blob/main/TASK_2_DEV_HUB_(ADV).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install pandas scikit-learn joblib


In [None]:
import pandas as pd

# Read the raw CSV and clean it in a single chain:
#   1. discard the customerID column,
#   2. collapse the "No internet service" / "No phone service" values to plain "No",
#   3. encode the Churn target as 1 ('Yes') / 0 ('No').
df = (
    pd.read_csv("Telco-Customer-Churn.csv")
    .drop(columns="customerID")
    .replace("No internet service", "No")
    .replace("No phone service", "No")
    .assign(Churn=lambda frame: frame["Churn"].map({'Yes': 1, 'No': 0}))
)


In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

# Feature groups: the three listed columns are treated as numeric; every other
# column except the Churn target is treated as categorical.
num_features = ['tenure', 'MonthlyCharges', 'TotalCharges']
non_categorical = num_features + ['Churn']
cat_features = [col for col in df.columns if col not in non_categorical]

# TotalCharges is parsed with errors='coerce', so entries that cannot be parsed
# become NaN; any row containing a NaN is then dropped.
df = df.assign(
    TotalCharges=pd.to_numeric(df['TotalCharges'], errors='coerce')
).dropna()

# Stratified 80/20 train/test split with a fixed seed
y = df["Churn"]
X = df.drop(columns="Churn")
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Column-wise preprocessing: standardise the numeric columns and one-hot encode
# the categorical ones (categories unseen during fit are ignored at transform time).
scaled_numeric = ("num", StandardScaler(), num_features)
encoded_categorical = ("cat", OneHotEncoder(handle_unknown="ignore"), cat_features)
preprocessor = ColumnTransformer(transformers=[scaled_numeric, encoded_categorical])


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# Both models are wrapped in a two-step pipeline (preprocessing -> classifier)
# built around the same `preprocessor` object.

# Logistic regression, iteration cap set to 1000
logreg_classifier = LogisticRegression(max_iter=1000)
logreg_pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", logreg_classifier),
])

# Random forest with a fixed seed
rf_classifier = RandomForestClassifier(random_state=42)
rf_pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", rf_classifier),
])


In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate hyper-parameters. Keys follow the "<step>__<parameter>" form so they
# address the "classifier" step of each pipeline.
logreg_param_grid = {
    "classifier__C": [0.01, 0.1, 1, 10],
    "classifier__penalty": ["l2"],
    "classifier__solver": ["lbfgs"],
}
rf_param_grid = {
    "classifier__n_estimators": [100, 200],
    "classifier__max_depth": [5, 10, None],
    "classifier__min_samples_split": [2, 5],
}

# Settings shared by both searches: cv=5, accuracy scoring, all available cores
search_options = {"cv": 5, "scoring": "accuracy", "n_jobs": -1}

# Tune the logistic regression pipeline
logreg_grid = GridSearchCV(
    estimator=logreg_pipeline,
    param_grid=logreg_param_grid,
    **search_options,
)
logreg_grid.fit(X_train, y_train)

# Tune the random forest pipeline. The fit call stays the final expression of
# the cell so the fitted search object is still shown as the cell output.
rf_grid = GridSearchCV(
    estimator=rf_pipeline,
    param_grid=rf_param_grid,
    **search_options,
)
rf_grid.fit(X_train, y_train)


In [None]:
from sklearn.metrics import classification_report

# Predict on the held-out test set with each tuned search object
y_pred_logreg = logreg_grid.predict(X_test)
y_pred_rf = rf_grid.predict(X_test)

# Print a heading followed by the classification report for each model
reports = (
    ("Logistic Regression Report:", y_pred_logreg),
    ("Random Forest Report:", y_pred_rf),
)
for heading, predictions in reports:
    print(heading)
    print(classification_report(y_test, predictions))


In [None]:
import joblib

# Persist the best estimator found by each grid search (both models are saved,
# one file per model).
best_rf = rf_grid.best_estimator_
best_logreg = logreg_grid.best_estimator_
joblib.dump(best_rf, "churn_model_rf.joblib")
joblib.dump(best_logreg, "churn_model_logreg.joblib")
