In [2]:
import pandas as pd
import numpy as np

# Load the dataset. The unmodified frame is kept under its own name so the
# effect of the cleaning step below can be measured.
raw_df = pd.read_csv("data.csv")

# Drop every row that contains at least one missing value, and report how
# many rows that removed so the "check" is visible in the cell output.
df = raw_df.dropna()
print(f"Dropped {len(raw_df) - len(df)} row(s) containing missing values")

# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None" after the report).
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [None]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Encode the target column as integer class labels.
label_enc = LabelEncoder()
df["Churn"] = label_enc.fit_transform(df["Churn"])

# Select the feature columns by name rather than by position, so the cell does
# not depend on "Churn" being the last column. "customerID" is left out of the
# features because it is a per-row identifier, not a predictor.
features = df.drop(columns=["Churn", "customerID"], errors="ignore")

# StandardScaler only accepts numeric input, so every non-numeric feature is
# first replaced by integer category codes.
# NOTE(review): if any text column actually stores numbers as strings, convert
# it with pd.to_numeric before this step instead of treating it as categorical.
non_numeric_cols = features.select_dtypes(exclude=["number", "bool"]).columns
for col in non_numeric_cols:
    features[col] = features[col].astype("category").cat.codes

# Scale the features and rebuild a frame with the original labels. Building a
# new frame (instead of assigning floats back through .iloc) avoids writing
# float values into integer-typed columns.
# NOTE: the scaler is fitted on all rows, i.e. before any train/test split, so
# statistics of the future test rows influence the scaling.
scaler = StandardScaler()
df_scaled = pd.DataFrame(
    scaler.fit_transform(features),
    columns=features.columns,
    index=features.index,
)

# Re-attach the (unscaled) encoded target so downstream cells can split on it.
df_scaled["Churn"] = df["Churn"]


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

# One seed shared by the split and by every estimator that takes one, so that
# re-running the cell reproduces the same accuracies and the same winner.
RANDOM_STATE = 42

# Split dataset: 80% train / 20% test.
X = df_scaled.drop("Churn", axis=1)
y = df_scaled["Churn"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Candidate models, keyed by the display name used in the results.
# SVC is left at its defaults; the other estimators are seeded explicitly.
models = {
    "Random Forest": RandomForestClassifier(random_state=RANDOM_STATE),
    "Gradient Boosting": GradientBoostingClassifier(random_state=RANDOM_STATE),
    "SVM": SVC(),
    "XGBoost": XGBClassifier(random_state=RANDOM_STATE),
    "CatBoost": CatBoostClassifier(verbose=0, random_seed=RANDOM_STATE),
}

# Test-set accuracy per model name.
results = {}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    results[name] = accuracy

# Print best model.
# `best_model` holds the NAME of the winner (a key of `results` / `models`);
# the fitted estimator itself is `models[best_model]`.
# NOTE(review): nothing in this cell writes the winner to disk, while the
# serving cell loads "best_model.pkl" — confirm where that file is produced.
best_model = max(results, key=results.get)
print("Best Algorithm:", best_model, "with accuracy:", results[best_model])


In [None]:
from flask import Flask, request, render_template
import pickle

app = Flask(__name__)

# Load the trained model.
# The context manager closes the file handle as soon as unpickling finishes
# (a bare open() passed to pickle.load is never closed explicitly).
# SECURITY: unpickling can execute arbitrary code embedded in the file, so
# "best_model.pkl" must come from a trusted source.
with open("best_model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

@app.route("/", methods=["GET", "POST"])
def predict():
    """Serve the prediction form and, on POST, the model's prediction.

    GET renders "index.html" without a result.
    POST reads the form fields "feature1" and "feature2", converts them to
    floats, passes them to ``model.predict`` as a single sample and renders
    "index.html" with the first predicted value as ``result``.

    If a field is missing or is not a valid number, the form is rendered again
    with an ``error`` message and HTTP status 400 instead of failing with an
    unhandled exception.
    """
    if request.method == "POST":
        # Get user input
        try:
            input_data = [
                float(request.form[field]) for field in ("feature1", "feature2")
            ]
        except (KeyError, ValueError):
            # KeyError: field absent from the form; ValueError: not a number.
            return (
                render_template(
                    "index.html",
                    error="Please provide numeric values for feature1 and feature2.",
                ),
                400,
            )

        # NOTE(review): the model must have been trained on exactly these two
        # features, in this order — confirm against how best_model.pkl is built.
        prediction = model.predict([input_data])[0]
        return render_template("index.html", result=prediction)

    return render_template("index.html")

if __name__ == "__main__":
    import os

    # Debug mode enables the interactive debugger, which lets anyone who can
    # reach the server run code on it, so it is opt-in (FLASK_DEBUG=1) rather
    # than hard-coded on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "0") == "1"

    # The auto-reloader restarts the launching process, which does not work
    # when the app is started from a notebook kernel, so it is always disabled.
    # Note that app.run() blocks until the server is stopped.
    app.run(debug=debug_enabled, use_reloader=False)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Pull the labels and scores out of `results` once; both charts share them.
algorithm_names = list(results.keys())
accuracy_scores = list(results.values())

# Static bar chart of accuracy per algorithm (matplotlib + seaborn).
static_fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x=algorithm_names, y=accuracy_scores, ax=ax)
ax.set_xlabel("Algorithms")
ax.set_ylabel("Accuracy Score")
ax.set_title("Model Comparison")
plt.show()

# Interactive version of the same comparison (Plotly).
fig = px.bar(x=algorithm_names, y=accuracy_scores, title="Algorithm Accuracy")
fig.show()
