# In [3]:  (Jupyter notebook cell marker)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import GridSearchCV
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import seaborn as sns
from tkinter import scrolledtext

class CustomerChurnPredictor:
    """Tkinter application for exploring a churn dataset and training a classifier.

    The window is a four-tab notebook: load a CSV, explore/preprocess it,
    train and evaluate a Random Forest or Logistic Regression model, and run
    manual or test-set predictions.  The dataset must contain a ``Churn``
    column; preprocessing additionally expects ``TotalCharges``.
    """

    # CSV read by the "Load Attached Dataset" button (relative to the CWD).
    DEFAULT_DATASET = 'WA_Fn-UseC_-Telco-Customer-Churn.csv'

    def __init__(self, root):
        """Configure the main window and build every tab.

        Args:
            root: The Tk window that hosts the application.
        """
        self.root = root
        self.root.title("Telco Customer Churn Prediction System")
        self.root.geometry("1200x800")

        # Data
        self.df = None
        self.scaler = StandardScaler()
        # Embedded matplotlib canvases, keyed by a slot name, so a redraw
        # replaces the previous chart instead of stacking a new one.
        self._canvases = {}
        self._reset_pipeline_state()

        self.setup_gui()

    def _reset_pipeline_state(self):
        """Forget everything derived from a dataset (splits, encoders, model).

        Called on construction and whenever a new dataset is loaded, so every
        attribute the handlers test against ``None`` always exists and stale
        results never outlive the data they were computed from.
        """
        self.X = None
        self.y = None
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        self.X_train_scaled = None
        self.X_test_scaled = None
        self.model = None
        self.label_encoders = {}
        self.predictions = None

    # ------------------------------------------------------------------ GUI

    def setup_gui(self):
        """Create the notebook and populate its four tabs."""
        # Main notebook
        notebook = ttk.Notebook(self.root)
        notebook.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        # Data Loading Frame
        self.data_frame = ttk.Frame(notebook)
        notebook.add(self.data_frame, text="1. Load Data")
        self.create_data_frame()

        # Data Exploration Frame
        self.explore_frame = ttk.Frame(notebook)
        notebook.add(self.explore_frame, text="2. Explore Data")
        self.create_explore_frame()

        # Model Training Frame
        self.train_frame = ttk.Frame(notebook)
        notebook.add(self.train_frame, text="3. Train Model")
        self.create_train_frame()

        # Prediction Frame
        self.predict_frame = ttk.Frame(notebook)
        notebook.add(self.predict_frame, text="4. Predict")
        self.create_predict_frame()

    def create_data_frame(self):
        """Build the "Load Data" tab: two load buttons, a status line, a preview."""
        tk.Label(self.data_frame, text="Load Telco Customer Churn Dataset",
                 font=("Arial", 16, "bold")).pack(pady=20)

        btn_frame = tk.Frame(self.data_frame)
        btn_frame.pack(pady=20)

        tk.Button(btn_frame, text="Load Attached Dataset",
                  command=self.load_dataset, bg="#4CAF50", fg="white",
                  font=("Arial", 12), width=20).pack(side=tk.LEFT, padx=10)

        tk.Button(btn_frame, text="Load from File",
                  command=self.load_from_file, bg="#2196F3", fg="white",
                  font=("Arial", 12), width=20).pack(side=tk.LEFT, padx=10)

        self.status_label = tk.Label(self.data_frame, text="No data loaded",
                                     fg="red", font=("Arial", 12))
        self.status_label.pack(pady=20)

        self.info_text = scrolledtext.ScrolledText(self.data_frame, height=10, width=80)
        self.info_text.pack(pady=20, padx=20, fill=tk.BOTH, expand=True)

    def create_explore_frame(self):
        """Build the "Explore Data" tab: action buttons and an output pane."""
        tk.Label(self.explore_frame, text="Data Exploration & Preprocessing",
                 font=("Arial", 16, "bold")).pack(pady=20)

        btn_frame = tk.Frame(self.explore_frame)
        btn_frame.pack(pady=10)

        tk.Button(btn_frame, text="Show Dataset Info", command=self.show_info,
                  bg="#FF9800", fg="white", width=15).pack(side=tk.LEFT, padx=5)
        tk.Button(btn_frame, text="Show Statistics", command=self.show_stats,
                  bg="#FF9800", fg="white", width=15).pack(side=tk.LEFT, padx=5)
        tk.Button(btn_frame, text="Visualize Data", command=self.visualize_data,
                  bg="#FF9800", fg="white", width=15).pack(side=tk.LEFT, padx=5)
        tk.Button(btn_frame, text="Preprocess Data", command=self.preprocess_data,
                  bg="#9C27B0", fg="white", width=15).pack(side=tk.LEFT, padx=5)

        self.explore_text = scrolledtext.ScrolledText(self.explore_frame, height=15, width=100)
        self.explore_text.pack(pady=20, padx=20, fill=tk.BOTH, expand=True)

    def create_train_frame(self):
        """Build the "Train Model" tab: model selector, actions, output pane."""
        tk.Label(self.train_frame, text="Model Training & Evaluation",
                 font=("Arial", 16, "bold")).pack(pady=20)

        # Model selection
        model_frame = tk.LabelFrame(self.train_frame, text="Select Model", padx=10, pady=10)
        model_frame.pack(pady=10, padx=20, fill=tk.X)

        self.model_var = tk.StringVar(value="RandomForest")
        tk.Radiobutton(model_frame, text="Random Forest", variable=self.model_var,
                       value="RandomForest").pack(side=tk.LEFT)
        tk.Radiobutton(model_frame, text="Logistic Regression", variable=self.model_var,
                       value="Logistic").pack(side=tk.LEFT)

        # Buttons
        btn_frame = tk.Frame(self.train_frame)
        btn_frame.pack(pady=20)

        tk.Button(btn_frame, text="Train Model", command=self.train_model,
                  bg="#4CAF50", fg="white", font=("Arial", 12), width=15).pack(side=tk.LEFT, padx=10)
        tk.Button(btn_frame, text="Evaluate Model", command=self.evaluate_model,
                  bg="#2196F3", fg="white", font=("Arial", 12), width=15).pack(side=tk.LEFT, padx=10)
        tk.Button(btn_frame, text="Feature Importance", command=self.show_feature_importance,
                  bg="#FF5722", fg="white", font=("Arial", 12), width=15).pack(side=tk.LEFT, padx=10)

        self.train_text = scrolledtext.ScrolledText(self.train_frame, height=20, width=100)
        self.train_text.pack(pady=20, padx=20, fill=tk.BOTH, expand=True)

    def create_predict_frame(self):
        """Build the "Predict" tab: manual-input form and test-set prediction."""
        tk.Label(self.predict_frame, text="Make Predictions",
                 font=("Arial", 16, "bold")).pack(pady=20)

        # Input frame for manual prediction
        input_frame = tk.LabelFrame(self.predict_frame, text="Manual Prediction", padx=10, pady=10)
        input_frame.pack(pady=10, padx=20, fill=tk.X)

        # Simplified inputs (using key features)
        self.tenure_var = tk.DoubleVar(value=12)
        self.monthly_var = tk.DoubleVar(value=70.0)
        self.contract_var = tk.StringVar(value="Month-to-month")
        self.internet_var = tk.StringVar(value="Fiber optic")
        self.paperless_var = tk.BooleanVar(value=True)

        tk.Label(input_frame, text="Tenure (months):").grid(row=0, column=0, sticky=tk.W, pady=5)
        tk.Scale(input_frame, from_=1, to=72, orient=tk.HORIZONTAL,
                 variable=self.tenure_var).grid(row=0, column=1, pady=5, padx=10)

        tk.Label(input_frame, text="Monthly Charges:").grid(row=1, column=0, sticky=tk.W, pady=5)
        tk.Scale(input_frame, from_=18, to=120, orient=tk.HORIZONTAL,
                 variable=self.monthly_var).grid(row=1, column=1, pady=5, padx=10)

        tk.Label(input_frame, text="Contract:").grid(row=2, column=0, sticky=tk.W, pady=5)
        contract_combo = ttk.Combobox(input_frame, textvariable=self.contract_var,
                                      values=["Month-to-month", "One year", "Two year"])
        contract_combo.grid(row=2, column=1, pady=5, padx=10)

        tk.Label(input_frame, text="Internet Service:").grid(row=3, column=0, sticky=tk.W, pady=5)
        internet_combo = ttk.Combobox(input_frame, textvariable=self.internet_var,
                                      values=["DSL", "Fiber optic", "No"])
        internet_combo.grid(row=3, column=1, pady=5, padx=10)

        tk.Checkbutton(input_frame, text="Paperless Billing",
                       variable=self.paperless_var).grid(row=4, column=0, columnspan=2, pady=5)

        tk.Button(input_frame, text="Predict Churn", command=self.manual_predict,
                  bg="#4CAF50", fg="white", width=15).grid(row=5, column=0, columnspan=2, pady=20)

        # Prediction results
        self.pred_result = tk.Label(self.predict_frame, text="Prediction will appear here",
                                    font=("Arial", 14), fg="blue")
        self.pred_result.pack(pady=20)

        # Batch prediction button
        tk.Button(self.predict_frame, text="Predict on Test Set",
                  command=self.batch_predict, bg="#9C27B0", fg="white",
                  font=("Arial", 12), width=20).pack(pady=10)

        self.batch_result = scrolledtext.ScrolledText(self.predict_frame, height=15, width=100)
        self.batch_result.pack(pady=20, padx=20, fill=tk.BOTH, expand=True)

    # -------------------------------------------------------------- helpers

    @staticmethod
    def _replace_text(widget, text):
        """Replace the entire content of a Tk text widget with *text*."""
        widget.delete("1.0", tk.END)
        widget.insert(tk.END, text)

    @staticmethod
    def _paperless_label(enabled):
        """Map the checkbox state to the dataset's ``Yes``/``No`` vocabulary.

        Assumes the loaded data uses ``Yes``/``No`` for ``PaperlessBilling``;
        a file with other labels still fails at encoding time with an error box.
        """
        return "Yes" if enabled else "No"

    @staticmethod
    def _new_figure(figsize):
        """Create a figure that is not registered with pyplot.

        pyplot keeps every figure it creates alive until ``plt.close`` is
        called; a plain ``Figure`` is released as soon as its canvas is dropped.
        """
        # Local import so this class does not depend on a new module-level import.
        from matplotlib.figure import Figure
        return Figure(figsize=figsize)

    def _show_figure(self, parent, fig, slot):
        """Embed *fig* in *parent*, replacing the chart previously shown in *slot*."""
        previous = self._canvases.pop(slot, None)
        if previous is not None:
            previous.get_tk_widget().destroy()

        canvas = FigureCanvasTkAgg(fig, parent)
        canvas.draw()
        canvas.get_tk_widget().pack()
        self._canvases[slot] = canvas

    def _set_dataset(self, df):
        """Adopt *df* as the working dataset and refresh the "Load Data" tab.

        Raises:
            ValueError: If *df* has no ``Churn`` column (every later step needs it).
        """
        if 'Churn' not in df.columns:
            raise ValueError("dataset has no 'Churn' column")

        # Build the summary first so a formatting failure leaves the old data intact.
        info = (
            "Dataset Info:\n"
            f"- Shape: {df.shape}\n"
            f"- Columns: {len(df.columns)}\n"
            f"- Target distribution:\n{df['Churn'].value_counts()}\n"
            "\n"
            "First 5 rows:\n"
            f"{df.head().to_string()}\n"
        )

        self.df = df
        self._reset_pipeline_state()
        self.status_label.config(
            text=f"✅ Dataset loaded: {df.shape[0]} rows, {df.shape[1]} columns", fg="green")
        self._replace_text(self.info_text, info)

    def _test_inputs(self):
        """Return the test features in the form the current model was fit on."""
        if isinstance(self.model, RandomForestClassifier):
            return self.X_test  # the forest is fit on unscaled features
        return self.X_test_scaled

    def _encode_category(self, column, label):
        """Translate *label* to the integer code learned for *column*."""
        return self.label_encoders[column].transform([label])[0]

    # -------------------------------------------------------------- loading

    def load_dataset(self):
        """Load the bundled CSV (``DEFAULT_DATASET``) and display a summary."""
        try:
            self._set_dataset(pd.read_csv(self.DEFAULT_DATASET))
        except Exception as e:
            messagebox.showerror("Error", f"Failed to load dataset: {str(e)}")

    def load_from_file(self):
        """Ask the user for a CSV file, load it, and display a summary."""
        filename = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv")])
        if not filename:
            return
        try:
            # Display the chosen file itself; do not fall back to the bundled CSV.
            self._set_dataset(pd.read_csv(filename))
        except Exception as e:
            messagebox.showerror("Error", f"Failed to load file: {str(e)}")

    # ---------------------------------------------------------- exploration

    def show_info(self):
        """Show shape, columns, dtypes, missing values and churn ratio."""
        if self.df is None:
            messagebox.showwarning("Warning", "Please load data first!")
            return

        info = (
            "Dataset Information:\n"
            f"Shape: {self.df.shape}\n"
            f"Columns: {list(self.df.columns)}\n"
            f"Data Types:\n{self.df.dtypes.to_string()}\n"
            f"Missing Values:\n{self.df.isnull().sum().to_string()}\n"
            f"Churn Distribution:\n{self.df['Churn'].value_counts(normalize=True).to_string()}\n"
        )
        self._replace_text(self.explore_text, info)

    def show_stats(self):
        """Show ``DataFrame.describe(include='all')`` for the loaded data."""
        if self.df is None:
            messagebox.showwarning("Warning", "Please load data first!")
            return

        stats = self.df.describe(include='all').to_string()
        self._replace_text(self.explore_text, f"Dataset Statistics:\n{stats}")

    def preprocess_data(self):
        """Clean, encode, split and scale the loaded dataset.

        Works on a copy of ``self.df`` so the raw data stays available to the
        exploration views and the step can be repeated.  Results are stored on
        the instance only after every stage succeeded.
        """
        if self.df is None:
            messagebox.showwarning("Warning", "Please load data first!")
            return

        try:
            data = self.df.copy()

            # Blank strings in TotalCharges become NaN here; imputed after the split.
            data['TotalCharges'] = pd.to_numeric(
                data['TotalCharges'].replace(' ', np.nan), errors='coerce')

            # The identifier is not a feature; tolerate files that lack it.
            data = data.drop(columns=['customerID'], errors='ignore')

            # Encode every non-numeric feature with its own LabelEncoder.
            encoders = {}
            features = data.drop(columns=['Churn'])
            for col in features.select_dtypes(exclude=['number']).columns:
                encoder = LabelEncoder()
                features[col] = encoder.fit_transform(features[col].astype(str))
                encoders[col] = encoder

            # Encode target
            target_encoder = LabelEncoder()
            target = pd.Series(target_encoder.fit_transform(data['Churn']),
                               index=data.index, name='Churn')
            encoders['Churn'] = target_encoder

            X_train, X_test, y_train, y_test = train_test_split(
                features, target, test_size=0.2, random_state=42, stratify=target
            )

            # Impute with training-set medians only, so the test split does not
            # influence the values the model is trained on.
            fill_values = X_train.median()
            X_train = X_train.fillna(fill_values)
            X_test = X_test.fillna(fill_values)

            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)

            # Commit all results together.
            self.X = features.fillna(fill_values)
            self.y = target
            self.X_train, self.X_test = X_train, X_test
            self.y_train, self.y_test = y_train, y_test
            self.X_train_scaled, self.X_test_scaled = X_train_scaled, X_test_scaled
            self.scaler = scaler
            self.label_encoders = encoders

            self._replace_text(self.explore_text, (
                "Preprocessing Complete!\n"
                f"- Training set: {self.X_train_scaled.shape}\n"
                f"- Test set: {self.X_test_scaled.shape}\n"
                f"- Features: {len(self.X.columns)}\n"
                f"- Classes: {len(np.unique(self.y))}\n"
                "            \n"
                "Ready for model training!"
            ))

        except Exception as e:
            messagebox.showerror("Error", f"Preprocessing failed: {str(e)}")

    def visualize_data(self):
        """Draw a 2x2 overview of churn against tenure, charges and contract."""
        if self.df is None:
            messagebox.showwarning("Warning", "Please load data first!")
            return

        try:
            fig = self._new_figure((12, 10))
            axes = fig.subplots(2, 2)

            # Churn distribution
            self.df['Churn'].value_counts().plot(kind='pie', ax=axes[0, 0], autopct='%1.1f%%')
            axes[0, 0].set_title('Churn Distribution')

            # Tenure vs Churn
            sns.boxplot(data=self.df, x='Churn', y='tenure', ax=axes[0, 1])
            axes[0, 1].set_title('Tenure by Churn')

            # Monthly charges vs Churn
            sns.boxplot(data=self.df, x='Churn', y='MonthlyCharges', ax=axes[1, 0])
            axes[1, 0].set_title('Monthly Charges by Churn')

            # Contract vs Churn
            pd.crosstab(self.df['Contract'], self.df['Churn']).plot(kind='bar', ax=axes[1, 1])
            axes[1, 1].set_title('Contract vs Churn')

            fig.tight_layout()
            self._show_figure(self.explore_frame, fig, "explore")

        except Exception as e:
            messagebox.showerror("Error", f"Visualization failed: {str(e)}")

    # ------------------------------------------------------------- training

    def train_model(self):
        """Fit the model chosen in the radio group on the training split."""
        if self.X_train_scaled is None:
            messagebox.showwarning("Warning", "Please preprocess data first!")
            return

        model_name = self.model_var.get()

        try:
            if model_name == "RandomForest":
                model = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
                model.fit(self.X_train, self.y_train)  # RandomForest doesn't need scaling
            else:
                model = LogisticRegression(random_state=42, max_iter=1000)
                model.fit(self.X_train_scaled, self.y_train)
        except Exception as e:
            messagebox.showerror("Error", f"Training failed: {str(e)}")
            return

        # Publish the model only once it is fitted, so a failed fit never
        # leaves an unusable estimator behind for the other tabs.
        self.model = model
        self._replace_text(
            self.train_text,
            f"✅ Model trained successfully!\nModel: {model_name}\nReady for predictions!")

    def evaluate_model(self):
        """Report accuracy, classification report and confusion matrix on the test split."""
        if self.model is None:
            messagebox.showwarning("Warning", "Please train model first!")
            return

        try:
            y_pred = self.model.predict(self._test_inputs())

            accuracy = accuracy_score(self.y_test, y_pred)
            report = classification_report(self.y_test, y_pred,
                                           target_names=['No Churn', 'Churn'])

            self._replace_text(self.train_text, (
                "Model Evaluation Results:\n"
                f"Accuracy: {accuracy:.4f}\n"
                "\n"
                "Classification Report:\n"
                f"{report}\n"
                "\n"
                "Confusion Matrix:\n"
                f"{confusion_matrix(self.y_test, y_pred)}"
            ))

        except Exception as e:
            messagebox.showerror("Error", f"Evaluation failed: {str(e)}")

    def show_feature_importance(self):
        """Plot the ten largest ``feature_importances_`` of the trained model."""
        if self.model is None:
            messagebox.showwarning("Warning", "Please train model first!")
            return

        try:
            if not hasattr(self.model, 'feature_importances_'):
                # Tell the user why nothing is drawn instead of ignoring the click.
                messagebox.showinfo(
                    "Feature Importance",
                    "The trained model does not expose feature importances. "
                    "Train a Random Forest to see this chart.")
                return

            importances = self.model.feature_importances_
            top = np.argsort(importances)[::-1][:10]

            fig = self._new_figure((10, 6))
            ax = fig.subplots()
            features = self.X.columns[top]
            ax.barh(range(len(features)), importances[top])
            ax.set_yticks(range(len(features)))
            ax.set_yticklabels(features)
            ax.set_xlabel('Feature Importance')
            ax.set_title('Top 10 Feature Importances')

            self._show_figure(self.train_frame, fig, "importance")

        except Exception as e:
            messagebox.showerror("Error", f"Feature importance failed: {str(e)}")

    # ----------------------------------------------------------- prediction

    def manual_predict(self):
        """Predict churn for the customer described by the form controls.

        Only the five form fields are set; every other feature is left at 0
        (the simplified input the form was designed around).
        """
        if self.model is None:
            messagebox.showwarning("Warning", "Please train model first!")
            return

        try:
            chosen = {
                'tenure': self.tenure_var.get(),
                'MonthlyCharges': self.monthly_var.get(),
                'Contract': self._encode_category('Contract', self.contract_var.get()),
                'InternetService': self._encode_category(
                    'InternetService', self.internet_var.get()),
                # The encoder was fit on the column's labels, not on
                # str(True)/str(False); see _paperless_label for the assumption.
                'PaperlessBilling': self._encode_category(
                    'PaperlessBilling', self._paperless_label(self.paperless_var.get())),
            }
            missing = [name for name in chosen if name not in self.X.columns]
            if missing:
                raise ValueError(f"dataset has no column(s): {', '.join(missing)}")

            # A one-row DataFrame keeps the feature names the model and the
            # scaler were fit with.
            row = pd.DataFrame(0.0, index=[0], columns=self.X.columns)
            for name, value in chosen.items():
                row.at[0, name] = value

            # Predict
            if isinstance(self.model, RandomForestClassifier):
                model_input = row
            else:
                model_input = self.scaler.transform(row)
            pred = self.model.predict(model_input)[0]
            if hasattr(self.model, 'predict_proba'):
                prob = self.model.predict_proba(model_input)[0][1]
            else:
                prob = 0

            churn_status = 'Churn: YES' if pred == 1 else 'Churn: NO'
            risk = f"Churn Probability: {prob:.2%}"

            self.pred_result.config(text=f"{churn_status}\n{risk}",
                                    fg="red" if pred == 1 else "green")

        except Exception as e:
            messagebox.showerror("Error", f"Prediction failed: {str(e)}")

    def batch_predict(self):
        """Predict on the whole test split and summarise the outcome."""
        if self.model is None:
            messagebox.showwarning("Warning", "Please train model first!")
            return

        try:
            X_test_use = self._test_inputs()
            self.predictions = self.model.predict(X_test_use)
            if hasattr(self.model, 'predict_proba'):
                probs = self.model.predict_proba(X_test_use)[:, 1]
            else:
                probs = None

            accuracy = accuracy_score(self.y_test, self.predictions)

            if probs is None:
                summary = "Predictions completed!"
            else:
                summary = (
                    "Batch Prediction Results:\n"
                    f"Test Accuracy: {accuracy:.4f}\n"
                    f"Predictions made: {len(self.predictions)}\n"
                    "\n"
                    "Churn Distribution in Test Set:\n"
                    f"- Predicted No Churn: {int(np.sum(1 - self.predictions))}\n"
                    f"- Predicted Churn: {int(np.sum(self.predictions))}\n"
                    "\n"
                    f"Average Churn Probability: {np.mean(probs):.2%}"
                )
            self._replace_text(self.batch_result, summary)

        except Exception as e:
            messagebox.showerror("Error", f"Batch prediction failed: {str(e)}")

# Script entry point: only runs when the file is executed directly, not on import.
if __name__ == "__main__":
    # Create the Tk root window that hosts the whole application.
    root = tk.Tk()
    # Build the GUI; the reference is kept for the lifetime of the window.
    app = CustomerChurnPredictor(root)
    # Hand control to Tk's event loop; returns once the loop exits.
    root.mainloop()
