In [1]:
# Importing necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [2]:
# Helper function to collect customer data
def get_user_input(feature_names):
    """Interactively collect one customer's details as a single model-ready row.

    Prompts on stdin for the numeric fields (CreditScore, Age, Tenure, Balance,
    NumOfProducts, EstimatedSalary), two yes/no flags (HasCrCard,
    IsActiveMember), a country and a gender. Country and gender are one-hot
    encoded into whichever ``Geography_*`` / ``Gender_*`` columns are present
    in ``feature_names``.

    Args:
        feature_names: Ordered list of column names the returned frame must
            contain. Columns that are never prompted for keep the value 0.

    Returns:
        A one-row ``pandas.DataFrame`` whose columns are exactly
        ``feature_names`` (in that order), or ``None`` when a numeric answer
        cannot be parsed or the gender answer is not recognised.
    """
    # Accepted spellings; answers are stripped and case-normalised before lookup.
    yes_answers = {'y', 'yes'}
    gender_aliases = {'M': 'Male', 'MALE': 'Male', 'F': 'Female', 'FEMALE': 'Female'}

    # Start every expected column at 0 so unprompted features are still present.
    customer_data = {feature: 0 for feature in feature_names}

    print("\n--- Customer Information Form ---")

    try:
        # Basic customer details. Out-of-range values only trigger a warning;
        # the value is still used.
        print("\nBasic Information:")
        credit_score = int(input("Credit Score (300-850): "))
        if not (300 <= credit_score <= 850):
            print("Warning: Credit score seems unusual!")
        customer_data['CreditScore'] = credit_score

        age = int(input("Age: "))
        if not (18 <= age <= 100):
            print("Warning: Please verify age input!")
        customer_data['Age'] = age

        # Banking details
        print("\nBanking Details:")
        customer_data['Tenure'] = float(input("Years with bank: "))
        customer_data['Balance'] = float(input("Account balance ($): "))
        customer_data['NumOfProducts'] = int(input("Number of bank products (1-4): "))

        # Yes/no questions: anything that is not an accepted "yes" counts as no.
        print("\nAdditional Information:")
        has_card = input("Has credit card? (y/n): ").strip().lower()
        customer_data['HasCrCard'] = 1 if has_card in yes_answers else 0

        is_active = input("Active member? (y/n): ").strip().lower()
        customer_data['IsActiveMember'] = 1 if is_active in yes_answers else 0

        customer_data['EstimatedSalary'] = float(input("Estimated salary ($): "))

        # Country: compared case-insensitively against the suffix of each
        # Geography_* column, ignoring surrounding whitespace.
        geography = input("\nCountry (France/Spain/Germany): ").strip().lower()
        geography_cols = [c for c in feature_names if c.startswith('Geography_')]
        for col in geography_cols:
            # maxsplit=1 keeps category names that themselves contain '_'.
            country = col.split('_', 1)[1]
            customer_data[col] = 1 if geography == country.lower() else 0
        if geography_cols and not any(customer_data[col] for col in geography_cols):
            # Keep going (same policy as the range warnings above), but make the
            # all-zero geography encoding visible to the user.
            print("Warning: Country not recognized; no geography flag was set!")

        # Gender: reject unknown answers instead of silently defaulting to one
        # category, which would feed a wrong value to the model.
        gender_answer = input("Gender (M/F): ").strip().upper()
        gender = gender_aliases.get(gender_answer)
        if gender is None:
            print("Error: Please enter M or F for gender!")
            return None
        for col in [c for c in feature_names if c.startswith('Gender_')]:
            gen = col.split('_', 1)[1]
            customer_data[col] = 1 if gender == gen else 0

    except ValueError:
        # Raised by int()/float() when a numeric prompt gets non-numeric text.
        print("Error: Please enter valid numerical values!")
        return None

    # Select by feature_names so column order matches what the model expects.
    return pd.DataFrame([customer_data])[feature_names]


In [None]:
def _train_churn_model(csv_path):
    """Load the churn CSV, one-hot encode it, and fit the scaler and random forest.

    Returns:
        Tuple ``(model, scaler, feature_names, X_test_scaled, y_test)`` where
        ``feature_names`` is the ordered list of training columns and the last
        two items are the held-out 20% split used for evaluation.
    """
    print("Loading customer database...")
    df = pd.read_csv(csv_path)

    # Drop identifier columns, then one-hot encode the two categorical columns.
    cleaned_df = df.drop(['Surname', 'RowNumber', 'CustomerId'], axis=1)
    processed_df = pd.get_dummies(cleaned_df, columns=['Geography', 'Gender'])

    # Split features and target
    X = processed_df.drop('Exited', axis=1)
    y = processed_df['Exited']
    feature_names = X.columns.tolist()

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit the scaler on the training split only, then reuse it for test data.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    print("Training model (this might take a minute)...")
    model = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
    model.fit(X_train_scaled, y_train)
    print("Model ready!")

    return model, scaler, feature_names, X_test_scaled, y_test


def _predict_for_customer(model, scaler, feature_names):
    """Prompt for one customer, then print the predicted class and churn probability."""
    customer_data = get_user_input(feature_names)
    if customer_data is None:
        # get_user_input already reported the problem; go back to the menu.
        return

    scaled_data = scaler.transform(customer_data)
    prediction = model.predict(scaled_data)[0]
    # Column 1 of predict_proba is the probability of class 1 (Exited).
    churn_prob = model.predict_proba(scaled_data)[0][1]

    print("\n=== Prediction Results ===")
    if prediction == 1:
        print("Warning: Customer likely to leave!")
    else:
        print("Good news: Customer likely to stay!")
    print(f"Churn probability: {churn_prob:.1%}")


def _show_model_accuracy(model, X_test_scaled, y_test):
    """Print test-set accuracy and plot the confusion matrix as a heatmap."""
    test_preds = model.predict(X_test_scaled)
    print(f"\nModel accuracy: {accuracy_score(y_test, test_preds):.1%}")

    plt.figure(figsize=(8, 6))
    sns.heatmap(confusion_matrix(y_test, test_preds),
                annot=True, fmt='d', cmap='Blues')
    plt.title('Prediction Results Overview')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()


def _show_feature_importance(model, feature_names):
    """Plot the ten features with the highest random-forest importance."""
    importance = pd.DataFrame({
        'Feature': feature_names,
        'Importance': model.feature_importances_
    }).sort_values('Importance', ascending=False)

    plt.figure(figsize=(10, 6))
    sns.barplot(data=importance.head(10), x='Importance', y='Feature')
    plt.title('Top Factors in Customer Churn')
    plt.tight_layout()
    plt.show()


def main(csv_path='Churn_Modelling.csv'):
    """Train the churn model once, then serve an interactive text menu.

    Args:
        csv_path: Path of the customer CSV to train on. Defaults to
            ``'Churn_Modelling.csv'`` in the working directory.

    The menu loops until the user picks option 4, or until stdin is closed or
    the prompt is interrupted (both are treated as a request to quit).
    """
    model, scaler, feature_names, X_test_scaled, y_test = _train_churn_model(csv_path)

    # Main program loop
    while True:
        print("\n=== Customer Churn Prediction Tool ===")
        print("1. Predict customer churn")
        print("2. Check model accuracy")
        print("3. View important factors")
        print("4. Quit")

        try:
            # Strip so that answers like "1 " are not rejected as invalid.
            choice = input("\nWhat would you like to do? (1-4): ").strip()
        except (EOFError, KeyboardInterrupt):
            # No more input is coming (closed stdin or Ctrl-C / interrupt at
            # the prompt): leave the loop cleanly instead of with a traceback.
            print("\nGoodbye!")
            break

        if choice == '1':
            _predict_for_customer(model, scaler, feature_names)

        elif choice == '2':
            _show_model_accuracy(model, X_test_scaled, y_test)

        elif choice == '3':
            _show_feature_importance(model, feature_names)

        elif choice == '4':
            print("\nGoodbye!")
            break

        else:
            print("\nInvalid choice, please try again.")

# Start the interactive tool only when this code is executed directly (as a
# script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Loading customer database...
Training model (this might take a minute)...
Model ready!

=== Customer Churn Prediction Tool ===
1. Predict customer churn
2. Check model accuracy
3. View important factors
4. Quit
