In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Read the raw customer table and discard the 'name' column up front
# (it is not used as a predictor).
data = pd.read_csv('customer_data.csv').drop(columns='name')

# Integer-encode every categorical column. One fitted encoder is kept per
# column (keyed by column name) so the same label -> integer mapping can be
# re-applied to new rows later.
categorical_cols = ['gender', 'education', 'country']
label_encoders = {col: LabelEncoder() for col in categorical_cols}
for col, le in label_encoders.items():
    data[col] = le.fit_transform(data[col])

# Interaction feature: income multiplied by purchase frequency
data['income_purchase_freq'] = data['income'].mul(data['purchase_frequency'])

# Target is 'spending'; every remaining column is a predictor
y = data['spending']
X = data.drop(columns='spending')

# Split into train and test sets (20% held out, fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=21)

# Take explicit copies before writing scaled values back. The frames returned
# by train_test_split are row selections of X, and assigning columns into such
# a selection can raise pandas' SettingWithCopyWarning; copying makes it
# unambiguous that only the train/test frames are modified.
X_train = X_train.copy()
X_test = X_test.copy()

# Scale numerical features.
# The scaler is fitted on the training rows only and then applied unchanged to
# the test rows, so no test-set statistics leak into training.
scaler = StandardScaler()
numerical_cols = ['age', 'income', 'purchase_frequency', 'income_purchase_freq']
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])
# Candidate regressors, each with a fixed seed
models = {
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(random_state=42),
    'XGBoost': XGBRegressor(random_state=42),
}

# Fit every candidate on the training split and score it on the held-out split
results = {}
for name, model in models.items():
    # fit() returns the estimator itself, so training and prediction can be chained
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # Error metrics on the test split; RMSE is derived from MSE
    mae, mse, r2 = (
        metric(y_test, y_pred)
        for metric in (mean_absolute_error, mean_squared_error, r2_score)
    )
    rmse = np.sqrt(mse)

    # Keep only the metrics that are reported afterwards
    results[name] = dict(MAE=mae, RMSE=rmse, R2=r2)

# Print a short report per model: a blank line, a heading, then the three
# metrics rounded to two decimals (one per line).
for model_name, metrics in results.items():
    print(
        f"\n{model_name} Performance:",
        f"Mean Absolute Error: {metrics['MAE']:.2f}",
        f"Root Mean Squared Error: {metrics['RMSE']:.2f}",
        f"R-squared: {metrics['R2']:.2f}",
        sep="\n",
    )


Decision Tree Performance:
Mean Absolute Error: 983.92
Root Mean Squared Error: 1390.13
R-squared: 0.94

Random Forest Performance:
Mean Absolute Error: 551.63
Root Mean Squared Error: 776.65
R-squared: 0.98

XGBoost Performance:
Mean Absolute Error: 381.84
Root Mean Squared Error: 568.82
R-squared: 0.99


In [11]:
import gradio as gr
import joblib
import pandas as pd
import numpy as np

# Restore the persisted predictor, the per-column label encoders and the
# feature scaler, in that order.
# NOTE(review): joblib files are pickle-based, so only load artifacts from a
# trusted location.
# NOTE(review): nothing in the visible cells writes these files — confirm they
# were produced from the same fitted encoders/scaler used to train the model.
model, label_encoders, scaler = (
    joblib.load(artifact_path)
    for artifact_path in (
        'model/xgb_spend_predictor.pkl',
        'model/label_encoders.pkl',
        'model/scaler.pkl',
    )
)

# Countries offered in the UI dropdown. This is a hand-picked sample, not the
# full set of countries; add more as needed.
COUNTRIES = [
    "United States of America",
    "United Kingdom",
    "Canada",
    "Australia",
    "Germany",
    "France",
    "Japan",
    "India",
    "Brazil",
    "Singapore",
    "Slovenia",
    "Aruba",
    "Cyprus",
    "Palau",
    "Zambia",
]

def predict_spending(age, gender, education, income, country, purchase_frequency):
    """Predict spending for a single customer and format it as a dollar string.

    The raw inputs are assembled into a one-row DataFrame, the categorical
    columns are integer-encoded with the module-level ``label_encoders``, the
    ``income_purchase_freq`` interaction feature is added, the numerical
    columns are transformed with the module-level ``scaler`` and the row is
    passed to the module-level ``model``.

    Parameters
    ----------
    age : number
        Converted with ``int()`` (fractional values are truncated).
    gender, education, country : str
        Raw category labels. Each one is looked up in the encoder stored under
        the same key in ``label_encoders``. A label the encoder does not know
        is replaced by that encoder's first class (``classes_[0]``).
    income, purchase_frequency : number
        Converted with ``float()``.

    Returns
    -------
    str
        The prediction formatted like ``"$1,234.56"``, or ``"Error: <message>"``
        if any step raises.
    """
    try:
        # Create a one-row DataFrame holding the raw inputs
        input_data = pd.DataFrame([[
            int(age),
            gender,
            education,
            float(income),
            country,
            float(purchase_frequency)
        ]], columns=['age', 'gender', 'education', 'income', 'country', 'purchase_frequency'])

        # Encode categorical variables.
        # Each column's own value must be checked against its own encoder;
        # checking the country against every encoder would send gender and
        # education to the fallback on every call and ignore the user's input.
        for col in ['gender', 'education', 'country']:
            le = label_encoders[col]
            raw_value = input_data[col].iloc[0]
            if raw_value not in set(le.classes_):
                # Unseen label: fall back to the encoder's first class. This is
                # simply the first entry of classes_, not the most frequent one.
                raw_value = le.classes_[0]
            input_data[col] = le.transform([raw_value])[0]

        # Feature engineering: same interaction term as used for training
        input_data['income_purchase_freq'] = input_data['income'] * input_data['purchase_frequency']

        # Scale numerical features with the already-fitted scaler
        numerical_cols = ['age', 'income', 'purchase_frequency', 'income_purchase_freq']
        input_data[numerical_cols] = scaler.transform(input_data[numerical_cols])

        # Make prediction
        # NOTE(review): assumes this column order matches the order the model
        # was trained with — confirm against the training frame.
        prediction = model.predict(input_data)

        return f"${round(float(prediction[0]), 2):,.2f}"

    except Exception as e:
        # Surface the failure as text so the UI shows a message instead of crashing
        return f"Error: {str(e)}"

# Create Gradio interface: two columns of inputs, a predict button, the
# prediction output and a few clickable examples.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🛍️ Customer Spend Predictor")
    gr.Markdown("Predict annual customer spending based on demographic and behavioral data")

    with gr.Row():
        with gr.Column():
            # step=1 keeps the age a whole number; without an explicit step the
            # slider offers fractional ages that predict_spending then truncates.
            age = gr.Slider(18, 100, step=1, label="Age", value=30)
            gender = gr.Radio(["Male", "Female"], label="Gender")
            education = gr.Dropdown(
                ["High School", "Bachelor", "Master", "PhD"],
                label="Education Level"
            )
        with gr.Column():
            income = gr.Number(label="Annual Income ($)", value=50000)
            country = gr.Dropdown(COUNTRIES, label="Country")
            purchase_frequency = gr.Slider(
                0, 1, step=0.1, label="Purchase Frequency (per month)", value=0.5
            )

    submit_btn = gr.Button("Predict Spending", variant="primary")

    with gr.Row():
        output = gr.Label(label="Predicted Annual Spending")
        # The model is a regressor, so its quality is reported as R² rather
        # than as a classification "accuracy".
        gr.Markdown("""
        <div style='margin-top: 20px; font-size: 0.9em; color: #666;'>
        <b>Note:</b> This model uses XGBoost (R² ≈ 0.99 on held-out test data)
        </div>
        """)

    # Run the prediction when the button is clicked
    submit_btn.click(
        fn=predict_spending,
        inputs=[age, gender, education, income, country, purchase_frequency],
        outputs=output
    )

    # Pre-filled examples; cache_examples=True has Gradio cache their outputs
    gr.Examples(
        examples=[
            [35, "Male", "Bachelor", 75000, "United States of America", 0.7],
            [42, "Female", "Master", 90000, "United Kingdom", 0.5],
            [28, "Female", "PhD", 120000, "Canada", 0.9]
        ],
        fn=predict_spending,
        inputs=[age, gender, education, income, country, purchase_frequency],
        outputs=output,
        cache_examples=True
    )

# share=True additionally serves the app on a temporary public URL, so anyone
# with the link can query the model while it is running.
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7863
Caching examples at: 'C:\Users\Gokulraj R\MBA_SEM_3\Assignment\Customer Prediction\.gradio\cached_examples\75'
* Running on public URL: https://b67db85c08ab5496be.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


