In [9]:
# Business Analytics Model: Bank Transactions Analysis
# Module 4 Assignment

"""
Assignment Overview:
- Develop business analytics model using cleaned bank transactions data
- Implement customer segmentation and behavior prediction
- Evaluate model performance with relevant metrics
- Document entire process in Jupyter Notebook
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, silhouette_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import warnings
warnings.filterwarnings('ignore')

# Startup banner: confirms the import cell ran to completion.
for banner_line in (
    "✅ Business Analytics Model Development Started",
    "📊 Module 4 Assignment: Bank Transactions Analysis",
):
    print(banner_line)

✅ Business Analytics Model Development Started
📊 Module 4 Assignment: Bank Transactions Analysis


In [11]:
print("🔧 STEP 1: DATA LOADING AND FINAL PREPARATION")

import pandas as pd
import os
import sys

# Paths
# The project folder can be overridden with the BANK_DATA_DIR environment
# variable so the notebook is not tied to a single machine. When the variable
# is unset, the original location is used unchanged.
project_dir = os.environ.get(
    "BANK_DATA_DIR",
    r"C:\Users\FORUM\Desktop\Nexford Submissions for MSDA",
)
cleaned_csv_path = os.path.join(project_dir, "bank_transactions_ready.csv")
raw_csv_path = os.path.join(project_dir, "bank_transactions.csv")
src_path = os.path.join(project_dir, "src")

# Step 1: Load cleaned CSV if it exists
if os.path.exists(cleaned_csv_path):
    df = pd.read_csv(cleaned_csv_path)
    print(f"✅ Cleaned data loaded successfully: {df.shape}")
else:
    print("⚠️ Cleaned data not found. Loading raw data...")
    df_raw = pd.read_csv(raw_csv_path)

    # Step 2: Attempt to load cleaning modules if they exist
    if os.path.exists(src_path):
        # Guard against adding the same entry again every time the cell is re-run.
        if src_path not in sys.path:
            sys.path.append(src_path)
        try:
            from data_cleaning import clean_data
            from feature_engineering import engineer_features
            df = engineer_features(clean_data(df_raw))
            print(f"✅ Raw data processed successfully: {df.shape}")
        except ImportError as import_error:
            # ImportError (the parent of ModuleNotFoundError) also covers a module
            # that exists but does not define the imported function.
            print("⚠️ data_cleaning or feature_engineering modules not found. Using raw data as-is.")
            print(f"   Import error: {import_error}")
            df = df_raw
    else:
        print("⚠️ src folder not found. Using raw data as-is.")
        df = df_raw

# Step 3: Dataset overview
print("\n📋 Dataset Overview:")
print(f"Shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")

# Step 4: Data quality checks
print("\n🔍 Final Data Quality Check:")
print(f"Missing values: {df.isnull().sum().sum()}")
print(f"Duplicate rows: {df.duplicated().sum()}")

# Step 5: Display sample
print("\n📊 Sample Data (First 3 rows):")
display(df.head(3))


🔧 STEP 1: DATA LOADING AND FINAL PREPARATION
⚠️ Cleaned data not found. Loading raw data...
⚠️ src folder not found. Using raw data as-is.

📋 Dataset Overview:
Shape: (1048567, 9)
Columns: ['TransactionID', 'CustomerID', 'CustomerDOB', 'CustGender', 'CustLocation', 'CustAccountBalance', 'TransactionDate', 'TransactionTime', 'TransactionAmount (INR)']

🔍 Final Data Quality Check:
Missing values: 7017
Duplicate rows: 0

📊 Sample Data (First 3 rows):


Unnamed: 0,TransactionID,CustomerID,CustomerDOB,CustGender,CustLocation,CustAccountBalance,TransactionDate,TransactionTime,TransactionAmount (INR)
0,T1,C5841053,10/1/94,F,JAMSHEDPUR,17819.05,2/8/16,143207,25.0
1,T2,C2142763,4/4/57,M,JHAJJAR,2270.69,2/8/16,141858,27999.0
2,T3,C4417068,26/11/96,F,MUMBAI,17874.44,2/8/16,142712,459.0


In [12]:
# Step 2: Define Business Objectives and Implementation Plan
print("🎯 STEP 2: BUSINESS OBJECTIVES AND IMPLEMENTATION PLAN")

# Narrative text only: what the analysis is meant to achieve and how.
business_objectives = """
BUSINESS ANALYTICS OBJECTIVES:

1. CUSTOMER SEGMENTATION
   - Group customers based on transaction behavior
   - Identify high-value customer segments
   - Enable targeted marketing strategies

2. TRANSACTION BEHAVIOR PREDICTION
   - Predict high-value transactions
   - Identify patterns for fraud detection
   - Forecast customer transaction patterns

3. CUSTOMER LIFETIME VALUE ANALYSIS
   - Segment customers by potential value
   - Identify retention opportunities
   - Optimize resource allocation

MODELING APPROACH:
1. Unsupervised Learning: K-means clustering for customer segmentation
2. Supervised Learning: Classification for high-value transaction prediction
3. Ensemble Methods: Random Forest for feature importance analysis
"""

# Phase-by-phase plan that the remaining cells follow.
implementation_plan = """
📋 STEP-BY-STEP IMPLEMENTATION PLAN:

PHASE 1: DATA PREPARATION
✓ Load and validate cleaned data
✓ Feature selection for modeling
✓ Data normalization and encoding

PHASE 2: CUSTOMER SEGMENTATION (K-means Clustering)
- Select clustering features
- Determine optimal number of clusters
- Apply K-means algorithm
- Analyze and interpret segments

PHASE 3: HIGH-VALUE TRANSACTION PREDICTION
- Create binary classification target
- Split data into training/testing sets
- Train multiple classification models
- Evaluate and select best performer

PHASE 4: MODEL EVALUATION & BUSINESS INSIGHTS
- Analyze model performance metrics
- Extract feature importance
- Generate business recommendations
- Document findings and limitations
"""

# Emit both sections in the order they were defined.
for plan_section in (business_objectives, implementation_plan):
    print(plan_section)

🎯 STEP 2: BUSINESS OBJECTIVES AND IMPLEMENTATION PLAN

BUSINESS ANALYTICS OBJECTIVES:

1. CUSTOMER SEGMENTATION
   - Group customers based on transaction behavior
   - Identify high-value customer segments
   - Enable targeted marketing strategies

2. TRANSACTION BEHAVIOR PREDICTION
   - Predict high-value transactions
   - Identify patterns for fraud detection
   - Forecast customer transaction patterns

3. CUSTOMER LIFETIME VALUE ANALYSIS
   - Segment customers by potential value
   - Identify retention opportunities
   - Optimize resource allocation

MODELING APPROACH:
1. Unsupervised Learning: K-means clustering for customer segmentation
2. Supervised Learning: Classification for high-value transaction prediction
3. Ensemble Methods: Random Forest for feature importance analysis


📋 STEP-BY-STEP IMPLEMENTATION PLAN:

PHASE 1: DATA PREPARATION
✓ Load and validate cleaned data
✓ Feature selection for modeling
✓ Data normalization and encoding

PHASE 2: CUSTOMER SEGMENTATION (K-means 

In [None]:
# Step 3: Feature Selection and Preprocessing
print("⚙️ STEP 3: FEATURE SELECTION AND PREPROCESSING")

# Candidate columns for modeling; some may be absent from the loaded frame.
modeling_features = [
    'TransactionAmount (INR)',
    'CustAccountBalance',
    'CustomerAge',
    'TransactionHour',
    'BalanceToTransactionRatio',
    'IsMetroCity',
    'HighValueTransaction'
]

# Keep only the candidates that actually exist in df.
available_features = [name for name in modeling_features if name in df.columns]
print(f"Available features for modeling: {available_features}")

# Work on a copy so df itself is not modified by the encoding below.
X = df.loc[:, available_features].copy()

# Object and boolean columns are treated as categorical.
categorical_cols = X.select_dtypes(include=['object', 'bool']).columns
print(f"Categorical columns to encode: {categorical_cols.tolist()}")

# Integer-encode each categorical column and keep its fitted encoder by name.
label_encoders = {}
for col in categorical_cols:
    encoder = LabelEncoder()
    X[col] = encoder.fit_transform(X[col].astype(str))
    label_encoders[col] = encoder

# Remaining gaps are filled with the per-column median.
X = X.fillna(X.median())

print(f"Final modeling dataset shape: {X.shape}")
print("\n📊 Modeling Features Summary:")
print(X.describe())

# Pairwise feature correlations, drawn as an annotated heatmap.
correlation_matrix = X.corr()
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    center=0,
    square=True,
    fmt='.2f',
    linewidths=0.5,
    ax=ax,
)
ax.set_title('Feature Correlation Matrix')
plt.tight_layout()
plt.show()

In [None]:
# Step 4: Customer Segmentation using K-means Clustering
print("👥 STEP 4: CUSTOMER SEGMENTATION MODEL")

# Select features for clustering (remove target variable if present)
clustering_features = [col for col in available_features if col != 'HighValueTransaction']
X_cluster = X[clustering_features].copy()

# Standardize features for clustering so no single column dominates the distances
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_cluster)

# Determine optimal number of clusters using Elbow Method
print("🔍 Determining optimal number of clusters...")

# The silhouette coefficient needs pairwise distances, which is quadratic in the
# number of rows. Scoring a fixed-seed random sample keeps this cell runnable on
# the full transaction table while staying reproducible.
silhouette_sample_size = min(10_000, X_scaled.shape[0])

wcss = []  # Within-cluster sum of squares
silhouette_scores = []
cluster_range = range(2, 8)  # every k here is >= 2, so a silhouette score always exists

for k in cluster_range:
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    kmeans.fit(X_scaled)
    wcss.append(kmeans.inertia_)

    score = silhouette_score(
        X_scaled,
        kmeans.labels_,
        sample_size=silhouette_sample_size,
        random_state=42,
    )
    silhouette_scores.append(score)

# Plot Elbow Method
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# Elbow curve
ax1.plot(cluster_range, wcss, 'bo-', linewidth=2, markersize=8)
ax1.set_xlabel('Number of Clusters')
ax1.set_ylabel('Within-Cluster Sum of Squares (WCSS)')
ax1.set_title('Elbow Method for Optimal K')
ax1.grid(True, alpha=0.3)

# Silhouette scores (x-axis tied to cluster_range so both panels always agree)
ax2.plot(cluster_range, silhouette_scores, 'go-', linewidth=2, markersize=8)
ax2.set_xlabel('Number of Clusters')
ax2.set_ylabel('Silhouette Score')
ax2.set_title('Silhouette Scores for Different K Values')
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Choose optimal K (based on elbow and silhouette score)
optimal_k = 4  # You can adjust this based on the plots
print(f"🎯 Selected optimal number of clusters: {optimal_k}")

# Apply K-means with optimal K
kmeans = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)
cluster_labels = kmeans.fit_predict(X_scaled)

# Add cluster labels to original dataframe
df['CustomerSegment'] = cluster_labels
X['CustomerSegment'] = cluster_labels

print(f"✅ Customer segmentation completed with {optimal_k} clusters")
print(f"Cluster distribution:\n{df['CustomerSegment'].value_counts().sort_index()}")

In [None]:
# Step 5: Customer Segment Analysis
print("📊 STEP 5: CUSTOMER SEGMENT ANALYSIS")

# Statistics wanted per segment. Columns that are not present in df (for example
# engineered features that were never created) are skipped rather than raising
# KeyError, matching the "only use available features" approach of Step 3.
segment_agg_spec = {
    'TransactionAmount (INR)': ['mean', 'median', 'count'],
    'CustAccountBalance': ['mean', 'median'],
    'CustomerAge': ['mean', 'median'],
    'TransactionHour': ['mean', 'median'],
    'BalanceToTransactionRatio': ['mean', 'median'],
    'IsMetroCity': ['mean'],  # Proportion in metro cities
}
segment_agg_spec = {
    column: stats for column, stats in segment_agg_spec.items() if column in df.columns
}

# Analyze characteristics of each cluster
segment_analysis = df.groupby('CustomerSegment').agg(segment_agg_spec).round(2)

print("📈 Customer Segment Profiles:")
display(segment_analysis)

# Visualize segment characteristics: one panel per (column, statistic).
segment_plots = [
    ('TransactionAmount (INR)', 'mean', 'Average Transaction Amount by Segment', 'Amount (INR)', 'skyblue'),
    ('CustAccountBalance', 'mean', 'Average Account Balance by Segment', 'Balance (INR)', 'lightgreen'),
    ('CustomerAge', 'mean', 'Average Customer Age by Segment', 'Age', 'gold'),
    ('IsMetroCity', 'mean', 'Metro City Proportion by Segment', 'Proportion', 'lightcoral'),
    ('TransactionAmount (INR)', 'count', 'Transaction Count by Segment', 'Count', 'violet'),
    ('BalanceToTransactionRatio', 'mean', 'Balance to Transaction Ratio by Segment', 'Ratio', 'orange'),
]

fig, axes = plt.subplots(2, 3, figsize=(18, 12))
for ax, (column, stat, title, ylabel, color) in zip(axes.flat, segment_plots):
    if (column, stat) not in segment_analysis.columns:
        # Hide the panel when its source column is unavailable.
        ax.set_visible(False)
        continue
    segment_analysis[(column, stat)].plot(kind='bar', ax=ax, color=color)
    ax.set_title(title)
    ax.set_ylabel(ylabel)

plt.tight_layout()
plt.show()

# Create segment profiles.
# The text is generated per segment so it follows optimal_k; the previous fixed
# template had 16 placeholders but received only 4 values and raised IndexError.
profile_fields = [
    (('TransactionAmount (INR)', 'mean'), '- Average Transaction: ₹{:,.2f}', 1),
    (('CustAccountBalance', 'mean'), '- Average Balance: ₹{:,.2f}', 1),
    (('CustomerAge', 'mean'), '- Typical Age: {:.1f}', 1),
    (('IsMetroCity', 'mean'), '- Metro City: {:.1f}%', 100),  # proportion -> percent
]

profile_lines = ["", "🎯 CUSTOMER SEGMENT PROFILES:", ""]
for segment_id, segment_row in segment_analysis.iterrows():
    profile_lines.append(f"Segment {segment_id}: [Description based on your analysis]")
    for field_key, template, scale in profile_fields:
        if field_key in segment_analysis.columns:
            profile_lines.append(template.format(segment_row[field_key] * scale))
    profile_lines.append("")
segment_profiles = "\n".join(profile_lines)

print(segment_profiles)

In [None]:
# Step 6: High-Value Transaction Prediction
print("💰 STEP 6: HIGH-VALUE TRANSACTION PREDICTION MODEL")

# Imported once here instead of on every iteration of the training loop.
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Prepare data for classification
if 'HighValueTransaction' in df.columns:
    # Use HighValueTransaction as target
    y = df['HighValueTransaction']
else:
    # Create target variable (top 20% transactions by amount)
    high_value_threshold = df['TransactionAmount (INR)'].quantile(0.8)
    y = (df['TransactionAmount (INR)'] > high_value_threshold).astype(int)
    df['HighValueTransaction'] = y

print(f"Target variable distribution:\n{y.value_counts()}")
print(f"High-value transactions: {y.sum()} ({y.mean()*100:.1f}% of total)")

# Prepare features (exclude target and segment for training).
# The target is a threshold on the transaction amount, so the amount itself and
# the ratio derived from it would hand the answer to the model (target leakage)
# and make every metric below meaningless. They are excluded here as well.
# NOTE(review): when HighValueTransaction already exists in df its definition is
# not visible in this notebook; it is presumed to be amount-based — confirm.
leaky_features = ['TransactionAmount (INR)', 'BalanceToTransactionRatio']
X_classification = X.drop(
    ['HighValueTransaction', 'CustomerSegment'] + leaky_features,
    axis=1,
    errors='ignore',
)
if X_classification.shape[1] == 0:
    raise ValueError(
        "No predictor columns remain after removing the target, the segment label "
        "and amount-derived features; run the feature engineering step first."
    )

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X_classification, y, test_size=0.3, random_state=42, stratify=y
)

print(f"Training set: {X_train.shape}")
print(f"Testing set: {X_test.shape}")

# Train multiple classification models
# NOTE(review): Logistic Regression and SVM are fitted on unscaled features, and
# kernel SVC training time grows at least quadratically with the number of rows;
# on the full transaction table consider scaling and a subsample or a linear SVM.
models = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Logistic Regression': LogisticRegression(random_state=42),
    'SVM': SVC(random_state=42)
}

# Evaluate each model
model_results = {}
metric_names = ['accuracy', 'precision', 'recall', 'f1_score']

print("🚀 Training classification models...")

for name, model in models.items():
    print(f"\n📊 Training {name}...")

    # Train model
    model.fit(X_train, y_train)

    # Make predictions
    y_pred = model.predict(X_test)

    # Calculate metrics. zero_division=0 makes the "no positive predictions"
    # case an explicit 0 instead of relying on a suppressed warning.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)

    model_results[name] = {
        'model': model,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1
    }

    print(f"✅ {name} Results:")
    print(f"   Accuracy:  {accuracy:.3f}")
    print(f"   Precision: {precision:.3f}")
    print(f"   Recall:    {recall:.3f}")
    print(f"   F1-Score:  {f1:.3f}")

# Compare model performance.
# Only the numeric metrics go into the table: mixing in the fitted model objects
# yields object-dtype columns, on which idxmax/sorting are unreliable.
results_df = pd.DataFrame.from_dict(
    {
        name: {metric: result[metric] for metric in metric_names}
        for name, result in model_results.items()
    },
    orient='index',
)[metric_names].astype(float)

print("\n🏆 MODEL PERFORMANCE COMPARISON:")
display(results_df.sort_values('f1_score', ascending=False))

# Select best model based on F1-score
best_model_name = results_df['f1_score'].idxmax()
best_model = model_results[best_model_name]['model']
print(f"\n🎯 Best Performing Model: {best_model_name}")

In [None]:
# Step 7: Model Evaluation and Interpretation
print("📈 STEP 7: MODEL EVALUATION AND INTERPRETATION")

# Everything below describes the single model selected in Step 6.
print(f"\n🔍 Detailed Evaluation for {best_model_name}:")

# Hold-out predictions and their confusion matrix.
y_pred_best = best_model.predict(X_test)
cm = confusion_matrix(y_test, y_pred_best)
class_names = ['Not High Value', 'High Value']

fig, (cm_ax, report_ax) = plt.subplots(1, 2, figsize=(10, 4))

# Left panel: confusion matrix.
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=cm_ax,
)
cm_ax.set_title(f'Confusion Matrix - {best_model_name}')
cm_ax.set_ylabel('Actual')
cm_ax.set_xlabel('Predicted')

# Right panel: classification report as a heatmap, without the last row and
# the last column of the report table.
cr = classification_report(y_test, y_pred_best, output_dict=True)
cr_df = pd.DataFrame(cr).transpose()
sns.heatmap(cr_df.iloc[:-1, :-1], annot=True, cmap='viridis', ax=report_ax)
report_ax.set_title('Classification Report Heatmap')

plt.tight_layout()
plt.show()

print("📋 Classification Report:")
print(classification_report(y_test, y_pred_best))

# Feature importances are only exposed by some estimators (e.g. tree ensembles).
if hasattr(best_model, 'feature_importances_'):
    print("\n🎯 FEATURE IMPORTANCE ANALYSIS:")
    importance_table = pd.DataFrame({
        'feature': X_classification.columns,
        'importance': best_model.feature_importances_
    })
    feature_importance = importance_table.sort_values('importance', ascending=False)
    top_ten = feature_importance.head(10)

    importance_fig, importance_ax = plt.subplots(figsize=(10, 6))
    sns.barplot(data=top_ten, x='importance', y='feature', ax=importance_ax)
    importance_ax.set_title(f'Top 10 Feature Importance - {best_model_name}')
    plt.tight_layout()
    plt.show()

    print("Top 10 Most Important Features:")
    display(top_ten)

In [None]:
# Step 8: Business Insights and Recommendations
print("💡 STEP 8: BUSINESS INSIGHTS AND RECOMMENDATIONS")

# Generate business insights.
# Each row of df is a transaction, so customers are counted through unique
# CustomerID values; len(df) would compare unique customers against a row count.
total_customers = df['CustomerID'].nunique()
high_value_customers = df.loc[df['HighValueTransaction'] == 1, 'CustomerID'].nunique()
high_value_share = (high_value_customers / total_customers * 100) if total_customers else 0.0

segment_sizes = df['CustomerSegment'].value_counts()
best_metrics = model_results[best_model_name]

insights = f"""
📊 BUSINESS INSIGHTS FROM MODEL ANALYSIS:

1. CUSTOMER SEGMENTATION RESULTS:
   - {optimal_k} distinct customer segments identified
   - Segment sizes vary from {segment_sizes.min()} to {segment_sizes.max()} transactions
   - Clear differentiation in transaction patterns and balances

2. HIGH-VALUE TRANSACTION PREDICTION:
   - Best model: {best_model_name} with F1-score of {best_metrics['f1_score']:.3f}
   - Can predict high-value transactions with {best_metrics['precision']:.1%} precision
   - {high_value_customers} unique high-value customers identified ({high_value_share:.1f}% of total)

3. KEY PREDICTORS OF HIGH-VALUE TRANSACTIONS:
"""

# Importances are reported only when the selected model exposes them AND the
# table from Step 7 exists; this avoids echoing a stale table from an earlier
# run, and the loop copes with fewer than three available features.
if hasattr(best_model, 'feature_importances_') and 'feature_importance' in globals():
    for feature_name in feature_importance.head(3)['feature'].tolist():
        insights += f"   - {feature_name}\n"
else:
    insights += "   - (feature importances are not available for this model type)\n"

insights += """
🎯 STRATEGIC RECOMMENDATIONS:

1. TARGETED MARKETING:
   - Develop segment-specific marketing campaigns
   - Focus on high-value customer retention
   - Personalize communication based on segment characteristics

2. RISK MANAGEMENT:
   - Use prediction model for transaction monitoring
   - Implement alerts for unusual high-value transactions
   - Enhance fraud detection capabilities

3. CUSTOMER SERVICE OPTIMIZATION:
   - Allocate resources based on customer value
   - Develop segment-specific service protocols
   - Implement proactive engagement for high-value segments

4. PRODUCT DEVELOPMENT:
   - Create tailored financial products for each segment
   - Develop premium services for high-value customers
   - Optimize digital banking features based on usage patterns
"""

print(insights)

In [None]:
# Step 9: Model Deployment Preparation
print("🚀 STEP 9: MODEL DEPLOYMENT PREPARATION")

# Persist the fitted estimators and preprocessing objects with joblib.
import joblib
import os

# Make sure the target folder exists before writing into it.
os.makedirs('../models', exist_ok=True)

# Destination path -> object to persist, in the order they are written.
artifacts = {
    '../models/high_value_prediction_model.pkl': best_model,      # best classifier
    '../models/scaler.pkl': scaler,                               # preprocessing
    '../models/label_encoders.pkl': label_encoders,               # preprocessing
    '../models/customer_segmentation_model.pkl': kmeans,          # cluster model
}
for artifact_path, artifact in artifacts.items():
    joblib.dump(artifact, artifact_path)

print("✅ Models saved successfully:")
for artifact_path in artifacts:
    print(f"   - {artifact_path}")

# Create prediction function for demonstration
def predict_high_value_transaction(features, model=None):
    """
    Predict if a transaction is high-value.

    Parameters
    ----------
    features : array-like
        Feature values for ONE transaction, in the same column order the
        classifier was trained on. Lists, tuples and numpy arrays are accepted.
    model : estimator, optional
        Fitted classifier to use. When omitted, the model saved at
        '../models/high_value_prediction_model.pkl' is loaded. Only load
        pickles you created yourself: unpickling can execute arbitrary code.

    Returns
    -------
    tuple
        (prediction, probability). ``prediction`` is the predicted label.
        ``probability`` is the second column of ``predict_proba`` for this row,
        or None when the model has no ``predict_proba`` (for example an SVC
        created without ``probability=True``).
    """
    if model is None:
        model = joblib.load('../models/high_value_prediction_model.pkl')

    # The classifiers in this notebook are fitted on unscaled features, so the
    # row is passed through as-is. The saved scaler belongs to the K-means
    # segmentation step and must not be applied here.
    sample = np.asarray(features, dtype=float).reshape(1, -1)

    prediction = model.predict(sample)
    if hasattr(model, 'predict_proba'):
        probability = model.predict_proba(sample)[0][1]
    else:
        probability = None

    return prediction[0], probability

# Closing message for the deployment step, emitted in one call.
deployment_notes = (
    "\n🎯 Model Deployment Ready!",
    "   Models can be integrated into banking systems for:",
    "   - Real-time transaction monitoring",
    "   - Customer segmentation dashboards",
    "   - Marketing campaign optimization",
)
print("\n".join(deployment_notes))

In [None]:
# Step 10: Final Summary and Documentation
print("📝 STEP 10: FINAL SUMMARY AND DOCUMENTATION")

import json
import os

# Silhouette needs pairwise distances (quadratic in rows), so it is computed
# once on a fixed-seed sample and reused for both the summary and the JSON.
final_silhouette = float(
    silhouette_score(
        X_scaled,
        cluster_labels,
        sample_size=min(10_000, X_scaled.shape[0]),
        random_state=42,
    )
)

# Completeness = share of non-missing cells. Dividing the missing-cell count by
# the number of rows (as before) mixes two different units.
total_cells = df.size
completeness_pct = (1 - df.isnull().sum().sum() / total_cells) * 100 if total_cells else 0.0

best_metrics = model_results[best_model_name]

# Generate model performance summary
summary = f"""
🎉 BUSINESS ANALYTICS MODEL DEVELOPMENT COMPLETE

MODELING SUMMARY:

1. DATA PREPARATION:
   - Records Processed: {len(df):,}
   - Features Used: {len(available_features)}
   - Data Quality: {completeness_pct:.1f}% complete

2. CUSTOMER SEGMENTATION:
   - Algorithm: K-means Clustering
   - Optimal Clusters: {optimal_k}
   - Silhouette Score: {final_silhouette:.3f}
   - Segments Identified: {df['CustomerSegment'].nunique()}

3. HIGH-VALUE TRANSACTION PREDICTION:
   - Best Model: {best_model_name}
   - Accuracy: {best_metrics['accuracy']:.3f}
   - Precision: {best_metrics['precision']:.3f}
   - Recall: {best_metrics['recall']:.3f}
   - F1-Score: {best_metrics['f1_score']:.3f}

4. BUSINESS IMPACT:
   - Enabled data-driven customer segmentation
   - Provided high-value transaction prediction capability
   - Delivered actionable insights for marketing and risk management
   - Established foundation for advanced analytics

NEXT STEPS:
1. Integrate models into production systems
2. Develop real-time monitoring dashboards
3. Implement A/B testing for model improvements
4. Expand model features with additional customer data

MODEL LIMITATIONS:
- Based on historical transaction data only
- Does not include external economic factors
- Requires regular retraining with new data
- Performance may vary with changing customer behavior
"""

print(summary)

# Save model results to file.
# Values are cast to built-in int/float so json never meets a numpy scalar.
results_output = {
    'segmentation_performance': {
        'optimal_clusters': int(optimal_k),
        'silhouette_score': final_silhouette,
        'cluster_distribution': {
            int(segment): int(count)
            for segment, count in df['CustomerSegment'].value_counts().items()
        }
    },
    'classification_performance': {
        'best_model': best_model_name,
        'accuracy': float(best_metrics['accuracy']),
        'precision': float(best_metrics['precision']),
        'recall': float(best_metrics['recall']),
        'f1_score': float(best_metrics['f1_score'])
    },
    'business_metrics': {
        'total_customers': int(total_customers),
        'high_value_customers': int(high_value_customers),
        'high_value_percentage': float(high_value_customers / total_customers * 100)
    }
}

# The reports folder is not created anywhere earlier in the notebook; without
# this, open() raises FileNotFoundError on a fresh checkout.
os.makedirs('../reports', exist_ok=True)
with open('../reports/model_performance_metrics.json', 'w', encoding='utf-8') as f:
    json.dump(results_output, f, indent=2)

print("✅ Model performance metrics saved to '../reports/model_performance_metrics.json'")
print("\n🎊 MODULE 4 ASSIGNMENT COMPLETED SUCCESSFULLY!")

In [None]:
# Add the new model notebook and files to GitHub
git add notebooks/business_analytics_model.ipynb
git add models/
git add reports/model_performance_metrics.json

git commit -m "Add Module 4: Business analytics model with customer segmentation and prediction"
git push origin main