## Step 1: Install Required Packages

In [None]:
"""
Run this notebook to execute the complete BI Dashboard pipeline
using the modular Python files
"""

In [None]:
# Install required packages.
# %pip (rather than !pip) installs into the environment of the running kernel,
# so the packages are importable by the cells below even when the shell's
# `pip` points at a different Python.
# TODO: pin versions (pkg==x.y.z) once the tested set is known, for reproducible runs.
%pip install -q pandas numpy scikit-learn xgboost prophet matplotlib seaborn

## Step 2: Import Modules

In [None]:
# Project-local pipeline modules: one preprocessing class plus one class per model.
# NOTE(review): these are presumably .py files sitting next to this notebook —
# confirm they are importable from the kernel's working directory.
from preprocessing import DataPreprocessor
from segmentation_model import CustomerSegmentation
from churn_model import ChurnPredictor
from sales_forecast_model import SalesForecaster
from return_model import ReturnPredictor

# Third-party libraries for data handling and plotting.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Only reached when every import above succeeded.
print("✓ All modules imported successfully!")

## Step 3: Data Preprocessing

In [None]:
# Build the preprocessor and run its full pipeline in one go.
# NOTE(review): an empty data_path presumably means "current working directory" — confirm.
preprocessor = DataPreprocessor(data_path='')
data = preprocessor.process_all()

print("\n✓ Preprocessing Complete!")

# Report the shape of each prepared table; `data` is indexed by dataset name.
for label, key in (
    ("Transaction data", 'transaction_data'),
    ("Customer master", 'customer_master'),
    ("Sales data", 'sales_data'),
    ("Return data", 'return_data'),
):
    print(f"{label}: {data[key].shape}")

In [None]:
# Show the first five rows of the customer master table (five is head()'s default)
data['customer_master'].head(n=5)

## Step 4: Model 1 - Customer Segmentation (K-Means)

In [None]:
# Train Customer Segmentation Model with 4 clusters.
# train() hands back the customer data; the plotting cell below reads its
# 'segment' column.
seg_model = CustomerSegmentation(n_clusters=4)
customer_data_with_segments = seg_model.train(data['customer_master'])

# Save the model to save_model()'s default destination.
# NOTE(review): Step 9 loads 'models/segmentation_model.pkl', so the default is
# presumably that path — confirm in segmentation_model.py.
seg_model.save_model()

In [None]:
# Bar chart of how many customers landed in each segment, ordered by segment label
segment_counts = customer_data_with_segments['segment'].value_counts().sort_index()

seg_fig, seg_ax = plt.subplots(figsize=(10, 6))
seg_ax.bar(segment_counts.index, segment_counts.values, color='skyblue', edgecolor='black')
seg_ax.set_xlabel('Segment', fontsize=12)
seg_ax.set_ylabel('Number of Customers', fontsize=12)
seg_ax.set_title('Customer Segmentation Distribution', fontsize=14, fontweight='bold')
# One tick per segment label instead of matplotlib's automatic tick spacing
seg_ax.set_xticks(segment_counts.index)
seg_ax.grid(axis='y', alpha=0.3)
plt.show()

## Step 5: Model 2 - Churn Prediction (XGBoost)

In [None]:
# Train Churn Prediction Model on the same customer master table used for
# segmentation. Whatever train() returns is not kept here.
# NOTE(review): no churn label column is selected in this notebook; presumably
# train() derives or locates it itself — confirm in churn_model.py.
churn_model = ChurnPredictor()
churn_model.train(data['customer_master'])

# Save the model to save_model()'s default destination.
# NOTE(review): Step 9 loads 'models/churn_model.pkl', so the default is
# presumably that path — confirm.
churn_model.save_model()

## Step 6: Model 3 - Sales Forecasting (Prophet)

In [None]:
# Train Sales Forecasting Model on the preprocessed sales table.
# The fitted object is reused by the next cell (sales_model.predict and
# sales_model.model.plot).
sales_model = SalesForecaster()
sales_model.train(data['sales_data'])

# Save the model to save_model()'s default destination.
# NOTE(review): Step 9 loads 'models/sales_forecast_model.pkl', so the default
# is presumably that path — confirm.
sales_model.save_model()

In [None]:
# Generate 90-day forecast
forecast = sales_model.predict(periods=90)

# Draw the forecast with the wrapped model's own plot(), then label the axes
# that plot() left as the current ones.
fig = sales_model.model.plot(forecast)
forecast_ax = plt.gca()
forecast_ax.set_title('Sales Forecast for Next 90 Days')
forecast_ax.set_xlabel('Date')
forecast_ax.set_ylabel('Sales Value')
plt.show()

## Step 7: Model 4 - Product Return Prediction (Random Forest)

In [None]:
# Train Product Return Prediction Model on the preprocessed return table.
# Whatever train() returns is not kept here.
return_model = ReturnPredictor()
return_model.train(data['return_data'])

# Save the model to save_model()'s default destination.
# NOTE(review): Step 9 loads 'models/return_model.pkl', so the default is
# presumably that path — confirm.
return_model.save_model()

## Step 8: Create Prediction CSV Templates

In [None]:
# Create Customer Predictions CSV: the customer id plus the feature columns,
# taken from the first 100 rows of the customer master table.
customer_template_columns = [
    'customer_unique_id',
    'recency',
    'frequency',
    'monetary',
    'avg_review_score',
    'has_left_bad_review',
    'avg_days_between_purchases',
    'avg_delivery_time',
    'avg_delivery_lateness',
    'avg_approval_hours',
    'number_of_low_reviews',
    'std_dev_days_between_purchases',
    'frequency_last_90_days',
    'monetary_last_90_days',
    'freq_ratio_90d_alltime',
]

# Empty placeholder columns; Step 9 overwrites them with model output.
customer_template = data['customer_master'][customer_template_columns].head(100).assign(
    predicted_segment='',
    predicted_churn='',
    churn_probability='',
)

customer_template.to_csv('Predictions_Customer.csv', index=False)
print("✓ Created Predictions_Customer.csv")

In [None]:
# Create Product Predictions CSV: product/order feature columns taken from the
# first 100 rows of the return table. The 'lenght' spellings are the actual
# column names and must stay as they are.
product_template_columns = [
    'price',
    'freight_value',
    'product_category_name',
    'product_name_lenght',
    'product_description_lenght',
    'product_photos_qty',
    'product_weight_g',
    'product_length_cm',
    'product_height_cm',
    'product_width_cm',
]

# Empty placeholder columns; Step 9 overwrites them with model output.
product_template = data['return_data'][product_template_columns].head(100).assign(
    predicted_return='',
    return_probability='',
)

product_template.to_csv('Predictions_Product.csv', index=False)
print("✓ Created Predictions_Product.csv")

## Step 9: Make Predictions on the Template CSVs

In [None]:
# Restore the persisted segmentation model into a fresh instance
seg_model_loaded = CustomerSegmentation()
seg_model_loaded.load_model('models/segmentation_model.pkl')

# Score the rows of the customer template written in Step 8
customer_pred_df = pd.read_csv('Predictions_Customer.csv')
segments = seg_model_loaded.predict(customer_pred_df)
customer_pred_df['predicted_segment'] = segments

print("✓ Customer segmentation predictions complete!")

# Count of customers per predicted segment, as a plain dict for printing
segment_distribution = pd.Series(segments).value_counts().to_dict()
print(f"Segment distribution: {segment_distribution}")

In [None]:
# Restore the persisted churn model into a fresh instance
churn_model_loaded = ChurnPredictor()
churn_model_loaded.load_model('models/churn_model.pkl')

# predict() yields a (labels, probabilities) pair; each goes into its own column
# of the customer frame produced by the segmentation cell above.
churn_pred, churn_proba = churn_model_loaded.predict(customer_pred_df)
customer_pred_df['predicted_churn'] = churn_pred
customer_pred_df['churn_probability'] = churn_proba

print("✓ Churn predictions complete!")

# Count of customers per predicted churn label, as a plain dict for printing
churn_distribution = pd.Series(churn_pred).value_counts().to_dict()
print(f"Churn distribution: {churn_distribution}")

In [None]:
# Write the scored customers back to the same file the template came from
customer_pred_df.to_csv('Predictions_Customer.csv', index=False)
print("✓ Customer predictions saved to Predictions_Customer.csv")

# Show the id and the three prediction columns for the first ten customers
customer_preview_columns = [
    'customer_unique_id',
    'predicted_segment',
    'predicted_churn',
    'churn_probability',
]
customer_pred_df.loc[:, customer_preview_columns].head(10)

In [None]:
# Restore the persisted return model into a fresh instance
return_model_loaded = ReturnPredictor()
return_model_loaded.load_model('models/return_model.pkl')

# Score the rows of the product template written in Step 8;
# predict() yields a (labels, probabilities) pair.
product_pred_df = pd.read_csv('Predictions_Product.csv')
return_pred, return_proba = return_model_loaded.predict(product_pred_df)
product_pred_df['predicted_return'] = return_pred
product_pred_df['return_probability'] = return_proba

# Write the scored products back to the same file the template came from
product_pred_df.to_csv('Predictions_Product.csv', index=False)
print("✓ Product predictions saved to Predictions_Product.csv")

# Show category, price and both prediction columns for the first ten products
product_preview_columns = [
    'product_category_name',
    'price',
    'predicted_return',
    'return_probability',
]
product_pred_df.loc[:, product_preview_columns].head(10)

In [None]:
# Sales forecast: restore the persisted forecasting model into a fresh instance.
sales_model_loaded = SalesForecaster()
sales_model_loaded.load_model('models/sales_forecast_model.pkl')

# forecast_to_csv() is asked for a 90-period forecast written to
# Predictions_Sales.csv; its return value is kept because the summary cell reads
# the 'predicted_sales' column from it.
# NOTE(review): whether the returned frame holds only the 90 future rows or also
# history is not visible here — confirm, since the summary sums this column.
sales_forecast = sales_model_loaded.forecast_to_csv('Predictions_Sales.csv', periods=90)
print("✓ Sales forecast saved to Predictions_Sales.csv")

# Display forecast summary (first ten rows)
sales_forecast.head(10)

## Step 10: Summary and Visualizations

In [None]:
# Summary Statistics: a plain-text recap of the three prediction outputs
banner = "=" * 60
print(banner)
print("BI DASHBOARD - PREDICTION SUMMARY")
print(banner)

# Customers: segment counts, share predicted to churn, mean churn probability
print("\n📊 Customer Analysis:")
customer_count = len(customer_pred_df)
print(f"  Total Customers: {customer_count}")
print(f"  Segments: {customer_pred_df['predicted_segment'].value_counts().to_dict()}")
churn_rate_pct = customer_pred_df['predicted_churn'].sum() / customer_count * 100
print(f"  Churn Rate: {churn_rate_pct:.2f}%")
print(f"  Avg Churn Probability: {customer_pred_df['churn_probability'].mean():.2%}")

# Products: share predicted to be returned, mean return probability
print("\n📦 Product Analysis:")
product_count = len(product_pred_df)
print(f"  Total Products: {product_count}")
return_rate_pct = product_pred_df['predicted_return'].sum() / product_count * 100
print(f"  Return Rate: {return_rate_pct:.2f}%")
print(f"  Avg Return Probability: {product_pred_df['return_probability'].mean():.2%}")

# Sales: total and mean of the forecast column returned by forecast_to_csv()
print("\n💰 Sales Forecast:")
print("  Forecast Period: 90 days")
predicted_sales = sales_forecast['predicted_sales']
print(f"  Total Predicted Sales: ${predicted_sales.sum():,.2f}")
print(f"  Avg Daily Sales: ${predicted_sales.mean():,.2f}")

print("\n✅ All predictions completed and saved!")
print(banner)

In [None]:
# Side-by-side histograms of the two predicted-probability columns
def _probability_hist(ax, values, color, xlabel, ylabel, title):
    """Draw one 30-bin histogram of `values` on `ax` with the given labels."""
    ax.hist(values, bins=30, color=color, edgecolor='black', alpha=0.7)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.grid(axis='y', alpha=0.3)


dist_fig, (churn_ax, return_ax) = plt.subplots(1, 2, figsize=(12, 5))

_probability_hist(
    churn_ax,
    customer_pred_df['churn_probability'],
    'coral',
    'Churn Probability',
    'Number of Customers',
    'Distribution of Churn Probability',
)
_probability_hist(
    return_ax,
    product_pred_df['return_probability'],
    'lightgreen',
    'Return Probability',
    'Number of Products',
    'Distribution of Return Probability',
)

dist_fig.tight_layout()
plt.show()