# In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
import os
import sys
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from mpl_toolkits.mplot3d import Axes3D

# =================================================================
# SECTION 1: SETUP & ECONOMETRIC DEFLATION
# =================================================================
# Loads the cleaned retail transactions, splits them into 2010 and 2011,
# and restates 2011 monetary values in constant 2010 prices so the two
# years can be compared side by side.

# 1.1 Path Setup
# The parent of the working directory is treated as the project root and is
# appended to sys.path so that the `src` package below can be imported.
CURRENT_DIR = os.getcwd()
PROJECT_ROOT = os.path.abspath(os.path.join(CURRENT_DIR, '..'))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# 1.2 Import Logic from src
# Must stay below the sys.path tweak above, otherwise `src` is not found.
from src.feature_engineering import (
    add_time_features, calculate_rfm_metrics,
    prepare_rfm_for_clustering, perform_abc_analysis
)

# 1.3 Data Loading & Initial Prep
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact file exists. Consider deriving it from PROJECT_ROOT.
PROCESSED_PATH = r"C:\Users\Jesus Sanchez\Desktop\ALEXIS\1. Pre-Trabajo\1. Supply Chain Intelligence\data\processed\cleaned_retail_data.csv"
df_raw = pd.read_csv(PROCESSED_PATH, parse_dates=['InvoiceDate'])

# Derive the line total only when the cleaned file does not already carry it.
if 'TotalSum' not in df_raw.columns:
    df_raw['TotalSum'] = df_raw['Quantity'] * df_raw['Price']

df_raw['Year'] = df_raw['InvoiceDate'].dt.year

# 1.4 Inflation Adjustment (Deflating 2011 to 2010 Constant Prices)
INFLATION_2011 = 0.039  # 3.9%
# Rows from any other year are not carried forward. The .copy() calls make
# each year an independent frame so the assignments below do not write into
# a slice of df_raw.
df_2010 = df_raw[df_raw['Year'] == 2010].copy()
df_2011 = df_raw[df_raw['Year'] == 2011].copy()

print(f"📉 Deflating 2011 values (Inflation: {INFLATION_2011*100}%)...")
# Both the line total and the unit price are divided by (1 + inflation).
df_2011['TotalSum'] = df_2011['TotalSum'] / (1 + INFLATION_2011)
df_2011['Price'] = df_2011['Price'] / (1 + INFLATION_2011)

# Apply Time Features
# The charts in Section 2 read 'DayName' and 'Hour' from these frames;
# presumably add_time_features is what adds them (defined in src).
df_2010 = add_time_features(df_2010)
df_2011 = add_time_features(df_2011)

# =================================================================
# SECTION 2: LOGISTICS WORKLOAD (BAR CHARTS)
# =================================================================
# Draws a 2x2 grid: one column per year, with transaction counts per weekday
# on the top row and per hour on the bottom row.

print("📊 Generating Comparative Logistics Bar Charts...")
DAYS_ORDER = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
fig, axes = plt.subplots(2, 2, figsize=(20, 10))

# NOTE(review): recent seaborn releases may warn about `palette` being passed
# without `hue`; confirm against the installed seaborn version.
for i, (year_df, yr, color) in enumerate([(df_2010, 2010, 'Blues_d'), (df_2011, 2011, 'Oranges_d')]):
    # Top row: weekday volume, forced into calendar order.
    sns.countplot(data=year_df, x='DayName', order=DAYS_ORDER, palette=color, ax=axes[0, i])
    axes[0, i].set_title(f'Logistics Volume by Day: {yr}', fontweight='bold')
    # Bottom row: hourly volume, in seaborn's default category order.
    sns.countplot(data=year_df, x='Hour', palette=color, ax=axes[1, i])
    axes[1, i].set_title(f'Hourly Operational Peaks: {yr}', fontweight='bold')

plt.tight_layout()
plt.show()

# =================================================================
# SECTION 3: RFM & AI CLUSTERING (K=4)
# =================================================================

def _quintile_scores(values, labels):
    """Bin *values* into five quantile buckets and return them as int scores.

    Tries a plain value-based ``pd.qcut`` first. If that raises ``ValueError``
    (e.g. heavily tied data producing duplicate bin edges), it falls back to
    binning the positional rank, the same approach already used for Frequency.
    """
    try:
        binned = pd.qcut(values, 5, labels=labels)
    except ValueError:
        # rank(method='first') breaks ties by order of appearance, which
        # removes the duplicate quantile edges that made qcut fail.
        binned = pd.qcut(values.rank(method='first'), 5, labels=labels)
    return binned.astype(int)


def process_year_intelligence(df_year, n_clusters=4, random_state=42):
    """Build the RFM table for one year, cluster it and label each customer.

    Args:
        df_year: Transactions for a single year, passed straight to
            ``calculate_rfm_metrics``.
        n_clusters: Number of K-Means clusters (defaults to 4).
        random_state: Seed handed to K-Means (defaults to 42).

    Returns:
        A 4-tuple ``(rfm_year, scaled_data, rfm_scaler, kmeans)``:
        the RFM frame with ``Cluster_ID``, ``R_Score``, ``F_Score``,
        ``M_Score`` and ``Segment`` columns added; the scaled features and
        scaler returned by ``prepare_rfm_for_clustering``; and the fitted
        ``KMeans`` model.

    Raises:
        ValueError: If quintile binning is impossible even after the
            rank-based fallback (propagated from ``pd.qcut``).
    """
    rfm_year = calculate_rfm_metrics(df_year)
    scaled_data, rfm_scaler = prepare_rfm_for_clustering(rfm_year)

    kmeans = KMeans(n_clusters=n_clusters, init='k-means++', random_state=random_state, n_init=10)
    rfm_year['Cluster_ID'] = kmeans.fit_predict(scaled_data)

    # Manual Labeling
    # Recency labels are reversed: the lowest Recency quintile scores 5.
    rfm_year['R_Score'] = _quintile_scores(rfm_year['Recency'], [5, 4, 3, 2, 1])
    # Frequency is always binned on its rank, as in the original logic.
    rfm_year['F_Score'] = pd.qcut(
        rfm_year['Frequency'].rank(method='first'), 5, labels=[1, 2, 3, 4, 5]
    ).astype(int)
    rfm_year['M_Score'] = _quintile_scores(rfm_year['Monetary'], [1, 2, 3, 4, 5])

    r, f, m = rfm_year['R_Score'], rfm_year['F_Score'], rfm_year['M_Score']
    # np.select takes the first matching condition, so the list order below
    # reproduces an if/elif chain; anything unmatched is a core customer.
    rfm_year['Segment'] = np.select(
        [
            (r >= 4) & (f >= 4) & (m >= 4),
            (m >= 4) & (r >= 3),
            r <= 2,
        ],
        [
            'Champions (VIP)',
            'Big Spenders',
            'At Risk / Hibernating',
        ],
        default='Core Customers',
    )
    return rfm_year, scaled_data, rfm_scaler, kmeans

# Run the RFM/clustering pipeline once per year. The 2010 scaler and model
# are discarded; the 2011 ones are kept under their own names.
rfm_2010, scaled_2010, _, _ = process_year_intelligence(df_2010)
rfm_2011, scaled_2011, scaler_2011, model_2011 = process_year_intelligence(df_2011)

# 3.1 Side-by-Side 3D Visuals
# One 3D panel per year: scaled R/F/M on the axes, coloured by cluster id.
fig = plt.figure(figsize=(20, 8))
yearly_panels = (
    (scaled_2010, rfm_2010, 2010),
    (scaled_2011, rfm_2011, 2011),
)
for panel_no, (scaled, rfm_y, yr) in enumerate(yearly_panels, start=1):
    ax = fig.add_subplot(1, 2, panel_no, projection='3d')
    ax.scatter(
        scaled['Recency'],
        scaled['Frequency'],
        scaled['Monetary'],
        c=rfm_y['Cluster_ID'],
        cmap='viridis',
        s=40,
        alpha=0.6,
    )
    ax.set_title(f'3D Cluster Structure: {yr} (Constant Prices)', fontsize=14, fontweight='bold')
plt.show()

# =================================================================
# SECTION 4: IMPACT SUMMARY (CONSTANT £)
# =================================================================

def run_economic_impact(df_year, rfm_year):
    """Tag transactions with their ABC class and total revenue per segment.

    Args:
        df_year: Transactions for one year; must contain a 'Description'
            column. An 'ABC_Class' column is added to it IN PLACE.
        rfm_year: Per-customer frame with 'Segment' and 'Monetary' columns.

    Returns:
        ``(wealth, df_year)``: ``wealth`` has one row per segment with its
        summed 'Monetary' and its percentage share in 'Revenue_%', sorted by
        share in descending order; ``df_year`` is the same object passed in.
    """
    # Description -> ABC class lookup taken from the ABC analysis output.
    abc_table = perform_abc_analysis(df_year)
    class_by_description = dict(zip(abc_table['Description'], abc_table['ABC_Class']))
    df_year['ABC_Class'] = df_year['Description'].map(class_by_description)

    # Revenue per segment and each segment's share of the yearly total.
    segment_totals = rfm_year.groupby('Segment')['Monetary'].sum()
    wealth = segment_totals.reset_index()
    grand_total = wealth['Monetary'].sum()
    wealth['Revenue_%'] = (wealth['Monetary'] / grand_total) * 100
    wealth = wealth.sort_values(by='Revenue_%', ascending=False)
    return wealth, df_year

# Compute the per-segment revenue tables. The returned frames also carry the
# 'ABC_Class' column that the Section 5 crosstab relies on.
wealth_2010, df_2010 = run_economic_impact(df_2010, rfm_2010)
wealth_2011, df_2011 = run_economic_impact(df_2011, rfm_2011)

# Shared display format: pounds with thousands separators, share to 1 decimal.
WEALTH_FORMAT = {'Monetary': '£{:,.2f}', 'Revenue_%': '{:.1f}%'}

# `display` is the IPython/Jupyter built-in; this cell is meant for a notebook.
print("\n💰 --- IMPACT SUMMARY 2010 ---")
display(wealth_2010.style.format(WEALTH_FORMAT))
print("\n💰 --- IMPACT SUMMARY 2011 (Constant 2010 Prices) ---")
display(wealth_2011.style.format(WEALTH_FORMAT))

# =================================================================
# SECTION 5: ROOT CAUSE & PERSISTENCE
# =================================================================
# Shows, per customer segment, how its transactions split across ABC
# inventory classes, then saves the 2011 model and scaler to disk.

# Attach each customer's segment to their transactions. rfm_* is matched on
# its index, so it is expected to be indexed by customer id. The left join
# keeps transactions whose customer has no segment.
df_en_2010 = df_2010.merge(rfm_2010[['Segment']], left_on='Customer ID', right_index=True, how='left')
df_en_2011 = df_2011.merge(rfm_2011[['Segment']], left_on='Customer ID', right_index=True, how='left')

fig, axes = plt.subplots(1, 2, figsize=(22, 7), sharey=True)
order = ['Champions (VIP)', 'Big Spenders', 'Core Customers', 'At Risk / Hibernating']

for i, (df_en, yr) in enumerate([(df_en_2010, 2010), (df_en_2011, 2011)]):
    # normalize='index' turns each segment row into shares that sum to 1;
    # reindex(order) fixes the left-to-right bar order.
    # NOTE(review): the three colours follow the crosstab's column order,
    # presumably classes A, B, C; confirm against perform_abc_analysis.
    pd.crosstab(df_en['Segment'], df_en['ABC_Class'], normalize='index').reindex(order).plot(
        kind='bar', stacked=True, color=['#27AE60', '#F1C40F', '#E74C3C'], ax=axes[i], edgecolor='black'
    )
    axes[i].set_title(f'Inventory Dependency Root Cause: {yr}', fontweight='bold')
plt.show()

# Persistence
# Only the 2011 model and scaler are saved. exist_ok=True replaces the
# check-then-create pattern, so an already existing directory is not an error.
models_path = '../models'
os.makedirs(models_path, exist_ok=True)
joblib.dump(model_2011, os.path.join(models_path, 'rfm_kmeans_2011.pkl'))
joblib.dump(scaler_2011, os.path.join(models_path, 'rfm_scaler_2011.pkl'))

print("✅ Side-by-Side Analysis Complete. Values adjusted for inflation.")