<a href="https://colab.research.google.com/github/Helian1505/Avocado_Sales_Optimization/blob/main/Avocado_hass.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta, date

# ==========================================
# PHASE 1: DATA GENERATION & ASSUMPTIONS
# ==========================================
# Builds one synthetic row per calendar day (demand, unit price, average shelf
# maturity) and derives the units and revenue lost on low-maturity days.
print("Generating synthetic data based on Colombian market parameters...")

DAILY_UNITS_BASE = 15000         # Units per day (Large retail chain)
AVERAGE_PRICE_UNIT = 1000        # COP per unit (Hass Avocado)
ESTIMATED_REJECTION_RATE = 0.15  # 15% loss due to poor quality/immaturity
LOW_MATURITY_THRESHOLD = 2.5     # Critical maturity level (1-5 scale)

days = 365
start_date = date(2025, 1, 1)
dates = [start_date + timedelta(days=i) for i in range(days)]

# Daily demand is drawn from +/-10% around the base. np.random.randint expects
# integer bounds, so the float products are truncated explicitly here instead
# of being handed to NumPy as floats.
demand_low = int(DAILY_UNITS_BASE * 0.9)
demand_high = int(DAILY_UNITS_BASE * 1.1)  # exclusive upper bound

np.random.seed(42)  # For reproducibility
df_avocado = pd.DataFrame({
    'Date': dates,
    'Base_Demand': np.random.randint(demand_low, demand_high, days),
    'Unit_Price_COP': AVERAGE_PRICE_UNIT,
    # Normal draw (mean 3.2, sd 0.7) clamped to the 1-5 scale, one decimal.
    'Gondola_Avg_Maturity': np.random.normal(3.2, 0.7, days).clip(1, 5).round(1)
})

# Current Scenario Logic
# Days below the maturity threshold lose a fixed share of demand; all other
# days lose nothing.
df_avocado['Maturity_Impact_Ratio'] = np.where(
    df_avocado['Gondola_Avg_Maturity'] < LOW_MATURITY_THRESHOLD,
    ESTIMATED_REJECTION_RATE,
    0,
)
df_avocado['Units_Lost'] = df_avocado['Base_Demand'] * df_avocado['Maturity_Impact_Ratio']
df_avocado['Actual_Revenue_Current'] = (
    (df_avocado['Base_Demand'] - df_avocado['Units_Lost']) * df_avocado['Unit_Price_COP']
)

# ==========================================
# PHASE 2: SOLUTION MODELING (THE MACHINE)
# ==========================================
# Recomputes lost units and revenue with the lower rejection rate, then
# derives net benefit, ROI and payback for the machine investment.
MACHINE_COST_COP = 150_000_000
DAILY_OPERATIONAL_COST = 150_000
OPTIMIZED_REJECTION_RATE = 0.03 # Machine reduces rejection from 15% to 3%

# Only days already penalised in the current scenario carry a loss; on those
# days the rejection rate is replaced by the optimized one.
df_avocado['Optimized_Impact_Ratio'] = np.where(df_avocado['Maturity_Impact_Ratio'] > 0,
                                               OPTIMIZED_REJECTION_RATE, 0)
df_avocado['Units_Lost_Optimized'] = df_avocado['Base_Demand'] * df_avocado['Optimized_Impact_Ratio']
df_avocado['Actual_Revenue_Optimized'] = (df_avocado['Base_Demand'] - df_avocado['Units_Lost_Optimized']) * df_avocado['Unit_Price_COP']

# Financial Metrics
total_recovered_revenue = (df_avocado['Units_Lost'].sum() - df_avocado['Units_Lost_Optimized'].sum()) * AVERAGE_PRICE_UNIT
# Operating cost is charged for the same number of days that were simulated,
# so it stays aligned with the recovered revenue if `days` is changed.
annual_net_benefit = total_recovered_revenue - (DAILY_OPERATIONAL_COST * days)
annual_roi = (annual_net_benefit / MACHINE_COST_COP) * 100
# A zero or negative net benefit never repays the machine; report that as
# infinity instead of a negative or divide-by-zero month count.
if annual_net_benefit > 0:
    payback_months = MACHINE_COST_COP / (annual_net_benefit / 12)
else:
    payback_months = float('inf')

# ==========================================
# PHASE 3: VISUALIZATIONS & SAVING IMAGES
# ==========================================
sns.set_style("whitegrid")

# 1. Root Cause Analysis (Scatter Plot)
# One point per simulated day: average shelf maturity against units lost.
plt.figure(figsize=(10, 6))
sns.scatterplot(x='Gondola_Avg_Maturity', y='Units_Lost', data=df_avocado, alpha=0.6)
# The legend text is built from the constant so it cannot drift from the
# position where the line is actually drawn.
plt.axvline(
    LOW_MATURITY_THRESHOLD,
    color='red',
    linestyle='--',
    label=f'Critical Maturity Threshold ({LOW_MATURITY_THRESHOLD})',
)
plt.title('Impact of Avocado Immaturity on Sales Loss', fontsize=14)
plt.xlabel('Average Shelf Maturity (1-5 Scale)')
plt.ylabel('Units Lost (Customer Rejection)')
plt.legend()
plt.savefig('maturity_vs_loss.png', dpi=300, bbox_inches='tight')
print("Image 1 saved: maturity_vs_loss.png")

# 2. Business Impact (Comparison Bar Chart)
# Compares total revenue over the simulated period for the two scenarios.
current_total = df_avocado['Actual_Revenue_Current'].sum()
opt_total = df_avocado['Actual_Revenue_Optimized'].sum()

scenario_labels = ['Current State', 'With Tech Solution']

plt.figure(figsize=(8, 6))
# seaborn 0.13 deprecates `palette` without `hue`. Mapping the scenario labels
# to `hue` keeps one colour per bar, and dodge=False keeps each bar centred on
# its tick at full width.
ax = sns.barplot(
    x=scenario_labels,
    y=[current_total, opt_total],
    hue=scenario_labels,
    palette=['#e74c3c', '#2ecc71'],
    dodge=False,
)
# A hue legend would only repeat the x tick labels, so drop it if one was drawn.
hue_legend = ax.get_legend()
if hue_legend is not None:
    hue_legend.remove()
plt.title('Annual Revenue Comparison (COP)', fontsize=14)
# Show the y ticks in millions, e.g. 5_400_000_000 -> "$5,400M".
ax.get_yaxis().set_major_formatter(plt.FuncFormatter(lambda x, loc: f'${x/1e6:,.0f}M'))
plt.ylabel('Total Annual Revenue (Millions)')
plt.savefig('revenue_comparison.png', dpi=300, bbox_inches='tight')
print("Image 2 saved: revenue_comparison.png")

# Console report of the headline financial metrics, one per line.
summary_lines = (
    "\nFINANCIAL SUMMARY:",
    f"Annual Net Benefit: ${annual_net_benefit:,.0f} COP",
    f"Annual ROI: {annual_roi:.2f}%",
    f"Payback Period: {payback_months:.1f} months",
)
print("\n".join(summary_lines))