# Data Exploration - Transportation Problem
## PT. MediCare Indonesia

This notebook explores the transportation problem data, including:
- Warehouse capacities
- Destination demands
- Transportation costs
- Data validation

In [None]:
# Import libraries
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# The project modules live in ../src; extend the search path before importing them.
sys.path.append('../src')
from model_formulation import TransportationData, ModelFormulation

# Set display options (None removes the pandas limit for each setting)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# Set plot style.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*";
# older releases only know the un-prefixed name. Use whichever one is installed
# so this first cell does not abort with an OSError on an older Matplotlib.
for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
else:
    # Neither name is registered: keep Matplotlib's default style, but say so.
    print("⚠️  seaborn darkgrid style not available; using Matplotlib default style")
sns.set_palette("husl")

print("✓ Libraries imported successfully!")

In [None]:
# Create data instance
# TransportationData comes from the project-local model_formulation module and is
# constructed here with no arguments; every later cell reads from this `data` object.
data = TransportationData()

# Print problem summary
# NOTE(review): presumably writes an overview of the problem to stdout — confirm
# against model_formulation.
data.print_problem_summary()

In [None]:
# Get supply dataframe
df_supply = data.get_supply_dataframe()

# Banner, then the raw table
rule = "=" * 50
print(rule)
print("WAREHOUSE CAPACITY ANALYSIS")
print(rule)
print(df_supply)

# Calculate statistics on the 'Capacity' column
capacity = df_supply['Capacity']
# Names of the warehouses holding the extreme capacities
largest_warehouse = df_supply.loc[capacity.idxmax(), 'Warehouse']
smallest_warehouse = df_supply.loc[capacity.idxmin(), 'Warehouse']

print(f"\nTotal Supply: {capacity.sum()} units")
print(f"Average Capacity: {capacity.mean():.2f} units")
print(f"Max Capacity: {capacity.max()} units ({largest_warehouse})")
print(f"Min Capacity: {capacity.min()} units ({smallest_warehouse})")
print(f"Std Deviation: {capacity.std():.2f} units")

In [None]:
# Visualize supply capacity: bar chart on the left, pie chart on the right
warehouse_names = df_supply['Warehouse']
capacities = df_supply['Capacity']
colors = ['#2E86AB', '#A23B72', '#06A77D', '#F18F01']

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))

# Bar chart
ax1.bar(warehouse_names, capacities, color=colors, alpha=0.8)
ax1.set_ylabel('Capacity (units)', fontsize=11, fontweight='bold')
ax1.set_title('Warehouse Capacity', fontsize=13, fontweight='bold')
ax1.grid(axis='y', alpha=0.3)

# Write each value 10 data units above the top of its bar
for position, units in enumerate(capacities):
    ax1.text(position, units + 10, str(units),
             ha='center', va='bottom', fontweight='bold')

# Pie chart
ax2.pie(
    capacities,
    labels=warehouse_names,
    autopct='%1.1f%%',
    colors=colors,
    startangle=90,
)
ax2.set_title('Capacity Distribution', fontsize=13, fontweight='bold')

plt.tight_layout()
plt.show()

In [None]:
# Get demand dataframe
df_demand = data.get_demand_dataframe()

# Banner, then the raw table
rule = "=" * 50
print(rule)
print("DESTINATION DEMAND ANALYSIS")
print(rule)
print(df_demand)

# Calculate statistics on the 'Demand' column
demand = df_demand['Demand']
# Names of the destinations with the extreme demands
busiest_destination = df_demand.loc[demand.idxmax(), 'Destination']
quietest_destination = df_demand.loc[demand.idxmin(), 'Destination']

print(f"\nTotal Demand: {demand.sum()} units")
print(f"Average Demand: {demand.mean():.2f} units")
print(f"Max Demand: {demand.max()} units ({busiest_destination})")
print(f"Min Demand: {demand.min()} units ({quietest_destination})")
print(f"Std Deviation: {demand.std():.2f} units")

In [None]:
# Visualize demand as a horizontal bar chart, one bar per destination
fig, ax = plt.subplots(figsize=(12, 6))

colors = ['#2E86AB', '#A23B72', '#06A77D', '#F18F01', '#C73E1D']
bars = ax.barh(
    df_demand['Destination'],
    df_demand['Demand'],
    color=colors,
    alpha=0.8,
)

ax.set_xlabel('Demand (units)', fontsize=11, fontweight='bold')
ax.set_title('Destination Demand', fontsize=13, fontweight='bold')
ax.grid(axis='x', alpha=0.3)

# Label each bar with its integer value, 5 data units past the bar end and
# vertically centred on the bar
for rect, units in zip(bars, df_demand['Demand']):
    label_x = rect.get_width() + 5
    label_y = rect.get_y() + rect.get_height()/2
    ax.text(label_x, label_y, str(int(units)),
            ha='left', va='center', fontweight='bold')

plt.tight_layout()
plt.show()

In [None]:
# Get cost matrix
df_costs = data.get_cost_matrix()

rule = "=" * 70
print(rule)
print("TRANSPORTATION COST MATRIX (Rp thousands per unit)")
print(rule)
print(df_costs)

# Calculate statistics over every cell of the matrix; flatten once and reuse
cost_values = df_costs.values.flatten()

print("\nCost Statistics:")
print(f"  Mean Cost: Rp {cost_values.mean():.2f}k per unit")
print(f"  Median Cost: Rp {np.median(cost_values):.2f}k per unit")
print(f"  Min Cost: Rp {cost_values.min():.0f}k per unit")
print(f"  Max Cost: Rp {cost_values.max():.0f}k per unit")
# NOTE(review): ndarray.std() defaults to ddof=0 (population), whereas pandas
# Series.std() defaults to ddof=1 (sample) — confirm which is intended here.
print(f"  Std Deviation: Rp {cost_values.std():.2f}k")

In [None]:
# Create heatmap of costs (rows and columns come straight from df_costs)
fig, ax = plt.subplots(figsize=(12, 8))

heatmap_options = dict(
    annot=True,                                   # write the value in each cell
    fmt='.0f',                                    # ...with no decimals
    cmap='RdYlGn_r',
    cbar_kws={'label': 'Cost (Rp thousands)'},
)
im = sns.heatmap(df_costs, ax=ax, **heatmap_options)

ax.set_title('Transportation Cost Heatmap', fontsize=14, fontweight='bold', pad=15)
ax.set_xlabel('Destination', fontsize=11, fontweight='bold')
ax.set_ylabel('Warehouse', fontsize=11, fontweight='bold')

plt.tight_layout()
plt.show()

In [None]:
# Find cheapest and most expensive routes
# One record per (warehouse, destination) pair; underscores in destination
# names are replaced by spaces for display.
routes = [
    {
        'From': warehouse,
        'To': destination.replace('_', ' '),
        'Cost (Rp ribu)': data.costs[(warehouse, destination)],
    }
    for warehouse in data.warehouses
    for destination in data.destinations
]

df_routes = pd.DataFrame(routes)

cost_column = 'Cost (Rp ribu)'
rule = "=" * 70

print(rule)
print("TOP 5 CHEAPEST ROUTES")
print(rule)
cheapest = df_routes.nsmallest(5, cost_column)
print(cheapest.to_string(index=False))

print("\n" + rule)
print("TOP 5 MOST EXPENSIVE ROUTES")
print(rule)
priciest = df_routes.nlargest(5, cost_column)
print(priciest.to_string(index=False))

In [None]:
# Check if problem is balanced
total_supply = data.get_total_supply()
total_demand = data.get_total_demand()

rule = "=" * 70
print(rule)
print("SUPPLY-DEMAND BALANCE ANALYSIS")
print(rule)
print(f"Total Supply: {total_supply} units")
print(f"Total Demand: {total_demand} units")
print(f"Difference: {total_supply - total_demand} units")

# Report the balance status and the remedy suggested for each unbalanced case
if data.is_balanced():
    print("\n✓ Problem is BALANCED")
elif total_supply > total_demand:
    print(f"\n⚠️  SURPLUS: {total_supply - total_demand} units")
    print("   Solution: Add dummy destination with zero cost")
else:
    print(f"\n⚠️  SHORTAGE: {total_demand - total_supply} units")
    print("   Solution: Add dummy warehouse or reduce demand")

# Visualize balance: two bars, supply next to demand
categories = ['Supply', 'Demand']
values = [total_supply, total_demand]
colors = ['#2E86AB', '#A23B72']

fig, ax = plt.subplots(figsize=(8, 6))
bars = ax.bar(categories, values, color=colors, alpha=0.8, width=0.5)
ax.set_ylabel('Units', fontsize=11, fontweight='bold')
ax.set_title('Supply vs Demand', fontsize=13, fontweight='bold')
ax.grid(axis='y', alpha=0.3)

# Print each total 20 data units above its bar, centred horizontally
for rect, amount in zip(bars, values):
    centre_x = rect.get_x() + rect.get_width()/2.
    ax.text(centre_x, rect.get_height() + 20, str(amount),
            ha='center', va='bottom', fontweight='bold', fontsize=12)

plt.tight_layout()
plt.show()

In [None]:
# Validate data
# validate_data() returns a (flag, errors) pair; the errors are listed one per
# line when the flag is falsy.
is_valid, errors = data.validate_data()

rule = "=" * 70
print(rule)
print("DATA VALIDATION")
print(rule)

if not is_valid:
    print("✗ Validation FAILED")
    print("\nErrors found:")
    for problem in errors:
        print(f"  - {problem}")
else:
    print("✓ All validation checks PASSED")
    print("\nThe data is ready for optimization!")

In [None]:
# Export data to Excel
export_path = Path('../data/input_data.xlsx')

# The target directory may be missing (e.g. on a fresh checkout, since git does
# not track empty directories); create it first so the export cannot fail on a
# missing folder. exist_ok=True makes this a no-op when it is already there.
export_path.parent.mkdir(parents=True, exist_ok=True)

# Pass a plain string, as the original call did.
data.export_to_excel(str(export_path))

print("✓ Data exported successfully!")
print("\nYou can now proceed to the optimization step.")

In [None]:
# Print mathematical formulation
# Build a ModelFormulation from the `data` instance created earlier in the
# notebook, then call its print_mathematical_formulation() method.
formulation = ModelFormulation(data)
formulation.print_mathematical_formulation()

## Summary

In this notebook, we have:
1. ✅ Loaded and explored the transportation problem data
2. ✅ Analyzed warehouse capacities and destination demands
3. ✅ Examined the transportation cost matrix
4. ✅ Identified cheapest and most expensive routes
5. ✅ Checked supply-demand balance
6. ✅ Validated data integrity
7. ✅ Exported data to Excel
8. ✅ Reviewed mathematical formulation

### Key Findings:
- Total Supply: 1,300 units
- Total Demand: 1,250 units
- Problem Status: **UNBALANCED** (50 units surplus)
- Cost Range: Rp 4k - Rp 25k per unit
- Cheapest Route: Tangerang → RS Tangerang (Rp 4k)
- Most Expensive: Tangerang → RS Bogor (Rp 25k)

### Next Steps:
1. Proceed to `02_manual_solution.ipynb` for VAM method
2. Then `03_excel_solver_solution.ipynb` for Excel Solver
3. Finally `04_python_optimization.ipynb` for automated optimization

---
**Note**: All data has been validated and is ready for optimization!