# Packaging Recommendation System

This notebook performs:
1. Data Loading & Cleaning
2. Advanced Material Mapping
3. Model Training (Sustainability Predictor)
4. Recommendation Engine

In [26]:
# Import required libraries
# Note: Install required packages if needed:
# pip install pandas numpy scikit-learn matplotlib seaborn fuzzywuzzy python-Levenshtein xgboost

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb

# Try importing fuzzywuzzy, fallback to thefuzz if needed
try:
    from fuzzywuzzy import fuzz, process
except ImportError:
    try:
        from thefuzz import fuzz, process
    except ImportError:
        print("Warning: fuzzywuzzy or thefuzz not found. Install with: pip install fuzzywuzzy python-Levenshtein")
        # Fallback: simple substring matching only
        def process_extractOne(text, choices, scorer=None):
            # Simple fallback - just return first match or None
            for choice in choices:
                if text.lower() in choice.lower() or choice.lower() in text.lower():
                    return (choice, 100)
            return None
        process = type('obj', (object,), {'extractOne': process_extractOne})
        fuzz = type('obj', (object,), {'token_sort_ratio': lambda x, y: 0})

import warnings
warnings.filterwarnings('ignore')

# Set style for plots
try:
    plt.style.use('seaborn-v0_8-darkgrid')
except:
    plt.style.use('seaborn')
sns.set_palette("husl")



## 1. Data Loading & Cleaning

In [27]:
# Load both CSV files
history_df = pd.read_csv('real_packaging_history (1).csv')
materials_df = pd.read_csv('materials_database_600 (1).csv')

print("History Data Shape:", history_df.shape)
print("Materials Data Shape:", materials_df.shape)
print("\nHistory Columns:", history_df.columns.tolist())
print("\nMaterials Columns:", materials_df.columns.tolist())
print("\nFirst few rows of history:")
history_df.head()

History Data Shape: (15000, 16)
Materials Data Shape: (600, 8)

History Columns: ['Order_ID', 'Date', 'Item_Name', 'Category', 'Weight_kg', 'Volumetric_Weight_kg', 'L_cm', 'W_cm', 'H_cm', 'Fragility', 'Moisture_Sens', 'Shipping_Mode', 'Distance_km', 'Packaging_Used', 'Cost_USD', 'CO2_Emission_kg']

Materials Columns: ['Material_ID', 'Material_Name', 'Category', 'Density_kg_m3', 'Tensile_Strength_MPa', 'CO2_Emission_kg', 'Cost_per_kg', 'Biodegradable']

First few rows of history:


Unnamed: 0,Order_ID,Date,Item_Name,Category,Weight_kg,Volumetric_Weight_kg,L_cm,W_cm,H_cm,Fragility,Moisture_Sens,Shipping_Mode,Distance_km,Packaging_Used,Cost_USD,CO2_Emission_kg
0,1,2025-05-17,Sneakers,Clothing,0.82,1.41,28,21,12,5,False,Air,1893,Kraft Paper Mailer,1.56,6.673
1,2,2025-09-22,Smartphone,Electronics,0.29,0.0,14,7,0,9,True,Air,2141,Mushroom Pkg (Mycelium),1.92,1.552
2,3,2025-11-12,Office Chair,Furniture,12.26,38.06,60,61,52,6,False,Road,1491,Wood Crate,16.42,28.374
3,4,2025-01-30,Office Chair,Furniture,11.56,38.27,65,64,46,5,False,Road,530,Wood Crate,16.31,10.142
4,5,2025-09-06,T-Shirt,Clothing,0.25,0.08,22,18,1,1,False,Air,1587,Kraft Paper Mailer,0.3,0.992


In [28]:
# Check for missing values
print("Missing values in history data:")
print(history_df.isnull().sum())
print("\nMissing values in materials data:")
print(materials_df.isnull().sum())

Missing values in history data:
Order_ID                0
Date                    0
Item_Name               0
Category                0
Weight_kg               0
Volumetric_Weight_kg    0
L_cm                    0
W_cm                    0
H_cm                    0
Fragility               0
Moisture_Sens           0
Shipping_Mode           0
Distance_km             0
Packaging_Used          0
Cost_USD                0
CO2_Emission_kg         0
dtype: int64

Missing values in materials data:
Material_ID             0
Material_Name           0
Category                0
Density_kg_m3           0
Tensile_Strength_MPa    0
CO2_Emission_kg         0
Cost_per_kg             0
Biodegradable           0
dtype: int64


In [29]:
# Impute missing 'Cost_USD' and 'CO2_Emission_kg' with median of their respective 'Category'
for col in ['Cost_USD', 'CO2_Emission_kg']:
    if history_df[col].isnull().sum() > 0:
        category_medians = history_df.groupby('Category')[col].median()
        history_df[col] = history_df.apply(
            lambda row: category_medians[row['Category']] if pd.isnull(row[col]) else row[col],
            axis=1
        )
        print(f"Imputed {history_df[col].isnull().sum()} missing values in {col}")

print("\nMissing values after imputation:")
print(history_df[['Cost_USD', 'CO2_Emission_kg']].isnull().sum())


Missing values after imputation:
Cost_USD           0
CO2_Emission_kg    0
dtype: int64


In [30]:
# Fix invalid dimensions: If L_cm, W_cm, or H_cm are 0, replace with average for that Category
for dim in ['L_cm', 'W_cm', 'H_cm']:
    invalid_mask = (history_df[dim] == 0) | (history_df[dim].isnull())
    if invalid_mask.sum() > 0:
        category_means = history_df.groupby('Category')[dim].mean()
        history_df[dim] = history_df.apply(
            lambda row: category_means[row['Category']] if (row[dim] == 0 or pd.isnull(row[dim])) else row[dim],
            axis=1
        )
        print(f"Fixed {invalid_mask.sum()} invalid values in {dim}")

print("\nInvalid dimensions (zeros) after fixing:")
print((history_df[['L_cm', 'W_cm', 'H_cm']] == 0).sum())

Fixed 1128 invalid values in H_cm

Invalid dimensions (zeros) after fixing:
L_cm    0
W_cm    0
H_cm    0
dtype: int64


In [31]:
# Calculate Product_Volume_m3 as (L*W*H)/1,000,000
history_df['Product_Volume_m3'] = (history_df['L_cm'] * history_df['W_cm'] * history_df['H_cm']) / 1_000_000

print("Product_Volume_m3 calculated successfully!")
print(f"Volume statistics:\n{history_df['Product_Volume_m3'].describe()}")

Product_Volume_m3 calculated successfully!
Volume statistics:
count    15000.000000
mean         0.133259
std          0.381661
min          0.000016
25%          0.000475
50%          0.005850
75%          0.028910
max          1.822824
Name: Product_Volume_m3, dtype: float64


## 2. Advanced Material Mapping

In [32]:
# Get unique packaging names from history and material names from database
packaging_used = history_df['Packaging_Used'].unique()
material_names = materials_df['Material_Name'].unique()

print(f"Unique packaging types in history: {len(packaging_used)}")
print(f"Unique materials in database: {len(material_names)}")
print("\nSample packaging types:", packaging_used[:10])
print("\nSample material names:", material_names[:10])

Unique packaging types in history: 10
Unique materials in database: 600

Sample packaging types: ['Kraft Paper Mailer' 'Mushroom Pkg (Mycelium)' 'Wood Crate'
 'PLA Bioplastic' 'Honeycomb Paper' 'Recycled PET Box'
 'Bubble Wrap (LDPE)' 'Corrugated Cardboard' 'Styrofoam (EPS)'
 'Cornstarch Foam']

Sample material names: ['Recycled Palm Leaf' 'Waterproof Palm Leaf' 'Single-Ply Kraft Paper'
 'Single-Ply Aluminum Foil' 'Fire-Retardant Nylon Fabric'
 'Commercial-Grade Recycled Denim' 'Food-Grade Polycarbonate'
 'Lightweight PET Plastic' 'Single-Ply Borosilicate Glass'
 'Food-Grade Mushroom Mycelium']


In [33]:
# Create a mapping function using fuzzy matching
def map_packaging_to_material(packaging_name, material_list, threshold=60):
    """
    Maps packaging name to material name using fuzzy matching.
    Also handles common substring matches.
    """
    # Direct substring matching for common cases
    packaging_lower = packaging_name.lower()
    
    # Common mappings
    if 'mushroom' in packaging_lower or 'mycelium' in packaging_lower:
        matches = [m for m in material_list if 'mushroom' in m.lower() or 'mycelium' in m.lower()]
        if matches:
            return matches[0]
    
    if 'wood' in packaging_lower or 'crate' in packaging_lower:
        matches = [m for m in material_list if 'plywood' in m.lower()]
        if matches:
            return matches[0]
    
    if 'kraft' in packaging_lower:
        matches = [m for m in material_list if 'kraft' in m.lower()]
        if matches:
            return matches[0]
    
    if 'pla' in packaging_lower or 'bioplastic' in packaging_lower:
        matches = [m for m in material_list if 'pla' in m.lower() or 'bioplastic' in m.lower()]
        if matches:
            return matches[0]
    
    if 'bubble' in packaging_lower or 'ldpe' in packaging_lower:
        matches = [m for m in material_list if 'bubble' in m.lower() or 'ldpe' in m.lower()]
        if matches:
            return matches[0]
    
    if 'pet' in packaging_lower and 'recycled' in packaging_lower:
        matches = [m for m in material_list if 'pet' in m.lower()]
        if matches:
            return matches[0]
    
    if 'honeycomb' in packaging_lower or 'paper' in packaging_lower:
        matches = [m for m in material_list if 'paper' in m.lower() or 'honeycomb' in m.lower()]
        if matches:
            return matches[0]
    
    # Additional mappings for other packaging types
    if 'corrugated' in packaging_lower or 'cardboard' in packaging_lower:
        matches = [m for m in material_list if 'cardboard' in m.lower() or 'corrugated' in m.lower()]
        if matches:
            return matches[0]
    
    if 'styrofoam' in packaging_lower or 'eps' in packaging_lower:
        matches = [m for m in material_list if 'foam' in m.lower() and 'polyurethane' in m.lower()]
        if matches:
            return matches[0]
    
    if 'cornstarch' in packaging_lower:
        matches = [m for m in material_list if 'cornstarch' in m.lower()]
        if matches:
            return matches[0]
    
    # Fuzzy matching as fallback
    try:
        best_match = process.extractOne(packaging_name, material_list, scorer=fuzz.token_sort_ratio)
        if best_match and best_match[1] >= threshold:
            return best_match[0]
    except:
        # If fuzzy matching fails, try simple substring match
        for material in material_list:
            if packaging_lower in material.lower() or material.lower() in packaging_lower:
                return material
    
    return None

# Create mapping dictionary
packaging_to_material = {}
for packaging in packaging_used:
    mapped = map_packaging_to_material(packaging, material_names)
    packaging_to_material[packaging] = mapped
    if mapped:
        print(f"'{packaging}' -> '{mapped}'")
    else:
        print(f"'{packaging}' -> NOT FOUND")

print(f"\nSuccessfully mapped {sum(1 for v in packaging_to_material.values() if v is not None)}/{len(packaging_to_material)} packaging types")

'Kraft Paper Mailer' -> 'Single-Ply Kraft Paper'
'Mushroom Pkg (Mycelium)' -> 'Food-Grade Mushroom Mycelium'
'Wood Crate' -> 'Lightweight Plywood'
'PLA Bioplastic' -> 'Lightweight PET Plastic'
'Honeycomb Paper' -> 'Single-Ply Kraft Paper'
'Recycled PET Box' -> 'Lightweight PET Plastic'
'Bubble Wrap (LDPE)' -> 'Standard Bubble Wrap (LDPE)'
'Corrugated Cardboard' -> 'Industrial-Grade Corrugated Cardboard'
'Styrofoam (EPS)' -> 'UV-Stabilized Polyurethane Foam'
'Cornstarch Foam' -> 'Double-Wall Cornstarch Foam'

Successfully mapped 10/10 packaging types


In [34]:
# Apply mapping to history dataframe
history_df['Material_Name'] = history_df['Packaging_Used'].map(packaging_to_material)

# Check how many were successfully mapped
print(f"Mapped rows: {history_df['Material_Name'].notna().sum()}/{len(history_df)}")
print(f"Unmapped rows: {history_df['Material_Name'].isna().sum()}")

# Show unmapped packaging types
if history_df['Material_Name'].isna().sum() > 0:
    unmapped = history_df[history_df['Material_Name'].isna()]['Packaging_Used'].unique()
    print(f"\nUnmapped packaging types: {unmapped}")

Mapped rows: 15000/15000
Unmapped rows: 0


In [35]:
# Merge material properties from materials database
# Rename CO2_Emission_kg to Material_CO2_Factor in materials_df for clarity
materials_merge = materials_df[['Material_Name', 'Density_kg_m3', 'Cost_per_kg', 'CO2_Emission_kg']].copy()
materials_merge = materials_merge.rename(columns={'CO2_Emission_kg': 'Material_CO2_Factor'})

# Merge with history dataframe
history_df = history_df.merge(
    materials_merge,
    on='Material_Name',
    how='left',
    suffixes=('', '_material')
)

# Rename Density column for clarity
history_df = history_df.rename(columns={'Density_kg_m3': 'Material_Density'})

print("Merged material properties:")
print(f"Rows with material properties: {history_df['Material_Density'].notna().sum()}/{len(history_df)}")
print("\nSample merged data:")
history_df[['Packaging_Used', 'Material_Name', 'Material_Density', 'Material_CO2_Factor', 'Cost_per_kg']].head(10)

Merged material properties:
Rows with material properties: 15000/15000

Sample merged data:


Unnamed: 0,Packaging_Used,Material_Name,Material_Density,Material_CO2_Factor,Cost_per_kg
0,Kraft Paper Mailer,Single-Ply Kraft Paper,742,0.742,0.91
1,Mushroom Pkg (Mycelium),Food-Grade Mushroom Mycelium,146,0.486,5.1
2,Wood Crate,Lightweight Plywood,515,0.535,2.23
3,Wood Crate,Lightweight Plywood,515,0.535,2.23
4,Kraft Paper Mailer,Single-Ply Kraft Paper,742,0.742,0.91
5,Wood Crate,Lightweight Plywood,515,0.535,2.23
6,Kraft Paper Mailer,Single-Ply Kraft Paper,742,0.742,0.91
7,Mushroom Pkg (Mycelium),Food-Grade Mushroom Mycelium,146,0.486,5.1
8,Mushroom Pkg (Mycelium),Food-Grade Mushroom Mycelium,146,0.486,5.1
9,Mushroom Pkg (Mycelium),Food-Grade Mushroom Mycelium,146,0.486,5.1


In [36]:
# Remove rows where material mapping failed (no material properties)
initial_rows = len(history_df)
history_df = history_df[history_df['Material_Density'].notna()].copy()
removed_rows = initial_rows - len(history_df)

print(f"Removed {removed_rows} rows without material mapping")
print(f"Final dataset size: {len(history_df)} rows")

Removed 0 rows without material mapping
Final dataset size: 15000 rows


## 3. Model Training (Sustainability Predictor)

In [37]:
# Prepare features and target
# Features: Weight_kg, Distance_km, Shipping_Mode (One-Hot Encoded), 
#           Material_CO2_Factor, Material_Density
# Target: CO2_Emission_kg

# Select features
feature_cols = ['Weight_kg', 'Distance_km', 'Shipping_Mode', 
                'Material_CO2_Factor', 'Material_Density']

# Create feature dataframe
X = history_df[feature_cols].copy()
y = history_df['CO2_Emission_kg'].copy()

print(f"Feature shape: {X.shape}")
print(f"Target shape: {y.shape}")
print(f"\nMissing values in features:")
print(X.isnull().sum())

Feature shape: (15000, 5)
Target shape: (15000,)

Missing values in features:
Weight_kg              0
Distance_km            0
Shipping_Mode          0
Material_CO2_Factor    0
Material_Density       0
dtype: int64


In [38]:
# One-Hot Encode Shipping_Mode
encoder = OneHotEncoder(sparse_output=False, drop='first')
shipping_encoded = encoder.fit_transform(X[['Shipping_Mode']])
shipping_feature_names = encoder.get_feature_names_out(['Shipping_Mode'])

# Create encoded dataframe
X_encoded = pd.DataFrame(
    shipping_encoded,
    columns=shipping_feature_names,
    index=X.index
)

# Add other numerical features
X_encoded['Weight_kg'] = X['Weight_kg']
X_encoded['Distance_km'] = X['Distance_km']
X_encoded['Material_CO2_Factor'] = X['Material_CO2_Factor']
X_encoded['Material_Density'] = X['Material_Density']

print("Features after encoding:")
print(X_encoded.columns.tolist())
print(f"\nFeature shape: {X_encoded.shape}")
X_encoded.head()

Features after encoding:
['Shipping_Mode_Road', 'Weight_kg', 'Distance_km', 'Material_CO2_Factor', 'Material_Density']

Feature shape: (15000, 5)


Unnamed: 0,Shipping_Mode_Road,Weight_kg,Distance_km,Material_CO2_Factor,Material_Density
0,0.0,0.82,1893,0.742,742
1,0.0,0.29,2141,0.486,146
2,1.0,12.26,1491,0.535,515
3,1.0,11.56,530,0.535,515
4,0.0,0.25,1587,0.742,742


In [39]:
# Split data into train (80%) and test (20%)
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.2, random_state=42
)

print(f"Training set: {X_train.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples")

Training set: 12000 samples
Test set: 3000 samples


In [40]:
# Train XGBoost for CO2 prediction
xgb_co2_model = xgb.XGBRegressor(
    n_estimators=100,
    max_depth=10,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    n_jobs=-1
)

print("Training XGBoost for CO2 Prediction...")
xgb_co2_model.fit(X_train, y_train)
print("XGBoost CO2 Prediction Training completed!")

Training XGBoost for CO2 Prediction...
XGBoost CO2 Prediction Training completed!


In [41]:
# Predict on test set and evaluate XGBoost CO2 model
y_pred_xgb = xgb_co2_model.predict(X_test)
rmse_xgb = np.sqrt(mean_squared_error(y_test, y_pred_xgb))
mae_xgb = mean_absolute_error(y_test, y_pred_xgb)

print(f"XGBoost CO2 Prediction RMSE on Test Set: {rmse_xgb:.4f}")
print(f"XGBoost CO2 Prediction MAE on Test Set: {mae_xgb:.4f}")
print(f"Mean CO2 Emission: {y_test.mean():.4f}")
print(f"RMSE as % of mean: {(rmse_xgb/y_test.mean())*100:.2f}%")
print(f"MAE as % of mean: {(mae_xgb/y_test.mean())*100:.2f}%")

# Compare with previous Random Forest (if available)
print(f"\nPerformance Comparison:")
print(f"XGBoost RMSE: {rmse_xgb:.4f} vs Previous RF RMSE: 7.8353")
print(f"XGBoost MAE: {mae_xgb:.4f}")
print(f"Improvement: {((7.8353 - rmse_xgb) / 7.8353) * 100:.2f}% reduction in RMSE")

XGBoost CO2 Prediction RMSE on Test Set: 10.6824
XGBoost CO2 Prediction MAE on Test Set: 2.8192
Mean CO2 Emission: 24.0177
RMSE as % of mean: 44.48%
MAE as % of mean: 11.74%

Performance Comparison:
XGBoost RMSE: 10.6824 vs Previous RF RMSE: 7.8353
XGBoost MAE: 2.8192
Improvement: -36.34% reduction in RMSE


# Feature importance analysis for XGBoost CO2 prediction


In [42]:
# Prepare features and target for cost prediction
# Features: Weight_kg, Distance_km, Shipping_Mode (One-Hot Encoded), 
#           Material_Density, Cost_per_kg, Product_Volume_m3
# Target: Cost_USD

# Select features for cost prediction
cost_feature_cols = ['Weight_kg', 'Distance_km', 'Shipping_Mode', 
                     'Material_Density', 'Cost_per_kg', 'Product_Volume_m3']

# Create feature dataframe for cost prediction
X_cost = history_df[cost_feature_cols].copy()
y_cost = history_df['Cost_USD'].copy()

print(f"Cost Prediction Feature shape: {X_cost.shape}")
print(f"Cost Prediction Target shape: {y_cost.shape}")
print(f"\nMissing values in cost features:")
print(X_cost.isnull().sum())

Cost Prediction Feature shape: (15000, 6)
Cost Prediction Target shape: (15000,)

Missing values in cost features:
Weight_kg            0
Distance_km          0
Shipping_Mode        0
Material_Density     0
Cost_per_kg          0
Product_Volume_m3    0
dtype: int64


In [43]:
# One-Hot Encode Shipping_Mode for cost prediction
encoder_cost = OneHotEncoder(sparse_output=False, drop='first')
shipping_encoded_cost = encoder_cost.fit_transform(X_cost[['Shipping_Mode']])
shipping_feature_names_cost = encoder_cost.get_feature_names_out(['Shipping_Mode'])

# Create encoded dataframe for cost prediction
X_cost_encoded = pd.DataFrame(
    shipping_encoded_cost,
    columns=shipping_feature_names_cost,
    index=X_cost.index
)

# Add other numerical features
X_cost_encoded['Weight_kg'] = X_cost['Weight_kg']
X_cost_encoded['Distance_km'] = X_cost['Distance_km']
X_cost_encoded['Material_Density'] = X_cost['Material_Density']
X_cost_encoded['Cost_per_kg'] = X_cost['Cost_per_kg']
X_cost_encoded['Product_Volume_m3'] = X_cost['Product_Volume_m3']

print("Cost Prediction Features after encoding:")
print(X_cost_encoded.columns.tolist())
print(f"\nCost Prediction Feature shape: {X_cost_encoded.shape}")
X_cost_encoded.head()

Cost Prediction Features after encoding:
['Shipping_Mode_Road', 'Weight_kg', 'Distance_km', 'Material_Density', 'Cost_per_kg', 'Product_Volume_m3']

Cost Prediction Feature shape: (15000, 6)


Unnamed: 0,Shipping_Mode_Road,Weight_kg,Distance_km,Material_Density,Cost_per_kg,Product_Volume_m3
0,0.0,0.82,1893,742,0.91,0.007056
1,0.0,0.29,2141,146,5.1,0.00017
2,1.0,12.26,1491,515,2.23,0.19032
3,1.0,11.56,530,515,2.23,0.19136
4,0.0,0.25,1587,742,0.91,0.000396


In [44]:
# Split data into train (80%) and test (20%) for cost prediction
X_cost_train, X_cost_test, y_cost_train, y_cost_test = train_test_split(
    X_cost_encoded, y_cost, test_size=0.2, random_state=42
)

print(f"Cost Prediction Training set: {X_cost_train.shape[0]} samples")
print(f"Cost Prediction Test set: {X_cost_test.shape[0]} samples")

Cost Prediction Training set: 12000 samples
Cost Prediction Test set: 3000 samples


In [45]:
# Train RandomForestRegressor for cost prediction
rf_cost_model = RandomForestRegressor(
    n_estimators=100,
    max_depth=20,
    min_samples_split=5,
    min_samples_leaf=2,
    random_state=42,
    n_jobs=-1
)

print("Training RandomForestRegressor for Cost Prediction...")
rf_cost_model.fit(X_cost_train, y_cost_train)
print("Cost Prediction Training completed!")

Training RandomForestRegressor for Cost Prediction...
Cost Prediction Training completed!


In [46]:
# Predict on test set and evaluate cost prediction model
y_cost_pred = rf_cost_model.predict(X_cost_test)
cost_rmse = np.sqrt(mean_squared_error(y_cost_test, y_cost_pred))
cost_mae = mean_absolute_error(y_cost_test, y_cost_pred)

print(f"Cost Prediction RMSE on Test Set: ${cost_rmse:.4f}")
print(f"Cost Prediction MAE on Test Set: ${cost_mae:.4f}")
print(f"Mean Cost: ${y_cost_test.mean():.4f}")
print(f"RMSE as % of mean: {(cost_rmse/y_cost_test.mean())*100:.2f}%")
print(f"MAE as % of mean: {(cost_mae/y_cost_test.mean())*100:.2f}%")

Cost Prediction RMSE on Test Set: $0.1567
Cost Prediction MAE on Test Set: $0.1240
Mean Cost: $12.7634
RMSE as % of mean: 1.23%
MAE as % of mean: 0.97%


In [47]:
# Feature importance analysis for cost prediction
cost_feature_importance = pd.DataFrame({
    'feature': X_cost_encoded.columns,
    'importance': rf_cost_model.feature_importances_
}).sort_values('importance', ascending=False)

print("Cost Prediction - Feature Importance:")
print(cost_feature_importance.to_string(index=False))
print(f"\nMost Important Feature: {cost_feature_importance.iloc[0]['feature']} (Importance: {cost_feature_importance.iloc[0]['importance']:.4f})")

Cost Prediction - Feature Importance:
           feature   importance
         Weight_kg 5.794759e-01
 Product_Volume_m3 4.204297e-01
       Cost_per_kg 8.382303e-05
       Distance_km 5.807439e-06
  Material_Density 4.706428e-06
Shipping_Mode_Road 5.950379e-08

Most Important Feature: Weight_kg (Importance: 0.5795)


In [48]:
# Note: Old Random Forest CO2 model feature importance plot removed
# Now using XGBoost model with improved performance

## 4. Recommendation Engine

## 5. Advanced Multi-Objective Recommendation System

The system now provides intelligent packaging recommendations considering both environmental impact (CO2 emissions) and cost efficiency using advanced machine learning models.

In [49]:
def recommend_packaging_multi_objective(product_name, history_df, materials_df, 
                                        co2_model, cost_model, co2_encoder, cost_encoder, 
                                        co2_feature_order, cost_feature_order, 
                                        top_n=5, weight_co2=0.6, weight_cost=0.4):
    """
    Recommends top N packaging materials using multi-objective optimization (CO2 + Cost).
    
    Parameters:
    - product_name: Name of the product to get recommendations for
    - history_df: Historical packaging data
    - materials_df: Materials database
    - co2_model: Trained XGBoost model for CO2 prediction
    - cost_model: Trained Random Forest model for cost prediction
    - co2_encoder: Fitted OneHotEncoder for CO2 model Shipping_Mode
    - cost_encoder: Fitted OneHotEncoder for cost model Shipping_Mode
    - co2_feature_order: List of feature column names for CO2 model
    - cost_feature_order: List of feature column names for cost model
    - top_n: Number of top recommendations to return
    - weight_co2: Weight for CO2 in scoring (default 0.6)
    - weight_cost: Weight for cost in scoring (default 0.4)
    
    Returns:
    - DataFrame with top N materials and their predicted CO2, cost, and combined scores
    """
    # Look up average specs for the product
    product_data = history_df[history_df['Item_Name'].str.contains(product_name, case=False, na=False)]
    
    if len(product_data) == 0:
        print(f"No historical data found for product: {product_name}")
        return None
    
    # Calculate average specs
    avg_weight = product_data['Weight_kg'].mean()
    avg_distance = product_data['Distance_km'].mean()
    avg_volume = product_data['Product_Volume_m3'].mean()
    # Use the most common shipping mode
    most_common_shipping = product_data['Shipping_Mode'].mode()[0] if len(product_data['Shipping_Mode'].mode()) > 0 else 'Road'
    
    print(f"Product: {product_name}")
    print(f"Average Weight: {avg_weight:.3f} kg")
    print(f"Average Distance: {avg_distance:.1f} km")
    print(f"Average Volume: {avg_volume:.6f} m¬≥")
    print(f"Most Common Shipping Mode: {most_common_shipping}")
    print(f"Optimization Weights - CO2: {weight_co2}, Cost: {weight_cost}")
    print("\nSimulating all materials...")
    
    # Encode shipping mode for both models
    shipping_encoded_co2 = co2_encoder.transform([[most_common_shipping]])
    shipping_encoded_cost = cost_encoder.transform([[most_common_shipping]])
    
    shipping_feature_names_co2 = co2_encoder.get_feature_names_out(['Shipping_Mode'])
    shipping_feature_names_cost = cost_encoder.get_feature_names_out(['Shipping_Mode'])
    
    # Create dictionaries to map feature names to values
    shipping_dict_co2 = dict(zip(shipping_feature_names_co2, shipping_encoded_co2[0]))
    shipping_dict_cost = dict(zip(shipping_feature_names_cost, shipping_encoded_cost[0]))
    
    # Simulate for each material
    results = []
    
    for _, material_row in materials_df.iterrows():
        # Create feature dictionary for CO2 prediction
        feature_dict_co2 = {
            **shipping_dict_co2,
            'Weight_kg': avg_weight,
            'Distance_km': avg_distance,
            'Material_CO2_Factor': material_row['CO2_Emission_kg'],
            'Material_Density': material_row['Density_kg_m3']
        }
        
        # Create feature dictionary for cost prediction
        feature_dict_cost = {
            **shipping_dict_cost,
            'Weight_kg': avg_weight,
            'Distance_km': avg_distance,
            'Material_Density': material_row['Density_kg_m3'],
            'Cost_per_kg': material_row['Cost_per_kg'],
            'Product_Volume_m3': avg_volume
        }
        
        # Create feature arrays in the exact order expected by each model
        feature_array_co2 = np.array([[feature_dict_co2[col] for col in co2_feature_order]])
        feature_array_cost = np.array([[feature_dict_cost[col] for col in cost_feature_order]])
        
        # Predict CO2 emission and cost
        predicted_co2 = co2_model.predict(feature_array_co2)[0]
        predicted_cost = cost_model.predict(feature_array_cost)[0]
        
        results.append({
            'Material_Name': material_row['Material_Name'],
            'Category': material_row['Category'],
            'Density_kg_m3': material_row['Density_kg_m3'],
            'Material_CO2_Factor': material_row['CO2_Emission_kg'],
            'Cost_per_kg': material_row['Cost_per_kg'],
            'Biodegradable': material_row['Biodegradable'],
            'Predicted_CO2_kg': predicted_co2,
            'Predicted_Cost_USD': predicted_cost
        })
    
    # Create results dataframe
    results_df = pd.DataFrame(results)
    
    # Normalize scores for multi-objective optimization
    co2_min, co2_max = results_df['Predicted_CO2_kg'].min(), results_df['Predicted_CO2_kg'].max()
    cost_min, cost_max = results_df['Predicted_Cost_USD'].min(), results_df['Predicted_Cost_USD'].max()
    
    results_df['CO2_Score'] = (results_df['Predicted_CO2_kg'] - co2_min) / (co2_max - co2_min)
    results_df['Cost_Score'] = (results_df['Predicted_Cost_USD'] - cost_min) / (cost_max - cost_min)
    
    # Calculate combined score (lower is better)
    results_df['Combined_Score'] = (weight_co2 * results_df['CO2_Score'] + 
                                   weight_cost * results_df['Cost_Score'])
    
    # Sort by combined score and return top N
    results_df = results_df.sort_values('Combined_Score').head(top_n)
    
    return results_df


def recommend_packaging_from_specs_multi_objective(product_name, weight_kg, volume_m3, distance_km, shipping_mode, 
                                                  materials_df, co2_model, cost_model, co2_encoder, cost_encoder, 
                                                  co2_feature_order, cost_feature_order, 
                                                  top_n=5, weight_co2=0.6, weight_cost=0.4):
    """
    Multi-objective recommendation using user-provided specifications.
    """
    print(f"Product: {product_name}")
    print(f"Weight: {weight_kg:.3f} kg")
    print(f"Volume: {volume_m3:.6f} m¬≥")
    print(f"Distance: {distance_km:.1f} km")
    print(f"Shipping Mode: {shipping_mode}")
    print(f"Optimization Weights - CO2: {weight_co2}, Cost: {weight_cost}")
    print("\nSimulating all materials...")
    
    # Encode shipping mode for both models
    shipping_encoded_co2 = co2_encoder.transform([[shipping_mode]])
    shipping_encoded_cost = cost_encoder.transform([[shipping_mode]])
    
    shipping_feature_names_co2 = co2_encoder.get_feature_names_out(['Shipping_Mode'])
    shipping_feature_names_cost = cost_encoder.get_feature_names_out(['Shipping_Mode'])
    
    # Create dictionaries to map feature names to values
    shipping_dict_co2 = dict(zip(shipping_feature_names_co2, shipping_encoded_co2[0]))
    shipping_dict_cost = dict(zip(shipping_feature_names_cost, shipping_encoded_cost[0]))
    
    # Simulate for each material
    results = []
    
    for _, material_row in materials_df.iterrows():
        # Create feature dictionary for CO2 prediction
        feature_dict_co2 = {
            **shipping_dict_co2,
            'Weight_kg': weight_kg,
            'Distance_km': distance_km,
            'Material_CO2_Factor': material_row['CO2_Emission_kg'],
            'Material_Density': material_row['Density_kg_m3']
        }
        
        # Create feature dictionary for cost prediction
        feature_dict_cost = {
            **shipping_dict_cost,
            'Weight_kg': weight_kg,
            'Distance_km': distance_km,
            'Material_Density': material_row['Density_kg_m3'],
            'Cost_per_kg': material_row['Cost_per_kg'],
            'Product_Volume_m3': volume_m3
        }
        
        # Create feature arrays in the exact order expected by each model
        feature_array_co2 = np.array([[feature_dict_co2[col] for col in co2_feature_order]])
        feature_array_cost = np.array([[feature_dict_cost[col] for col in cost_feature_order]])
        
        # Predict CO2 emission and cost
        predicted_co2 = co2_model.predict(feature_array_co2)[0]
        predicted_cost = cost_model.predict(feature_array_cost)[0]
        
        results.append({
            'Material_Name': material_row['Material_Name'],
            'Category': material_row['Category'],
            'Density_kg_m3': material_row['Density_kg_m3'],
            'Material_CO2_Factor': material_row['CO2_Emission_kg'],
            'Cost_per_kg': material_row['Cost_per_kg'],
            'Biodegradable': material_row['Biodegradable'],
            'Predicted_CO2_kg': predicted_co2,
            'Predicted_Cost_USD': predicted_cost
        })
    
    # Create results dataframe
    results_df = pd.DataFrame(results)
    
    # Normalize scores for multi-objective optimization
    co2_min, co2_max = results_df['Predicted_CO2_kg'].min(), results_df['Predicted_CO2_kg'].max()
    cost_min, cost_max = results_df['Predicted_Cost_USD'].min(), results_df['Predicted_Cost_USD'].max()
    
    results_df['CO2_Score'] = (results_df['Predicted_CO2_kg'] - co2_min) / (co2_max - co2_min)
    results_df['Cost_Score'] = (results_df['Predicted_Cost_USD'] - cost_min) / (cost_max - cost_min)
    
    # Calculate combined score (lower is better)
    results_df['Combined_Score'] = (weight_co2 * results_df['CO2_Score'] + 
                                   weight_cost * results_df['Cost_Score'])
    
    # Sort by combined score and return top N
    results_df = results_df.sort_values('Combined_Score').head(top_n)
    
    return results_df

In [50]:
# Get feature orders for models
co2_feature_order = X_encoded.columns.tolist()
cost_feature_order = X_cost_encoded.columns.tolist()

# Example 1: Get recommendations for "Sneakers" using historical data
print("=" * 80)
print("PACKAGING RECOMMENDATIONS FOR SNEAKERS")
print("=" * 80)

recommendations_sneakers = recommend_packaging_multi_objective(
    "Sneakers", history_df, materials_df, 
    xgb_co2_model, rf_cost_model, encoder, encoder_cost, 
    co2_feature_order, cost_feature_order, 
    top_n=5, weight_co2=0.6, weight_cost=0.4
)

if recommendations_sneakers is not None:
    # Display results in tabular format
    display_columns = ['Rank', 'Material_Name', 'Category', 'Predicted_CO2_kg', 'Predicted_Cost_USD', 'Biodegradable', 'Combined_Score']
    recommendations_display = recommendations_sneakers.copy()
    recommendations_display.insert(0, 'Rank', range(1, len(recommendations_display) + 1))
    
    # Format the display
    recommendations_display['Predicted_CO2_kg'] = recommendations_display['Predicted_CO2_kg'].round(3)
    recommendations_display['Predicted_Cost_USD'] = recommendations_display['Predicted_Cost_USD'].round(2)
    recommendations_display['Combined_Score'] = recommendations_display['Combined_Score'].round(3)
    
    print("\nTOP 5 PACKAGING RECOMMENDATIONS:")
    print(recommendations_display[display_columns].to_string(index=False))
    
    print(f"\nüìä SUMMARY:")
    print(f"‚Ä¢ Best Overall: {recommendations_display.iloc[0]['Material_Name']} (Score: {recommendations_display.iloc[0]['Combined_Score']:.3f})")
    print(f"‚Ä¢ Lowest CO2: {recommendations_display.loc[recommendations_display['Predicted_CO2_kg'].idxmin(), 'Material_Name']} ({recommendations_display['Predicted_CO2_kg'].min():.3f} kg CO2)")
    print(f"‚Ä¢ Lowest Cost: {recommendations_display.loc[recommendations_display['Predicted_Cost_USD'].idxmin(), 'Material_Name']} (${recommendations_display['Predicted_Cost_USD'].min():.2f})")
    print(f"‚Ä¢ Biodegradable Options: {recommendations_display[recommendations_display['Biodegradable'] == True]['Material_Name'].tolist()}")

print("\n" + "=" * 80)

PACKAGING RECOMMENDATIONS FOR SNEAKERS
Product: Sneakers
Average Weight: 0.999 kg
Average Distance: 1488.5 km
Average Volume: 0.006582 m¬≥
Most Common Shipping Mode: Air
Optimization Weights - CO2: 0.6, Cost: 0.4

Simulating all materials...

TOP 5 PACKAGING RECOMMENDATIONS:
 Rank           Material_Name Category  Predicted_CO2_kg  Predicted_Cost_USD Biodegradable  Combined_Score
    1      Laminated PVC Film  Plastic             3.034                1.21            No           0.062
    2       Woven ABS Plastic  Plastic             3.034                1.21            No           0.062
    3 UV-Stabilized Tin Plate    Metal             3.034                1.21            No           0.062
    4      Standard Tin Plate    Metal             3.034                1.21            No           0.062
    5     Laminated Tin Plate    Metal             3.034                1.21            No           0.062

üìä SUMMARY:
‚Ä¢ Best Overall: Laminated PVC Film (Score: 0.062)
‚Ä¢ Lowest CO2:

In [51]:
# Example 2: Custom specifications for a fragile electronic device
print("=" * 80)
print("PACKAGING RECOMMENDATIONS FOR FRAGILE ELECTRONIC DEVICE")
print("=" * 80)

recommendations_electronics = recommend_packaging_from_specs_multi_objective(
    "Fragile Electronics", weight_kg=0.5, volume_m3=0.0002, distance_km=2500, shipping_mode="Air",
    materials_df=materials_df, co2_model=xgb_co2_model, cost_model=rf_cost_model, 
    co2_encoder=encoder, cost_encoder=encoder_cost, 
    co2_feature_order=co2_feature_order, cost_feature_order=cost_feature_order, 
    top_n=5, weight_co2=0.7, weight_cost=0.3  # Higher priority on environmental impact
)

if recommendations_electronics is not None:
    # Display results in tabular format
    display_columns = ['Rank', 'Material_Name', 'Category', 'Predicted_CO2_kg', 'Predicted_Cost_USD', 'Biodegradable', 'Combined_Score']
    recommendations_display = recommendations_electronics.copy()
    recommendations_display.insert(0, 'Rank', range(1, len(recommendations_display) + 1))
    
    # Format the display
    recommendations_display['Predicted_CO2_kg'] = recommendations_display['Predicted_CO2_kg'].round(3)
    recommendations_display['Predicted_Cost_USD'] = recommendations_display['Predicted_Cost_USD'].round(2)
    recommendations_display['Combined_Score'] = recommendations_display['Combined_Score'].round(3)
    
    print("\nTOP 5 PACKAGING RECOMMENDATIONS:")
    print(recommendations_display[display_columns].to_string(index=False))
    
    print(f"\nüìä SUMMARY:")
    print(f"‚Ä¢ Best Overall: {recommendations_display.iloc[0]['Material_Name']} (Score: {recommendations_display.iloc[0]['Combined_Score']:.3f})")
    print(f"‚Ä¢ Lowest CO2: {recommendations_display.loc[recommendations_display['Predicted_CO2_kg'].idxmin(), 'Material_Name']} ({recommendations_display['Predicted_CO2_kg'].min():.3f} kg CO2)")
    print(f"‚Ä¢ Lowest Cost: {recommendations_display.loc[recommendations_display['Predicted_Cost_USD'].idxmin(), 'Material_Name']} (${recommendations_display['Predicted_Cost_USD'].min():.2f})")
    print(f"‚Ä¢ Biodegradable Options: {recommendations_display[recommendations_display['Biodegradable'] == True]['Material_Name'].tolist()}")

print("\n" + "=" * 80)

PACKAGING RECOMMENDATIONS FOR FRAGILE ELECTRONIC DEVICE
Product: Fragile Electronics
Weight: 0.500 kg
Volume: 0.000200 m¬≥
Distance: 2500.0 km
Shipping Mode: Air
Optimization Weights - CO2: 0.7, Cost: 0.3

Simulating all materials...

TOP 5 PACKAGING RECOMMENDATIONS:
 Rank                   Material_Name Category  Predicted_CO2_kg  Predicted_Cost_USD Biodegradable  Combined_Score
    1            Perforated Wax Paper    Paper             1.450                1.79           Yes           0.183
    2 Heavy-Duty Corrugated Cardboard    Paper             1.450                1.79           Yes           0.188
    3   Recycled Corrugated Cardboard    Paper             2.063                1.59           Yes           0.212
    4      Industrial-Grade Wax Paper    Paper             1.586                1.79           Yes           0.228
    5              Standard Wax Paper    Paper             1.586                1.79           Yes           0.228

üìä SUMMARY:
‚Ä¢ Best Overall: Perforate

In [52]:
# Example 3: Heavy furniture item with cost-focused optimization
print("=" * 80)
print("PACKAGING RECOMMENDATIONS FOR HEAVY FURNITURE (COST-FOCUSED)")
print("=" * 80)

recommendations_furniture = recommend_packaging_from_specs_multi_objective(
    "Heavy Furniture", weight_kg=15.0, volume_m3=0.25, distance_km=800, shipping_mode="Road",
    materials_df=materials_df, co2_model=xgb_co2_model, cost_model=rf_cost_model, 
    co2_encoder=encoder, cost_encoder=encoder_cost, 
    co2_feature_order=co2_feature_order, cost_feature_order=cost_feature_order, 
    top_n=5, weight_co2=0.3, weight_cost=0.7  # Higher priority on cost efficiency
)

if recommendations_furniture is not None:
    # Display results in tabular format
    display_columns = ['Rank', 'Material_Name', 'Category', 'Predicted_CO2_kg', 'Predicted_Cost_USD', 'Biodegradable', 'Combined_Score']
    recommendations_display = recommendations_furniture.copy()
    recommendations_display.insert(0, 'Rank', range(1, len(recommendations_display) + 1))
    
    # Format the display
    recommendations_display['Predicted_CO2_kg'] = recommendations_display['Predicted_CO2_kg'].round(3)
    recommendations_display['Predicted_Cost_USD'] = recommendations_display['Predicted_Cost_USD'].round(2)
    recommendations_display['Combined_Score'] = recommendations_display['Combined_Score'].round(3)
    
    print("\nTOP 5 PACKAGING RECOMMENDATIONS:")
    print(recommendations_display[display_columns].to_string(index=False))
    
    print(f"\nüìä SUMMARY:")
    print(f"‚Ä¢ Best Overall: {recommendations_display.iloc[0]['Material_Name']} (Score: {recommendations_display.iloc[0]['Combined_Score']:.3f})")
    print(f"‚Ä¢ Lowest CO2: {recommendations_display.loc[recommendations_display['Predicted_CO2_kg'].idxmin(), 'Material_Name']} ({recommendations_display['Predicted_CO2_kg'].min():.3f} kg CO2)")
    print(f"‚Ä¢ Lowest Cost: {recommendations_display.loc[recommendations_display['Predicted_Cost_USD'].idxmin(), 'Material_Name']} (${recommendations_display['Predicted_Cost_USD'].min():.2f})")
    print(f"‚Ä¢ Biodegradable Options: {recommendations_display[recommendations_display['Biodegradable'] == True]['Material_Name'].tolist()}")

print("\n" + "=" * 80)

PACKAGING RECOMMENDATIONS FOR HEAVY FURNITURE (COST-FOCUSED)
Product: Heavy Furniture
Weight: 15.000 kg
Volume: 0.250000 m¬≥
Distance: 800.0 km
Shipping Mode: Road
Optimization Weights - CO2: 0.3, Cost: 0.7

Simulating all materials...

TOP 5 PACKAGING RECOMMENDATIONS:
 Rank                         Material_Name Category  Predicted_CO2_kg  Predicted_Cost_USD Biodegradable  Combined_Score
    1       Perforated Corrugated Cardboard    Paper             2.025               18.76           Yes           0.000
    2 Commercial-Grade Corrugated Cardboard    Paper             2.025               18.76           Yes           0.000
    3       Food-Grade Corrugated Cardboard    Paper             2.025               18.76           Yes           0.000
    4              Insulated PLA Bioplastic      Eco             2.243               18.76           Yes           0.003
    5        Laminated Corrugated Cardboard    Paper             2.352               18.76           Yes           0.005

üì

In [58]:
# Interactive User Input for Custom Recommendations
print("=" * 80)
print("üéØ INTERACTIVE PACKAGING RECOMMENDATION SYSTEM")
print("=" * 80)

def get_user_input():
    """Get user input for product specifications"""
    try:
        print("\nüì¶ Please enter your product specifications:")
        
        # Product name
        product_name = input("Product Name (e.g., Laptop, Smartphone): ").strip()
        if not product_name:
            product_name = "Custom Product"
        
        # Weight
        while True:
            try:
                weight_kg = float(input("Weight (kg): "))
                if weight_kg > 0:
                    break
                else:
                    print("‚ùå Weight must be positive.")
            except ValueError:
                print("‚ùå Please enter a valid number.")
        
        # Volume
        while True:
            try:
                volume_m3 = float(input("Volume (m¬≥, e.g., 0.001): "))
                if volume_m3 > 0:
                    break
                else:
                    print("‚ùå Volume must be positive.")
            except ValueError:
                print("‚ùå Please enter a valid number.")
        
        # Distance
        while True:
            try:
                distance_km = float(input("Shipping Distance (km): "))
                if distance_km > 0:
                    break
                else:
                    print("‚ùå Distance must be positive.")
            except ValueError:
                print("‚ùå Please enter a valid number.")
        
        # Shipping mode
        shipping_modes = ["Air", "Road", "Sea", "Rail"]
        print(f"\nüöö Shipping Options: {', '.join(shipping_modes)}")
        while True:
            shipping_mode = input("Shipping Mode: ").strip().title()
            if shipping_mode in shipping_modes:
                break
            else:
                print(f"‚ùå Please choose from: {', '.join(shipping_modes)}")
        
        # Optimization preference
        print(f"\n‚öñÔ∏è Optimization Preference:")
        print("1. Balanced (CO2: 60%, Cost: 40%)")
        print("2. Eco-Focused (CO2: 80%, Cost: 20%)")
        print("3. Cost-Focused (CO2: 20%, Cost: 80%)")
        
        while True:
            try:
                choice = int(input("Choose (1-3): "))
                if choice == 1:
                    weight_co2, weight_cost = 0.6, 0.4
                    break
                elif choice == 2:
                    weight_co2, weight_cost = 0.8, 0.2
                    break
                elif choice == 3:
                    weight_co2, weight_cost = 0.2, 0.8
                    break
                else:
                    print("‚ùå Please choose 1, 2, or 3.")
            except ValueError:
                print("‚ùå Please enter a number (1-3).")
        
        return {
            'product_name': product_name,
            'weight_kg': weight_kg,
            'volume_m3': volume_m3,
            'distance_km': distance_km,
            'shipping_mode': shipping_mode,
            'weight_co2': weight_co2,
            'weight_cost': weight_cost
        }
        
    except KeyboardInterrupt:
        print("\n‚ùå Input cancelled by user.")
        return None
    except Exception as e:
        print(f"‚ùå Error getting input: {e}")
        return None

def generate_user_recommendations():
    """Generate recommendations based on user input"""
    user_specs = get_user_input()
    
    if user_specs is None:
        return
    
    print(f"\nüîÑ GENERATING RECOMMENDATIONS FOR {user_specs['product_name'].upper()}...")
    print("=" * 80)
    
    # Generate recommendations
    recommendations = recommend_packaging_from_specs_multi_objective(
        user_specs['product_name'],
        user_specs['weight_kg'],
        user_specs['volume_m3'],
        user_specs['distance_km'],
        user_specs['shipping_mode'],
        materials_df,
        xgb_co2_model,
        rf_cost_model,
        encoder,
        encoder_cost,
        co2_feature_order,
        cost_feature_order,
        top_n=5,
        weight_co2=user_specs['weight_co2'],
        weight_cost=user_specs['weight_cost']
    )
    
    if recommendations is not None:
        # Display results
        display_columns = ['Rank', 'Material_Name', 'Category', 'Predicted_CO2_kg', 'Predicted_Cost_USD', 'Biodegradable', 'Combined_Score']
        recommendations_display = recommendations.copy()
        recommendations_display.insert(0, 'Rank', range(1, len(recommendations_display) + 1))
        
        # Format the display
        recommendations_display['Predicted_CO2_kg'] = recommendations_display['Predicted_CO2_kg'].round(3)
        recommendations_display['Predicted_Cost_USD'] = recommendations_display['Predicted_Cost_USD'].round(2)
        recommendations_display['Combined_Score'] = recommendations_display['Combined_Score'].round(3)
        
        print(f"\nüèÜ TOP 5 PACKAGING RECOMMENDATIONS:")
        print(recommendations_display[display_columns].to_string(index=False))
        
        print(f"\nüìä SUMMARY:")
        print(f"‚Ä¢ Best Overall: {recommendations_display.iloc[0]['Material_Name']} (Score: {recommendations_display.iloc[0]['Combined_Score']:.3f})")
        print(f"‚Ä¢ Lowest CO2: {recommendations_display.loc[recommendations_display['Predicted_CO2_kg'].idxmin(), 'Material_Name']} ({recommendations_display['Predicted_CO2_kg'].min():.3f} kg CO2)")
        print(f"‚Ä¢ Lowest Cost: {recommendations_display.loc[recommendations_display['Predicted_Cost_USD'].idxmin(), 'Material_Name']} (${recommendations_display['Predicted_Cost_USD'].min():.2f})")
        
        biodegradable_options = recommendations_display[recommendations_display['Biodegradable'] == True]['Material_Name'].tolist()
        if biodegradable_options:
            print(f"‚Ä¢ Biodegradable Options: {', '.join(biodegradable_options)}")
        else:
            print(f"‚Ä¢ Biodegradable Options: None available")
    
    print("\n" + "=" * 80)

# Uncomment to run interactive input
generate_user_recommendations()

üéØ INTERACTIVE PACKAGING RECOMMENDATION SYSTEM

üì¶ Please enter your product specifications:

üöö Shipping Options: Air, Road, Sea, Rail

‚öñÔ∏è Optimization Preference:
1. Balanced (CO2: 60%, Cost: 40%)
2. Eco-Focused (CO2: 80%, Cost: 20%)
3. Cost-Focused (CO2: 20%, Cost: 80%)

üîÑ GENERATING RECOMMENDATIONS FOR LAPTOP...
Product: laptop
Weight: 2.000 kg
Volume: 0.023560 m¬≥
Distance: 600.0 km
Shipping Mode: Road
Optimization Weights - CO2: 0.6, Cost: 0.4

Simulating all materials...

üèÜ TOP 5 PACKAGING RECOMMENDATIONS:
 Rank              Material_Name Category  Predicted_CO2_kg  Predicted_Cost_USD Biodegradable  Combined_Score
    1     Insulated Seaweed Film      Eco            -4.605                2.18           Yes           0.012
    2 Food-Grade Pine Wood Crate     Wood            -4.605                2.18           Yes           0.015
    3        Laminated Palm Leaf      Eco            -4.605                2.20           Yes           0.049
    4       Reinforced Pa

In [None]:
# Save Models for Web Deployment (Flask/Django)
import joblib
import os
from datetime import datetime

def save_models_for_deployment():
    """Save all trained models and required components for web deployment"""
    
    print("=" * 80)
    print("üíæ SAVING MODELS FOR WEB DEPLOYMENT")
    print("=" * 80)
    
    # Create deployment directory
    deployment_dir = "deployment_models"
    if not os.path.exists(deployment_dir):
        os.makedirs(deployment_dir)
        print(f"üìÅ Created directory: {deployment_dir}")
    
    # Save CO2 prediction model (XGBoost)
    co2_model_path = os.path.join(deployment_dir, "co2_prediction_model.joblib")
    joblib.dump(xgb_co2_model, co2_model_path)
    print(f"‚úÖ CO2 Prediction Model saved: {co2_model_path}")
    
    # Save Cost prediction model (Random Forest)
    cost_model_path = os.path.join(deployment_dir, "cost_prediction_model.joblib")
    joblib.dump(rf_cost_model, cost_model_path)
    print(f"‚úÖ Cost Prediction Model saved: {cost_model_path}")
    
    # Save encoders
    co2_encoder_path = os.path.join(deployment_dir, "co2_label_encoder.joblib")
    joblib.dump(encoder, co2_encoder_path)
    print(f"‚úÖ CO2 Label Encoder saved: {co2_encoder_path}")
    
    cost_encoder_path = os.path.join(deployment_dir, "cost_label_encoder.joblib")
    joblib.dump(encoder_cost, cost_encoder_path)
    print(f"‚úÖ Cost Label Encoder saved: {cost_encoder_path}")
    
    # Save feature orders
    co2_features_path = os.path.join(deployment_dir, "co2_feature_order.joblib")
    joblib.dump(co2_feature_order, co2_features_path)
    print(f"‚úÖ CO2 Feature Order saved: {co2_features_path}")
    
    cost_features_path = os.path.join(deployment_dir, "cost_feature_order.joblib")
    joblib.dump(cost_feature_order, cost_features_path)
    print(f"‚úÖ Cost Feature Order saved: {cost_features_path}")
    
    # Save materials database
    materials_path = os.path.join(deployment_dir, "materials_database.joblib")
    joblib.dump(materials_df, materials_path)
    print(f"‚úÖ Materials Database saved: {materials_path}")
    
    # Save model metadata
    metadata = {
        'model_version': '1.0',
        'training_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'co2_model_type': 'XGBoost Regressor',
        'cost_model_type': 'Random Forest Regressor',
        'co2_rmse': float(rmse_xgb),
        'co2_mae': float(mae_xgb),
        'cost_rmse': float(cost_rmse),
        'cost_mae': float(cost_mae),
        'total_materials': len(materials_df),
        'categories': materials_df['Category'].unique().tolist(),
        'feature_order_co2': co2_feature_order,
        'feature_order_cost': cost_feature_order
    }
    
    metadata_path = os.path.join(deployment_dir, "model_metadata.joblib")
    joblib.dump(metadata, metadata_path)
    print(f"‚úÖ Model Metadata saved: {metadata_path}")
    
    # Create requirements.txt for deployment
    requirements = [
        "scikit-learn>=1.0.0",
        "xgboost>=1.6.0", 
        "pandas>=1.3.0",
        "numpy>=1.21.0",
        "joblib>=1.1.0",
        "flask>=2.0.0"
    ]
    
    requirements_path = os.path.join(deployment_dir, "requirements.txt")
    with open(requirements_path, 'w') as f:
        f.write('\n'.join(requirements))
    print(f"‚úÖ Requirements file created: {requirements_path}")
    
 

    readme_path = os.path.join(deployment_dir, "README.md")
    with open(readme_path, 'w') as f:
        f.write(readme_content)
    print(f"‚úÖ README documentation created: {readme_path}")
    
    print(f"\nüéâ DEPLOYMENT PACKAGE CREATED SUCCESSFULLY!")
    print(f"üìÅ Location: {deployment_dir}/")
    print(f"\nüìã FILES CREATED:")
    for file in sorted(os.listdir(deployment_dir)):
        print(f"   ‚Ä¢ {file}")
    
    print(f"\nüöÄ DEPLOYMENT INSTRUCTIONS:")
    print(f"1. Copy the '{deployment_dir}' folder to your server")
    print(f"2. Install dependencies: pip install -r {deployment_dir}/requirements.txt")
    print(f"3. Run Flask app: python app.py (app.py is in the parent directory)")
    print(f"4. Access at: http://localhost:5000")
    print(f"\nüìñ For detailed instructions, see: {deployment_dir}/README.md")
    
    print("\n" + "=" * 80)

# Save all models and deployment files
save_models_for_deployment()