In [5]:
import pandas as pd
import numpy as np

# Load the dataset
file_path = 'LEAD TIME.csv'
dataset = pd.read_csv(file_path)

# Encode categorical variables if not already encoded
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
for column in ['Shipping Mode', 'Stock Availability', 'Packaging Complexity',
               'Shipping Zone', 'Order Priority', 'Order Size Category', 'Customization Type']:
    dataset[column] = le.fit_transform(dataset[column])

# Derived Features
# 1. Order Complexity Score: A weighted sum of customization, production stages, and labor hours
dataset['Order Complexity Score'] = (
    dataset['Customization Level'] * 2 +
    dataset['Production Stages Count'] +
    dataset['Labor Hours Required'] * 0.5
)

# 2. Distance-Time Ratio: Distance to Delivery / Process Time
dataset['Distance-Time Ratio'] = dataset['Distance to Delivery'] / (dataset['Process Time (days)'] + 1)

# 3. Supplier Delay Impact: Adjust lead time by considering supplier delays
dataset['Supplier Delay Impact'] = dataset['Lead Time'] * (1 + dataset['Previous Supplier Delays (%)'] / 100)

# 4. Shipping Mode Impact: Assign weights to shipping modes (based on domain knowledge)
shipping_mode_weights = {'Ground': 1, 'Sea': 2, 'Air': 0.5}  # Example weights
dataset['Shipping Mode Weight'] = dataset['Shipping Mode'].map(lambda x: shipping_mode_weights.get(x, 1))

# 5. Priority-Customization Interaction: Interaction between order priority and customization level
dataset['Priority-Customization Interaction'] = dataset['Order Priority'] * dataset['Customization Level']

# Save the enhanced dataset for further analysis
enhanced_file_path = 'Dataset_With_Derived_Features.csv'
dataset.to_csv(enhanced_file_path, index=False)

print("Derived features added and dataset saved as:", enhanced_file_path)


Derived features added and dataset saved as: Dataset_With_Derived_Features.csv


In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_absolute_error
import pickle

# Load enhanced dataset
file_path = 'Dataset_With_Derived_Features.csv'
dataset = pd.read_csv(file_path)

In [7]:
# Order Complexity Predictor
complexity_target = (dataset['Order Type'] == 'Complex').astype(int)
complexity_features = dataset[['Order Size', 'Customization Level', 'Supplier Rating',
                                'Production Stages Count', 'Labor Hours Required',
                                'Order Complexity Score', 'Distance-Time Ratio',
                                'Priority-Customization Interaction']]

X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(complexity_features, complexity_target, test_size=0.2, random_state=42)
complexity_model = RandomForestClassifier(random_state=42)
complexity_model.fit(X_train_c, y_train_c)
complexity_predictions = complexity_model.predict(X_test_c)
complexity_accuracy = accuracy_score(y_test_c, complexity_predictions)
print("Order Complexity Predictor Accuracy:", complexity_accuracy)

# Save the complexity model
with open('order_complexity_model.pkl', 'wb') as f:
    pickle.dump(complexity_model, f)

Order Complexity Predictor Accuracy: 1.0


In [8]:
# Lead Time Predictor
lead_time_target = dataset['Lead Time']
lead_time_features = dataset[['Order Size', 'Customization Level', 'Supplier Rating', 'Process Time (days)',
                               'Shipping Mode Weight', 'Distance to Delivery', 'Stock Availability',
                               'Production Stages Count', 'Labor Hours Required', 'Order Complexity Score',
                               'Supplier Delay Impact', 'Priority-Customization Interaction']]

X_train_lt, X_test_lt, y_train_lt, y_test_lt = train_test_split(lead_time_features, lead_time_target, test_size=0.2, random_state=42)
lead_time_model = RandomForestRegressor(random_state=42)
lead_time_model.fit(X_train_lt, y_train_lt)
lead_time_predictions = lead_time_model.predict(X_test_lt)
lead_time_mae = mean_absolute_error(y_test_lt, lead_time_predictions)
print("Lead Time Predictor MAE:", lead_time_mae)

# Save the lead time model
with open('lead_time_model.pkl', 'wb') as f:
    pickle.dump(lead_time_model, f)

Lead Time Predictor MAE: 2.3599


In [9]:
# Analyze interaction
dataset['Predicted Complexity'] = complexity_model.predict(complexity_features)
interaction_features = dataset[['Predicted Complexity', 'Lead Time']]
correlation = interaction_features.corr()['Lead Time']['Predicted Complexity']
print("Correlation between predicted complexity and lead time:", correlation)

Correlation between predicted complexity and lead time: 0.05119404139167283
