# OBJECTIVE 1: MODEL TRAINING FOR PREDICTING DAILY SALES QUANTITY

# Importing the necessary libraries

In [1]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import matplotlib.pyplot as plt

# Importing dataset for training

In [2]:
# Read the cleaned dataset that the model is trained on.
training_csv = "Cleaned_ready for ml.csv"
df = pd.read_csv(training_csv)

# Label Encoding for categorical variables

In [3]:
# Collect the text (object-dtype) columns first, then replace each one with
# integer codes produced by its own freshly fitted LabelEncoder.
object_columns = [name for name in df.columns if df[name].dtype == 'object']
for name in object_columns:
    encoder = LabelEncoder()
    # Values are cast to str before fitting, so every entry is encoded as text.
    df[name] = encoder.fit_transform(df[name].astype(str))

# Declaring and dropping variables

In [4]:
# Remove the customer identifier column from the training frame.
df = df.drop(['Customer_Id'], axis='columns')

In [5]:
# Separate the prediction target from the feature columns.
target_column = 'Daily_Sales_Quantity'
y = df[target_column]
X = df.drop(columns=[target_column])

# Train test split and model training

In [6]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split the same on every run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [7]:
# Hyper-parameters of the XGBoost regressor, gathered in one place.
xgb_params = {
    "n_estimators": 100,
    "learning_rate": 0.1,
    "max_depth": 6,
    "random_state": 42,
}
model = xgb.XGBRegressor(**xgb_params)
# Fit on the training split; the fitted estimator is the cell's displayed value.
model.fit(X_train, y_train)

In [19]:
# Predict the target for the held-out test features.
y_pred = model.predict(X_test)

# Calculating evaluation metrics

In [20]:
# Error metrics on the test split: RMSE is the square root of the mean
# squared error, computed here in two explicit steps.
test_mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(test_mse)
r2 = r2_score(y_test, y_pred)

In [21]:
# Report the three metrics, one per line, rounded to two decimals.
print(
    f"MAE: {mae:.2f}",
    f"RMSE: {rmse:.2f}",
    f"R² Score: {r2:.2f}",
    sep="\n",
)

MAE: 8.16
RMSE: 10.41
R² Score: 1.00


In [22]:
# Express the MAE as a fraction of the mean actual quantity in the test split,
# then report its complement as a percentage.
mean_actual_quantity = y_test.mean()
relative_error = mae / mean_actual_quantity
accuracy_percent = 100 * (1 - relative_error)
print(f"Estimated Regression Accuracy: {accuracy_percent:.2f}%")

Estimated Regression Accuracy: 95.65%


# Predicting on new input data

In [23]:
# Read the rows for which predictions are wanted.
input_csv = "input_data.csv"
df_input = pd.read_csv(input_csv)

In [29]:
# Encode the text columns of the input data with the SAME label -> integer
# mapping the model was trained on.
#
# Fitting a fresh LabelEncoder on the input file (as was done before) assigns
# codes from the labels that happen to be present in that file, so the same
# category can receive a different integer than it had during training and the
# model silently scores the wrong category.  The training frame has already
# been overwritten with encoded values, so the raw training file is re-read
# here and each encoder is re-fitted on the original training labels, which
# reproduces the training-time mapping exactly.
training_raw = pd.read_csv("Cleaned_ready for ml.csv")

label_encoders = {}

for col in df_input.columns:
    # Only encode columns that are text in the input AND were text (hence
    # label-encoded) in the training data.  Checking the input dtype also
    # keeps this cell safe to re-run once the column is already numeric.
    if df_input[col].dtype != 'object':
        continue
    if col not in training_raw.columns or training_raw[col].dtype != 'object':
        continue

    le = LabelEncoder()
    le.fit(training_raw[col].astype(str))
    code_by_label = {label: code for code, label in enumerate(le.classes_)}

    input_labels = df_input[col].astype(str)
    unseen_labels = sorted(set(input_labels) - set(code_by_label))
    if unseen_labels:
        # Labels the model never saw have no valid code; Series.map leaves
        # them as NaN (missing) instead of inventing a code.
        print(
            f"Warning: column '{col}' contains labels not seen in training "
            f"{unseen_labels}; they are encoded as missing values."
        )

    df_input[col] = input_labels.map(code_by_label)
    label_encoders[col] = le

In [30]:
# Remove the customer identifier if the input file has one; a missing column
# is tolerated (errors='ignore').
df_input = df_input.drop(['Customer_Id'], axis='columns', errors='ignore')

In [31]:
# Convert every column to a numeric dtype; values that cannot be parsed
# become NaN (errors='coerce').
df_input = df_input.apply(lambda column: pd.to_numeric(column, errors='coerce'))

In [32]:
# Put the input columns in the same order as the training features, predict,
# and round each prediction to the nearest whole number.
feature_order = list(X_train.columns)
df_input = df_input[feature_order]
predictions = model.predict(df_input)
predictions_rounded = np.round(predictions, decimals=0).astype(int)

In [36]:
# Attach the rounded predictions as a new column of the input frame.
prediction_column = 'Predicted_Daily_Sales_Quantity'
df_input[prediction_column] = predictions_rounded

# Write the frame (features plus predictions) to disk, then show the
# prediction column on its own.
df_input.to_csv("output_with_predictions.csv", index=False)
print(df_input[[prediction_column]])

   Predicted_Daily_Sales_Quantity
0                              73
1                              73
2                              73
3                              74
4                              75
5                              73


# OBJECTIVE 2: INVENTORY MANAGEMENT

# Importing libraries

In [34]:
import pandas as pd
from pulp import LpMaximize, LpProblem, LpVariable, lpSum

# Machinery Data given by the company

In [35]:
# One (name, volume, cost) row per machine type.  The results cell reports
# volume in m³; the unit of cost is not stated in this notebook.
machine_specs = [
    ("Backhoe Loader", 15, 3000000),
    ("Excavator (Crawler)", 25, 5000000),
    ("Loader (Wheeled)", 20, 4000000),
    ("Skid Steer Loader", 10, 2000000),
    ("Compactor", 12, 2500000),
    ("Tele Handler", 18, 3500000),
]

# Mapping: machine name -> {"volume": ..., "cost": ...}, in the order above.
machines = {
    name: {"volume": volume, "cost": cost}
    for name, volume, cost in machine_specs
}

# Predicted demand per machine (hard-coded from the model predictions printed above)

In [37]:
# Predicted demand (in units) for each machine type, keyed by machine name.
predicted_demand = dict([
    ("Backhoe Loader", 73),
    ("Excavator (Crawler)", 73),
    ("Loader (Wheeled)", 73),
    ("Skid Steer Loader", 74),
    ("Compactor", 75),
    ("Tele Handler", 73),
])

# Optimization Model

In [38]:
# Create the maximisation problem.
# NOTE: this rebinds `model`, which earlier in the notebook referred to the
# trained XGBoost regressor; cells that call model.predict() must not be
# re-run after this point without re-training first.
model = LpProblem(name="Inventory_Optimization", sense=LpMaximize)

# Create variables for number of units to stock for each machine

In [39]:
# One non-negative integer decision variable per machine type; spaces in the
# machine name are replaced by underscores to form the variable name.
units = {}
for machine_name in machines:
    variable_name = f"units_{machine_name.replace(' ', '_')}"
    units[machine_name] = LpVariable(variable_name, lowBound=0, cat='Integer')

# Objective – Maximize total demand met

In [40]:
# Objective: the total number of units stocked across all machine types.
total_units_stocked = lpSum(units[name] for name in machines)
model += total_units_stocked, "Total_Units_Stored"

# Constraint – Total volume used ≤ 5000 m³

In [41]:
# The summed volume of all stocked units may not exceed the storage capacity.
storage_capacity = 5000  # m³
volume_used = lpSum(units[name] * machines[name]["volume"] for name in machines)
model += volume_used <= storage_capacity, "Storage_Limit"

# Don't store more than predicted demand

In [42]:
# Upper bound per machine type: never stock more than its predicted demand.
for machine_name in machines:
    demand_cap = predicted_demand[machine_name]
    model += units[machine_name] <= demand_cap, f"Cap_{machine_name}"

# Solve

In [43]:
# Run the solver; the returned status code is shown as the cell output.
solve_status = model.solve()
solve_status

1

# Show Results

In [44]:
# Print the recommended quantity and storage footprint for every machine
# type, followed by the totals.
print("\n📦 Recommended Inventory Plan:")
total_space = 0
total_units = 0

for m in machines:
    solved_value = units[m].varValue
    if solved_value is None:
        # No value was assigned to this variable (e.g. the problem was not
        # solved); fail with a clear message instead of a TypeError in int().
        raise RuntimeError(
            f"No solution value available for '{m}'; solve the problem first."
        )
    # The solver reports integer variables as floats that can sit a hair below
    # the true value (e.g. 46.9999999); round to the nearest integer instead
    # of truncating so such a value is not reported as 46.
    qty = int(round(solved_value))
    space = qty * machines[m]["volume"]
    total_space += space
    total_units += qty
    print(f"- {m}: {qty} units (uses {space} m³)")

print(f"\n🔧 Total Units Stored: {total_units}")
print(f"📐 Total Space Used: {total_space} / 5000 m³")



📦 Recommended Inventory Plan:
- Backhoe Loader: 73 units (uses 1095 m³)
- Excavator (Crawler): 0 units (uses 0 m³)
- Loader (Wheeled): 47 units (uses 940 m³)
- Skid Steer Loader: 74 units (uses 740 m³)
- Compactor: 75 units (uses 900 m³)
- Tele Handler: 73 units (uses 1314 m³)

🔧 Total Units Stored: 342
📐 Total Space Used: 4989 / 5000 m³
