# Importing the neccessary libraries

In [61]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

# Importing dataset for training

In [63]:
df = pd.read_csv("Cleaned_ready for ml.csv")
df.head()

Unnamed: 0,Date,Infrastructure_Machineries,Daily_Sales _Percentage,Daily_Sales_Quantity,Market_Share,Political,Marketing,Budget,Customer_Id,Region
0,2019-01-01,Excavators(crawler),0.034464,80,35,1,1,5000.56,CUST00002,North Ryanstad
1,2019-01-01,Loaders (Wheeled),0.034464,70,35,1,1,5000.56,CUST00003,South Christophermouth
2,2019-01-01,Skid Steer Loaders,0.034464,70,35,1,1,5000.56,CUST00004,Juliashire
3,2019-01-01,Compactors,0.034464,68,35,1,1,5000.56,CUST00005,Davidberg
4,2019-01-01,Tele Handlers,0.034464,61,35,1,1,5000.56,CUST00006,West Brandonburgh


In [None]:
df['Date'] = pd.to_datetime(df['Date'])
df['Day'] = df['Date'].dt.day
df['Month'] = df['Date'].dt.month
df['Year'] = df['Date'].dt.year
df.head()

In [71]:
drop_cols = ['Date', 'Customer_Id', 'Region']
df = df.drop(columns=drop_cols)
df.head()

Unnamed: 0,Infrastructure_Machineries,Daily_Sales _Percentage,Daily_Sales_Quantity,Market_Share,Political,Marketing,Budget,Day,Month,Year
0,Excavators(crawler),0.034464,80,35,1,1,5000.56,1,1,2019
1,Loaders (Wheeled),0.034464,70,35,1,1,5000.56,1,1,2019
2,Skid Steer Loaders,0.034464,70,35,1,1,5000.56,1,1,2019
3,Compactors,0.034464,68,35,1,1,5000.56,1,1,2019
4,Tele Handlers,0.034464,61,35,1,1,5000.56,1,1,2019


In [73]:
df = pd.get_dummies(df, drop_first=True)
df.head()

Unnamed: 0,Daily_Sales _Percentage,Daily_Sales_Quantity,Market_Share,Political,Marketing,Budget,Day,Month,Year,Infrastructure_Machineries_Compactors,Infrastructure_Machineries_Excavators(crawler),Infrastructure_Machineries_Loaders (Wheeled),Infrastructure_Machineries_Skid Steer Loaders,Infrastructure_Machineries_Tele Handlers
0,0.034464,80,35,1,1,5000.56,1,1,2019,False,True,False,False,False
1,0.034464,70,35,1,1,5000.56,1,1,2019,False,False,True,False,False
2,0.034464,70,35,1,1,5000.56,1,1,2019,False,False,False,True,False
3,0.034464,68,35,1,1,5000.56,1,1,2019,True,False,False,False,False
4,0.034464,61,35,1,1,5000.56,1,1,2019,False,False,False,False,True


In [79]:
X = df.drop(columns=['Daily_Sales_Quantity'])
y = df['Daily_Sales_Quantity']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

xgb_model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, learning_rate=0.1, max_depth=5)
xgb_model.fit(X_train, y_train)

y_pred = xgb_model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')

inventory_space = 5000
machine_types = {
    'Backhoe Loader': {'space': 15, 'price': 3000000},
    'Excavators (crawler)': {'space': 25, 'price': 5000000},
    'Loaders (Wheeled)': {'space': 20, 'price': 4000000},
    'Skid Steer Loaders': {'space': 10, 'price': 2000000},
    'Compactors': {'space': 12, 'price': 2500000},
    'Tele Handlers': {'space': 18, 'price': 3500000}
}

predicted_demand = xgb_model.predict(X) 

def optimize_inventory(demand, space_constraint):
    allocation = {}
    total_space_used = 0
    for machine, specs in sorted(machine_types.items(), key=lambda x: -x[1]['price']):
        max_units = space_constraint // specs['space']
        allocation[machine] = min(max_units, int(demand.mean()))
        total_space_used += allocation[machine] * specs['space']
        space_constraint -= allocation[machine] * specs['space']
        if space_constraint <= 0:
            break
    return allocation

optimized_inventory = optimize_inventory(predicted_demand, inventory_space)
print("Optimized Inventory Allocation:", optimized_inventory)

Mean Absolute Error: 7.984801489130324
Optimized Inventory Allocation: {'Excavators (crawler)': 200}


In [87]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

# Load dataset
df = pd.read_csv("Cleaned_ready for ml.csv")

# Convert date to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Feature Engineering
df['Day'] = df['Date'].dt.day
df['Month'] = df['Date'].dt.month
df['Year'] = df['Date'].dt.year

# Drop unnecessary columns
drop_cols = ['Date', 'Customer_ID', 'Region']
df = df.drop(columns=[col for col in drop_cols if col in df.columns], errors='ignore')

# Handling categorical variables
df = pd.get_dummies(df, drop_first=True)

# Splitting dataset
X = df.drop(columns=['Daily_Sales_Quantity'])
y = df['Daily_Sales_Quantity']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train XGBoost Model
xgb_model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, learning_rate=0.1, max_depth=5)
xgb_model.fit(X_train, y_train)

# Predictions
y_pred = xgb_model.predict(X_test)

# Evaluation
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')

# Inventory Optimization
inventory_space = 5000
machine_types = {
    'Backhoe Loader': {'space': 15, 'price': 3000000},
    'Excavators (crawler)': {'space': 25, 'price': 5000000},
    'Loaders (Wheeled)': {'space': 20, 'price': 4000000},
    'Skid Steer Loaders': {'space': 10, 'price': 2000000},
    'Compactors': {'space': 12, 'price': 2500000},
    'Tele Handlers': {'space': 18, 'price': 3500000}
}

# Get predicted demand
predicted_demand = xgb_model.predict(X)

def optimize_inventory(demand, space_constraint):
    allocation = {}
    total_space_used = 0
    for machine, specs in sorted(machine_types.items(), key=lambda x: -x[1]['price']):
        max_units = space_constraint // specs['space']
        allocation[machine] = min(max_units, int(demand.mean()))
        total_space_used += allocation[machine] * specs['space']
        space_constraint -= allocation[machine] * specs['space']
        if space_constraint <= 0:
            break
    return allocation

optimized_inventory = optimize_inventory(predicted_demand, inventory_space)
print("Optimized Inventory Allocation:", optimized_inventory)

def process_new_data(input_file):
    input_df = pd.read_csv(input_file)
    input_df['Date'] = pd.to_datetime(input_df['Date'])
    input_df['Day'] = input_df['Date'].dt.day
    input_df['Month'] = input_df['Date'].dt.month
    input_df['Year'] = input_df['Date'].dt.year
    input_df = input_df.drop(columns=[col for col in drop_cols if col in input_df.columns], errors='ignore')
    input_df = pd.get_dummies(input_df, drop_first=True)
    missing_cols = set(X_train.columns) - set(input_df.columns)
    for col in missing_cols:
        input_df[col] = 0
    input_df = input_df[X_train.columns]
    input_predictions = xgb_model.predict(input_df)
    input_df['Predicted_Sales'] = input_predictions
    input_df.to_csv("/mnt/data/predicted_sales.csv", index=False)
    new_optimized_inventory = optimize_inventory(input_predictions, inventory_space)
    print("Optimized Inventory Allocation for Input Data:", new_optimized_inventory)
process_new_data("input_data.csv")

Mean Absolute Error: 7.590083189483722
Optimized Inventory Allocation: {'Excavators (crawler)': 200}


  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0
  input_df[col] = 0


OSError: Cannot save file into a non-existent directory: '\mnt\data'