In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

In [2]:
# Location of the input CSV. The default is the original author's local copy;
# set the POM_DATASET_PATH environment variable to run the notebook on another
# machine without editing this cell.
DATA_PATH = os.environ.get(
    'POM_DATASET_PATH',
    '/Users/kumarmayank/Downloads/Dataset_modified_pom.csv',
)

# Load the raw dataset into a DataFrame.
df = pd.read_csv(DATA_PATH)

In [3]:
# Discard the two columns that the rest of the notebook does not use.
df = df.drop(columns=['Product Availibility index', 'City'])

In [4]:
# Parse 'Date' into datetimes and derive the calendar features used as model inputs.
# Each lambda receives the frame with the columns assigned before it, so the
# Year/Month/Day features are read from the already-parsed 'Date' column.
# NOTE(review): the saved output under this cell looks like the tail of a pandas
# date-parsing warning; confirm the date format of the CSV and consider passing
# format=/dayfirst= explicitly.
df = df.assign(
    Date=lambda frame: pd.to_datetime(frame['Date']),
    Year=lambda frame: frame['Date'].dt.year,
    Month=lambda frame: frame['Date'].dt.month,
    Day=lambda frame: frame['Date'].dt.day,
)

  df['Date'] = pd.to_datetime(df['Date'])


In [5]:
# Model inputs: the three calendar features derived from 'Date'.
X = df[['Year', 'Month', 'Day']]

# Targets: one price series per model (MRP, SP, MSP), pulled out in that order.
y_mrp, y_sp, y_msp = (df[target] for target in ('MRP', 'SP', 'MSP'))

In [6]:
# Hold out 20% of the rows for testing. Passing the features and all three
# targets to a single call keeps their train/test rows aligned; the fixed seed
# makes the split reproducible.
(
    X_train, X_test,
    y_mrp_train, y_mrp_test,
    y_sp_train, y_sp_test,
    y_msp_train, y_msp_test,
) = train_test_split(X, y_mrp, y_sp, y_msp, test_size=0.2, random_state=42)

In [7]:
# Hyperparameter candidates explored by the grid search:
# 3 * 4 * 3 * 3 = 108 combinations per target.
param_grid = dict(
    n_estimators=[50, 100, 150],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

In [8]:
# One independent, identically seeded random-forest regressor per target
# (MRP, SP, MSP). The generator builds a fresh estimator for each name.
rf_regressor_mrp, rf_regressor_sp, rf_regressor_msp = (
    RandomForestRegressor(random_state=42) for _ in range(3)
)

In [9]:
# Set up one exhaustive grid search per target. Each search runs 5-fold
# cross-validation over every combination in `param_grid` and ranks candidates
# by negated MSE (scikit-learn maximises scores, hence the 'neg_' scorer).
# n_jobs=-1 spreads the independent candidate/fold fits over all CPU cores;
# the estimators carry a fixed random_state, so the selected model is unchanged
# and only the wall-clock time drops.
grid_search_mrp = GridSearchCV(
    rf_regressor_mrp, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1
)
grid_search_sp = GridSearchCV(
    rf_regressor_sp, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1
)
grid_search_msp = GridSearchCV(
    rf_regressor_msp, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1
)

In [10]:
# Run the three grid searches on the training split, one target at a time.
# The last call is kept as a bare expression so the cell still displays the
# fitted search object.
grid_search_mrp.fit(X_train, y=y_mrp_train)
grid_search_sp.fit(X_train, y=y_sp_train)
grid_search_msp.fit(X_train, y=y_msp_train)

In [11]:
# Pull the winning estimator out of each finished search (MRP, SP, MSP order).
best_rf_mrp, best_rf_sp, best_rf_msp = (
    search.best_estimator_
    for search in (grid_search_mrp, grid_search_sp, grid_search_msp)
)

In [12]:
# Predict every target on the held-out test features with its tuned model.
y_mrp_pred, y_sp_pred, y_msp_pred = (
    model.predict(X_test) for model in (best_rf_mrp, best_rf_sp, best_rf_msp)
)

In [13]:
# Score each model on the test split with mean squared error (lower is better).
mse_mrp = mean_squared_error(y_true=y_mrp_test, y_pred=y_mrp_pred)
mse_sp = mean_squared_error(y_true=y_sp_test, y_pred=y_sp_pred)
mse_msp = mean_squared_error(y_true=y_msp_test, y_pred=y_msp_pred)

In [14]:
# Report, per target, the selected hyperparameters followed by the test-set MSE.
# Each print emits both lines for one target, separated by a newline.
print(
    f"Best hyperparameters for MRP: {grid_search_mrp.best_params_}",
    f"Mean Squared Error for MRP: {mse_mrp}",
    sep="\n",
)
print(
    f"Best hyperparameters for SP: {grid_search_sp.best_params_}",
    f"Mean Squared Error for SP: {mse_sp}",
    sep="\n",
)
print(
    f"Best hyperparameters for MSP: {grid_search_msp.best_params_}",
    f"Mean Squared Error for MSP: {mse_msp}",
    sep="\n",
)

Best hyperparameters for MRP: {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Mean Squared Error for MRP: 109.41549803469358
Best hyperparameters for SP: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Mean Squared Error for SP: 115.7008401789225
Best hyperparameters for MSP: {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 150}
Mean Squared Error for MSP: 101.7768453047532
