In [1]:
import pandas as pd
from lightgbm import LGBMRegressor
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error


In [2]:
# Read the training CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; point it at your own copy of the file.
train_csv_path = r'C:\Users\Ahsan\Desktop\Data_mining_Project\train_0irEZ2H.csv'
data = pd.read_csv(train_csv_path)

In [3]:
# Impute missing 'total_price' values with the column mean (other columns are left untouched).
# NOTE(review): the mean is taken over every row, i.e. before the train/test split below.
total_price_mean = data['total_price'].mean()
data = data.fillna({'total_price': total_price_mean})

In [4]:
# Parse 'week' into datetimes, then derive calendar features from the parsed column.
# NOTE(review): no explicit format= is given, so parsing relies on pandas' inference —
# confirm the CSV's date layout (day-first vs. month-first) is interpreted as intended.
data = data.assign(
    week=pd.to_datetime(data['week']),
    day_of_week=lambda frame: frame['week'].dt.dayofweek,
    month=lambda frame: frame['week'].dt.month,
    year=lambda frame: frame['week'].dt.year,
)

In [5]:
# Target: the 'units_sold' column.
y = data['units_sold']
# Features: every remaining column except the record identifier and the raw 'week'
# datetime (its derived calendar columns stay in X).
X = data.drop(columns=['record_ID', 'week', 'units_sold'])

In [6]:
# Hold out 20% of the rows for testing; the fixed seed makes the random split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Hyperparameter grid searched for RandomForestRegressor
# (3 * 4 * 3 * 3 * 3 = 324 candidate combinations).
param_grid = {
    'n_estimators': [50, 100, 200],        # number of trees in the forest
    'max_depth': [None, 5, 10, 15],        # None lets trees grow until the leaf/split limits stop them
    'min_samples_split': [2, 5, 10],       # minimum samples needed to split an internal node
    'min_samples_leaf': [1, 2, 4],         # minimum samples required in a leaf
    # 'auto' is no longer accepted by current scikit-learn releases; for a random-forest
    # regressor it meant "consider all features", which is spelled 1.0 here.
    'max_features': [1.0, 'sqrt', 'log2'],
}

In [8]:
# NOTE(review): this import sits apart from the other imports in the first cell.
from sklearn.ensemble import RandomForestRegressor
# Base estimator passed to the grid search; only the seed is fixed here, the remaining
# hyperparameters are supplied by the search over param_grid.
rf_model = RandomForestRegressor(random_state=42)

In [None]:
# Exhaustive search over param_grid with 3-fold cross-validation on the training split.
# Candidates are ranked by negative MSE, all available cores are used (n_jobs=-1),
# and verbose=2 logs progress for each fit.
grid_search = GridSearchCV(
    estimator=rf_model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)


Fitting 3 folds for each of 324 candidates, totalling 972 fits


In [None]:
# Read the hyperparameter combination the grid search selected and print it so the
# chosen settings are recorded in the notebook output.
best_params = grid_search.best_params_
print(f'Best Parameters: {best_params}')

In [None]:
# GridSearchCV (default refit=True) has already refit a clone of rf_model, configured
# with the best parameters, on the full X_train / y_train. Reuse that fitted estimator
# rather than building and training an identical forest a second time.
best_model = grid_search.best_estimator_
best_model


In [None]:
# Predict the target for the held-out rows in X_test using the tuned model.
y_pred = best_model.predict(X_test)

In [None]:
# Score the held-out predictions with mean squared error (lower is better).
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse}')