### Linear Regression 

In [114]:
import pandas as pd
from sklearn.linear_model import LinearRegression 


In [115]:
# Load the training and test tables from CSV files (paths are relative to
# the notebook's working directory).
train = pd.read_csv(filepath_or_buffer="BMPO_train.csv")
test = pd.read_csv(filepath_or_buffer="BMPO_test.csv")

In [116]:
# Split the training table into the target series and the feature frame.
# The target is 'Item_Outlet_Sales'; the other listed columns are excluded
# from the feature set.
ytrain = train['Item_Outlet_Sales']
xtrain = train.drop(
    columns=[
        'Item_Outlet_Sales',
        'Item_Identifier',
        'Outlet_Identifier',
        'Item_Type',
        'Outlet_Type',
        'Item_MRP_qcut',
    ]
)

# The test table gets the same exclusions, minus the target column, which
# is not in its drop list.
xtest = test.drop(
    columns=[
        'Item_Identifier',
        'Outlet_Identifier',
        'Item_Type',
        'Outlet_Type',
        'Item_MRP_qcut',
    ]
)

# Show the three shapes, one per line, as a quick sanity check.
print(xtrain.shape, ytrain.shape, xtest.shape, sep="\n")

(6113, 10)
(6113,)
(4705, 10)


In [117]:
# Create a LinearRegression estimator and fit it on the training features
# and target; `fit` returns the fitted estimator, which is bound to `model`.
model = LinearRegression().fit(X=xtrain, y=ytrain)

In [118]:
# Use the fitted linear model to generate predictions for the test features.
y_pred = model.predict(X=xtest)

In [119]:
# Coefficient of determination (R^2) of the fitted linear model. It is
# computed on xtrain/ytrain, i.e. the same data the model was fitted on,
# so it is an in-sample score rather than a held-out estimate.
r_sq = model.score(X=xtrain, y=ytrain)

# Report the score together with the fitted intercept and coefficients.
print(
    f"coefficient of determination: {r_sq}",
    f"intercept: {model.intercept_}",
    f"coefficients: {model.coef_}",
    sep="\n",
)

coefficient of determination: 0.46919485779566616
intercept: -127.63500741811448
coefficients: [-0.00980709  0.01249349 -0.06237951  0.58928394  0.06434409 -0.30128912
  0.33182579  0.0226325  -0.73432709 -2.210074  ]


.

### Regularized Linear Regression

In [120]:
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [121]:
# Target series and feature frame for the regularized model.
y = train['Item_Outlet_Sales']
X = train.drop(
    columns=[
        'Item_Outlet_Sales',
        'Item_Identifier',
        'Outlet_Identifier',
        'Item_Type',
        'Outlet_Type',
        'Item_MRP_qcut',
    ]
)

# Hold out 20% of the rows for validation; the fixed random_state makes the
# split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [122]:
# Fit a Lasso model (alpha=0.1) on the training split.
# NOTE(review): StandardScaler is imported in this section but no scaling is
# applied in the cells shown here before fitting — confirm whether
# standardising the features was intended.
lasso = Lasso(alpha=0.1).fit(X_train, y_train)

# Score the model on the validation split: MSE from explicit predictions,
# R^2 from the estimator's own `score` method.
y_pred = lasso.predict(X_val)
r2 = lasso.score(X_val, y_val)
mse = mean_squared_error(y_val, y_pred)

print("MSE:", mse)
print("R2:", r2)

MSE: 0.5999212204836335
R2: 0.38494022711091747


.

### Random Forest 

In [123]:
from sklearn.ensemble import RandomForestRegressor

In [124]:
# Target series and feature frames for the random forest section.
y = train['Item_Outlet_Sales']
X = train.drop(
    columns=[
        'Item_Outlet_Sales',
        'Item_Identifier',
        'Outlet_Identifier',
        'Item_Type',
        'Outlet_Type',
        'Item_MRP_qcut',
    ]
)
X_test = test.drop(
    columns=[
        'Item_Identifier',
        'Outlet_Identifier',
        'Item_Type',
        'Outlet_Type',
        'Item_MRP_qcut',
    ]
)

# Reserve 20% of the training rows as a validation set (seeded for a
# repeatable split).
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [125]:
# Build a 100-tree random forest with a fixed seed and fit it on the
# training split; `fit` returns the fitted estimator.
rf = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predictions for the held-out validation rows.
y_pred = rf.predict(X=X_val)


In [126]:
# Evaluate the random forest on the held-out validation split using mean
# squared error and R-squared.
# MSE is computed from the validation predictions stored in `y_pred`.
# R-squared comes from the estimator's own `score` method, the same approach
# the Lasso section uses, so this cell does not depend on an `r2_score` name
# that is not imported anywhere in the visible notebook cells.
mse = mean_squared_error(y_val, y_pred)
r2 = rf.score(X_val, y_val)
print('Mean Squared Error:', mse)
print('R-squared:', r2)

Mean Squared Error: 0.42380756461164737
R-squared: 0.565497976169979


In [127]:
# Apply the fitted random forest to the test feature frame.
y_pred_test = rf.predict(X=X_test)

.

### XGBoost

In [128]:
import xgboost as xgb
from xgboost import XGBRegressor


In [129]:
# Target series and feature frames for the XGBoost section.
y = train['Item_Outlet_Sales']
X = train.drop(
    columns=[
        'Item_Outlet_Sales',
        'Item_Identifier',
        'Outlet_Identifier',
        'Item_Type',
        'Outlet_Type',
        'Item_MRP_qcut',
    ]
)
X_test = test.drop(
    columns=[
        'Item_Identifier',
        'Outlet_Identifier',
        'Item_Type',
        'Outlet_Type',
        'Item_MRP_qcut',
    ]
)

# 80/20 train/validation split with a fixed seed so it can be reproduced.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [131]:
# Configure a gradient-boosted regressor (100 trees, depth 5, learning rate
# 0.05, squared-error objective, fixed seed) and fit it on the training
# split; `fit` returns the fitted estimator, which is bound to `model`.
model = XGBRegressor(
    n_estimators=100,
    learning_rate=0.05,
    max_depth=5,
    objective="reg:squarederror",
    random_state=42,
).fit(X_train, y_train)

# Predictions for the held-out validation rows.
y_pred = model.predict(X_val)

In [132]:
# Evaluate the XGBoost model on the held-out validation split using mean
# squared error and R-squared.
# MSE is computed from the validation predictions stored in `y_pred`.
# R-squared comes from the estimator's own `score` method, the same approach
# the Lasso section uses, so this cell does not depend on an `r2_score` name
# that is not imported anywhere in the visible notebook cells.
mse = mean_squared_error(y_val, y_pred)
r2 = model.score(X_val, y_val)
print("MSE:", mse)
print("R-squared:", r2)

MSE: 0.3871243818861729
R-squared: 0.6031068309089231
