In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the cleaned fashion-retail sales table (path is relative to the
# notebook's working directory).
data = pd.read_csv('Fashion_Retail_Sales_Clean.csv')

# Parse purchase dates in place, so `data` itself carries datetime values
# from here on.
# NOTE(review): default parsing is used; if the file stores day-first dates,
# `dayfirst=True` (or an explicit `format=`) may be needed -- confirm.
data['Date Purchase'] = pd.to_datetime(data['Date Purchase'])

# One-hot encode the purchased item; drop_first=True omits the first level so
# it acts as the implicit baseline category.
data_encoded = pd.get_dummies(data, columns=['Item Purchased'], drop_first=True)

# Collapse the payment method to a binary flag: 1 for 'Credit Card', 0 for
# every other value (missing values included).
data_encoded['Payment Method'] = (
    data_encoded['Payment Method'] == 'Credit Card'
).astype('int64')

# Replace the raw timestamp with its numeric year / month / day components.
data_encoded = (
    data_encoded
    .assign(
        Year=lambda frame: frame['Date Purchase'].dt.year,
        Month=lambda frame: frame['Date Purchase'].dt.month,
        Day=lambda frame: frame['Date Purchase'].dt.day,
    )
    .drop(columns=['Date Purchase'])
)

# Target is the review rating; every remaining column is used as a feature.
# NOTE(review): any identifier-like columns present in the CSV are kept as
# features too -- verify that this is intended.
y = data_encoded['Review Rating']
X = data_encoded.drop(columns=['Review Rating'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Model initialization
# The two ensemble models get a fixed random_state for reproducible fits.
linear_model = LinearRegression()
rf_model = RandomForestRegressor(random_state=42)
gb_model = GradientBoostingRegressor(random_state=42)


def _fit_and_score(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model : estimator exposing ``fit`` and ``predict``
        Fitted in place on ``(X_train, y_train)``.
    X_train, y_train : training features and target.
    X_test, y_test : held-out features and target.

    Returns
    -------
    tuple
        ``(preds, rmse, r2)``: the test-set predictions, their root mean
        squared error, and their R^2 score.
    """
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    # RMSE is taken as sqrt(MSE) instead of passing `squared=False`: that
    # argument was deprecated in scikit-learn 1.4 and removed in 1.6, so the
    # explicit square root works on old and new releases alike.
    rmse = np.sqrt(mean_squared_error(y_test, preds))
    r2 = r2_score(y_test, preds)
    return preds, rmse, r2


# Training, prediction and evaluation (one call per model; the individual
# result names are kept so later cells can keep using them).
linear_preds, linear_rmse, linear_r2 = _fit_and_score(
    linear_model, X_train, y_train, X_test, y_test
)
rf_preds, rf_rmse, rf_r2 = _fit_and_score(
    rf_model, X_train, y_train, X_test, y_test
)
gb_preds, gb_rmse, gb_r2 = _fit_and_score(
    gb_model, X_train, y_train, X_test, y_test
)

# NOTE(review): an R^2 below 0 means the model does worse on the test split
# than a constant prediction of the target mean; if that shows up here, the
# current features carry little signal for 'Review Rating'.
print("Linear Regression RMSE:", linear_rmse)
print("Linear Regression R^2:", linear_r2)
print("Random Forest RMSE:", rf_rmse)
print("Random Forest R^2:", rf_r2)
print("Gradient Boosting RMSE:", gb_rmse)
print("Gradient Boosting R^2:", gb_r2)




Linear Regression RMSE: 1.1246811707457667
Linear Regression R^2: -0.012156445499727164
Random Forest RMSE: 1.1520881727196874
Random Forest R^2: -0.06208734185913678
Gradient Boosting RMSE: 1.1224698943190292
Gradient Boosting R^2: -0.008180283215206474
