In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

train = pd.read_csv("dataset.csv")
train['Compartments'] = train['Compartments'].fillna(train['Compartments'].median())
features = ['Brand', 'Material', 'Size', 'Laptop Compartment', 'Waterproof', 'Style', 'Color']

encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
encoded_category = encoder.fit_transform(train[features])
encoded_df = pd.DataFrame(encoded_category, columns=encoder.get_feature_names_out(features))

train = train.drop(columns=features).reset_index(drop=True)
train = pd.concat([train, encoded_df], axis=1)
train.head()


Unnamed: 0,Compartments,Weight Capacity (kg),Price,Brand_Adidas,Brand_Jansport,Brand_Nike,Brand_Puma,Brand_Under Armour,Brand_nan,Material_Canvas,...,Style_Messenger,Style_Tote,Style_nan,Color_Black,Color_Blue,Color_Gray,Color_Green,Color_Pink,Color_Red,Color_nan
0,2.0,13.340058,143.445135,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,4.0,5.91803,72.086319,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,5.0,24.088386,29.699631,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1.0,5.0,27.18199,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,8.0,11.258172,71.953236,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error


train = train.dropna(subset=['Price'])

X = train.drop(columns=['Price'])
y = train['Price']

X_encoded = pd.get_dummies(X, drop_first=True)

X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")


Mean Squared Error: 1594.5033143876117
Mean Absolute Error: 34.25700570938816


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
from xgboost import XGBRegressor

train = pd.read_csv("dataset.csv")

train['Compartments'] = train['Compartments'].fillna(train['Compartments'].median())

train['Total_Compartments'] = train['Compartments'] + train['Laptop Compartment'].map({'Yes': 1, 'No': 0})

categorical_features = ['Brand', 'Material', 'Size', 'Laptop Compartment', 'Waterproof', 'Style', 'Color']

encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
encoded_category = encoder.fit_transform(train[categorical_features])
encoded_df = pd.DataFrame(encoded_category, columns=encoder.get_feature_names_out(categorical_features))

train = train.drop(columns=categorical_features).reset_index(drop=True)
train = pd.concat([train, encoded_df], axis=1)

train = train.dropna(subset=['Price'])

X = train.drop(columns=['Price'])
y = train['Price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = XGBRegressor(n_estimators=500, learning_rate=0.05, max_depth=5, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print(f"Prediction: {y_pred[:50]}")

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")

# feature_importances = pd.Series(model.feature_importances_, index=X_train.columns)
# feature_importances.nlargest(10).plot(kind='barh')
# plt.title("Top 10 Feature Importances")
# plt.show()


Prediction: [73.96882  78.78145  83.007385 83.53759  88.74695  74.53537  87.04291
 88.052536 66.94466  78.603455 79.02738  85.2419   85.2352   78.70987
 81.89919  95.88264  79.350006 69.07209  82.74044  83.09205  80.608635
 84.01324  83.676735 79.836365 78.0928   76.281204 64.59686  80.55926
 83.546364 82.939415 80.40112  84.30337  95.338806 70.96474  81.09933
 80.19031  89.70524  79.36484  77.47422  83.16909  81.055115 75.914314
 80.65211  76.702835 83.10643  89.873604 77.19324  59.14771  77.48837
 85.7964  ]
Mean Squared Error: 1568.0025634438039
Mean Absolute Error: 34.18342997882912
