In [77]:
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error


<div class="alert alert-block alert-info">
    5. <b>Train - Test Splitting</b>
</div>

In [78]:
# Load the processed dataset from CSV into `df`.
# pandas is already imported as `pd` in the notebook's import cell, so it is
# not re-imported here (keeps every dependency visible in one place).
df = pd.read_csv('processed_data.csv')
df.info()    # prints column dtypes and non-null counts
df.columns   # last expression of the cell: rendered as the cell's output

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   brand            5000 non-null   int64
 1   processor_brand  5000 non-null   int64
 2   processor_name   5000 non-null   int64
 3   proc_gn          5000 non-null   int64
 4   ram              5000 non-null   int64
 5   ram_type         5000 non-null   int64
 6   ssd              5000 non-null   int64
 7   hdd              5000 non-null   int64
 8   graphic_card_gb  5000 non-null   int64
 9   Price            5000 non-null   int64
dtypes: int64(10)
memory usage: 390.8 KB


Index(['brand', 'processor_brand', 'processor_name', 'proc_gn', 'ram',
       'ram_type', 'ssd', 'hdd', 'graphic_card_gb', 'Price'],
      dtype='object')

In [79]:
# Target vector is the 'Price' column; every remaining column is a feature.
y = df['Price']
X = df.drop(columns='Price')

In [80]:
# 80/20 hold-out split with shuffling disabled: rows keep their file order, so
# the test partition is the trailing 20% of the data.
# NOTE(review): confirm the CSV rows are not sorted (e.g. by brand or price);
# an ordered file would make this unshuffled split unrepresentative.
train_X, test_X, train_y, test_y = train_test_split(
    X,
    y,
    shuffle=False,
    test_size=0.2,
)


In [81]:
# Sanity check of the split: one line per partition, feature-matrix shape
# followed by target-vector shape.
print(f"{train_X.shape} {train_y.shape}")
print(f"{test_X.shape} {test_y.shape}")

(4000, 9) (4000,)
(1000, 9) (1000,)


<div class="alert alert-block alert-info">
    6. <b>Applying Regressor / Model Training</b>
</div>

In [82]:
# Build and train the random-forest regressor in one expression: 300 trees,
# depth capped at 10, fixed seed for repeatable fits. `fit` returns the
# estimator itself, so `model_rf` is the trained model.
model_rf = RandomForestRegressor(
    random_state=42,
    max_depth=10,
    n_estimators=300,
).fit(train_X, train_y)
print(model_rf)

RandomForestRegressor(max_depth=10, n_estimators=300, random_state=42)


In [83]:
# Predict on the held-out rows. `predictions` and `model_pred_rf` are two names
# for the same array object.
model_pred_rf = predictions = model_rf.predict(test_X)
print(predictions)

[184712.65195681 163925.79504651 115293.22562447 120991.80028239
 174394.26643132 110071.67460212 145239.53222919 117130.92802294
 146248.13701522 191574.54241667 131529.7888521   89859.11481174
 148726.19595412  93018.29359622  82368.25221709 133597.94586137
  95697.55155068 153075.63426471 122804.16641691 100607.40213112
 222286.59870311  92900.20806108 146192.66030207  95132.61374047
 248952.98057407 125577.49951385 132157.15561509 114458.82804145
 138097.91705759 160384.42644556 108695.29776885 150718.17875196
  92852.47489331 143776.73749938 109540.75146771 107759.12463897
  91817.9879271  186837.42371812 145173.19421272 108942.23987573
 110041.0855893  183263.3420686  136785.30774421 192826.97175126
 137981.48275888 146677.84657423 244580.16606554 131170.17295736
 170223.22946111 123782.31828021 165289.36465285 185102.88150196
 165864.77790116 104315.92253886  77062.86990144 209872.83617067
 145231.59032102 125098.57273361  81289.10211332 135397.46398275
 190422.91400336 115012.0

In [84]:
# Error metrics on the held-out set. RMSE is derived from MSE so the two
# always agree; the metric calls are independent of one another.
mse = mean_squared_error(test_y, predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(test_y, predictions)
r2 = r2_score(test_y, predictions)

# One metric per line, same order and formatting as before.
print(
    f"R² Score: {r2:.3f}",
    f"MAE: {mae:.2f}",
    f"MSE: {mse:.2f}",
    f"RMSE: {rmse:.2f}",
    sep="\n",
)

R² Score: 0.970
MAE: 3299.16
MSE: 48445688.25
RMSE: 6960.29


In [85]:
# R² (coefficient of determination) is a regression goodness-of-fit measure,
# not a classification accuracy: it can be negative and is not a share of
# "correct" predictions. Label it as what it is so readers are not misled.
# The variable keeps its original name so any cell that reads `accuracy`
# continues to work.
accuracy = r2 * 100
print(f"R² (variance explained): {accuracy:.2f}%")

Accuracy: 97.04%


In [86]:
# Persist the fitted model to disk. The context manager guarantees the file
# handle is flushed and closed even if pickling raises, instead of leaving an
# open handle for the garbage collector to clean up.
with open('model_rf.pkl', 'wb') as model_file:
    pickle.dump(model_rf, model_file)

In [87]:
# Reload the model from 'model_rf.pkl', closing the file handle deterministically.
# SECURITY: unpickling can execute arbitrary code embedded in the file; only
# load pickle files that you created yourself or otherwise fully trust.
with open('model_rf.pkl', 'rb') as model_file:
    model_rf = pickle.load(model_file)