In [1]:
import pandas as pd
import numpy as np 
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error,root_mean_squared_error,r2_score

In [2]:
# Load the already-encoded feature table; the first CSV column is used as the row index.
df = pd.read_csv('encoded_data',index_col=0)
# Preview the first rows to confirm the columns were parsed as expected.
df.head()

Unnamed: 0,ppi,is_touchscreen,ram,opsys,hdd,ssd,clock_speed,cpu_mean,dual_memory,inches,weight,hybrid,typename,flashstorage,gpu_mean,price_log
0,226.98,0,8.0,0.0,0,128,2.3,11.301072,0,13.3,1.37,0,0.0,0,11.463797,11.175755
1,127.68,0,8.0,0.0,0,0,1.8,11.030615,0,13.3,1.34,0,0.0,128,10.932775,10.776777
2,141.21,0,8.0,1.0,0,256,2.5,10.728625,0,15.6,1.86,0,1.0,0,10.924892,10.329931
3,220.53,0,16.0,0.0,0,512,2.7,10.826395,0,15.4,1.83,0,0.0,0,10.826395,11.814476
4,226.98,0,8.0,0.0,0,256,3.1,11.596266,0,13.3,1.37,0,0.0,0,11.596266,11.473101


In [3]:
# Target is the log-transformed price; every remaining column is a feature.
y = df['price_log']
X = df.drop(columns='price_log')

# Fixed seed keeps the 80/20 partition reproducible across kernel restarts.
# NOTE(review): the model evaluated below is loaded from disk, so these
# train/test metrics are only meaningful if training used this exact split
# (same row order, test_size and random_state) - confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Print column dtypes and non-null counts for the feature frame before predicting.
X.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1244 entries, 0 to 1243
Data columns (total 15 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   ppi             1244 non-null   float64
 1   is_touchscreen  1244 non-null   int64  
 2   ram             1244 non-null   float64
 3   opsys           1244 non-null   float64
 4   hdd             1244 non-null   int64  
 5   ssd             1244 non-null   int64  
 6   clock_speed     1244 non-null   float64
 7   cpu_mean        1244 non-null   float64
 8   dual_memory     1244 non-null   int64  
 9   inches          1244 non-null   float64
 10  weight          1244 non-null   float64
 11  hybrid          1244 non-null   int64  
 12  typename        1244 non-null   float64
 13  flashstorage    1244 non-null   int64  
 14  gpu_mean        1244 non-null   float64
dtypes: float64(9), int64(6)
memory usage: 155.5 KB


In [5]:
# Create an empty regressor, then populate it from the saved JSON model file.
model1 = XGBRegressor()
model1.load_model('xgb_regressor_model.json')

In [6]:
# Predict on both partitions with the restored model.
y_train_pred = model1.predict(X_train)
y_test_pred = model1.predict(X_test)

# (actual, predicted) pairs in train-then-test order, reused for every metric.
_eval_pairs = ((y_train, y_train_pred), (y_test, y_test_pred))

# All scores are on the log-price scale, because the target is `price_log`.
train_mae, test_mae = (
    mean_absolute_error(actual, predicted) for actual, predicted in _eval_pairs
)
train_rmse, test_rmse = (
    root_mean_squared_error(actual, predicted) for actual, predicted in _eval_pairs
)
train_r2, test_r2 = (
    r2_score(actual, predicted) for actual, predicted in _eval_pairs
)

print(f"Train MAE: {train_mae:.2f}, Test MAE: {test_mae:.2f}")
print(f"Train RMSE: {train_rmse:.2f}, Test RMSE: {test_rmse:.2f}")
print(f"Train R²: {train_r2:.2f}, Test R²: {test_r2:.2f}")

Train MAE: 0.07, Test MAE: 0.15
Train RMSE: 0.09, Test RMSE: 0.20
Train R²: 0.98, Test R²: 0.88


### SAMPLE PREDICTIONS

In [7]:
def predict_results(input_X, model, rmse=None):
    """Predict on the log scale and return the value with a +/- RMSE band.

    The model is expected to output log-transformed targets (this notebook's
    target column is ``price_log``); results are also exponentiated back to
    the original scale.

    Parameters
    ----------
    input_X : array-like
        Feature rows, passed unchanged to ``model.predict``.
    model : object
        Any fitted estimator exposing ``predict(input_X)``.
    rmse : float, optional
        Half-width of the band on the log scale. When omitted, the
        notebook-level ``test_rmse`` is used, so the metrics cell must have
        been executed first.

    Returns
    -------
    tuple
        ``(log_pred, log_upper, log_lower, pred, (pred_lower, pred_upper))``.
        Note the ordering: the log-scale bounds are returned as
        (upper, lower), while the original-scale pair is (lower, upper).
        This is kept for backward compatibility with existing callers.
    """
    if rmse is None:
        # Fall back to the held-out RMSE computed earlier in the notebook.
        rmse = test_rmse

    log_pred = model.predict(input_X)
    log_upper = log_pred + rmse
    log_lower = log_pred - rmse

    # Back-transform from log scale to the original target scale.
    pred = np.exp(log_pred)
    pred_lower = np.exp(log_lower)
    pred_upper = np.exp(log_upper)

    return log_pred, log_upper, log_lower, pred, (pred_lower, pred_upper)

In [8]:
# Take the held-out row at position 1 and shape it as a one-sample 2-D batch,
# which is the layout predict() is given elsewhere in this notebook.
input_X = X_test.iloc[1, :].to_numpy().reshape(1, -1)
input_X

array([[141.21      ,   0.        ,  16.        ,   2.        ,
          0.        , 512.        ,   2.6       ,  11.35365564,
          0.        ,  15.6       ,   3.31      ,   0.        ,
          2.        ,   0.        ,  11.06229896]])

In [9]:
# Run the sample row through the loaded model; the result tuple holds the
# log-scale prediction, its +/- test_rmse bounds, and the exponentiated values.
predict_results(input_X=input_X,model=model1)

(array([11.337991], dtype=float32),
 array([11.540751], dtype=float32),
 array([11.13523], dtype=float32),
 array([83951.18], dtype=float32),
 (array([68543.92], dtype=float32), array([102821.67], dtype=float32)))

In [10]:
# Ground truth for the same held-out row (position 1), for comparison with the
# prediction above: first on the log scale, then exponentiated.
actual_log = y_test.iloc[1]
print(actual_log)
np.exp(actual_log)

11.149519345988928


69530.40000000002