In [31]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

from sklearn import metrics

import warnings
warnings.filterwarnings(action='ignore')

In [8]:
data = pd.read_csv('D:/Music/CSV/games_data.csv')
data

Unnamed: 0.1,Unnamed: 0,game,score,leaderbord,gamers,comp_perc,rating,url,min_comp_time,max_comp_time
0,0,A Boy and His Blob,638,2.02,2194,16.5,3.2,https://www.truetrophies.com/game/A-Boy-and-Hi...,15,20
1,1,A Hat in Time,1992,1.53,7062,35.9,4.2,https://www.truetrophies.com/game/A-Hat-in-Tim...,15,20
2,2,A Hero and a Garden,1364,1.01,503,97.6,5.0,https://www.truetrophies.com/game/A-Hero-and-a...,0,1
3,3,A Hero and a Garden (EU),1363,1.01,581,97.8,2.9,https://www.truetrophies.com/game/A-Hero-and-a...,0,1
4,4,A King's Tale: Final Fantasy XV,637,2.02,21914,14.1,3.3,https://www.truetrophies.com/game/A-Kings-Tale...,4,5
...,...,...,...,...,...,...,...,...,...,...
1579,1579,36 Fragments of Midnight,1367,1.06,8472,82.3,2.5,https://www.truetrophies.com/game/36-Fragments...,0,1
1580,1580,36 Fragments of Midnight (Asia),1335,1.03,2131,88.9,2.4,https://www.truetrophies.com/game/36-Fragments...,0,1
1581,1581,36 Fragments of Midnight (EU),1382,1.07,12273,79.2,2.4,https://www.truetrophies.com/game/36-Fragments...,0,1
1582,1582,428: Shibuya Scramble,1943,1.47,916,41.5,4.2,https://www.truetrophies.com/game/428-Shibuya-...,40,50


In [10]:
data.isna().sum()

Unnamed: 0       0
game             0
score            0
leaderbord       0
gamers           0
comp_perc        0
rating           0
url              0
min_comp_time    0
max_comp_time    0
dtype: int64

In [11]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1584 entries, 0 to 1583
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0     1584 non-null   int64  
 1   game           1584 non-null   object 
 2   score          1584 non-null   int64  
 3   leaderbord     1584 non-null   float64
 4   gamers         1584 non-null   int64  
 5   comp_perc      1584 non-null   float64
 6   rating         1584 non-null   float64
 7   url            1584 non-null   object 
 8   min_comp_time  1584 non-null   int64  
 9   max_comp_time  1584 non-null   int64  
dtypes: float64(3), int64(5), object(2)
memory usage: 123.9+ KB


In [12]:
data = data.drop(['Unnamed: 0', 'game', 'url'], axis = 1)
data

Unnamed: 0,score,leaderbord,gamers,comp_perc,rating,min_comp_time,max_comp_time
0,638,2.02,2194,16.5,3.2,15,20
1,1992,1.53,7062,35.9,4.2,15,20
2,1364,1.01,503,97.6,5.0,0,1
3,1363,1.01,581,97.8,2.9,0,1
4,637,2.02,21914,14.1,3.3,4,5
...,...,...,...,...,...,...,...
1579,1367,1.06,8472,82.3,2.5,0,1
1580,1335,1.03,2131,88.9,2.4,0,1
1581,1382,1.07,12273,79.2,2.4,0,1
1582,1943,1.47,916,41.5,4.2,40,50


In [21]:
def preprocess_inputs(data):
    
    X = data.drop('rating', axis = 1)
    Y = data['rating']
    
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size = 0.8, random_state = 1)
    
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = pd.DataFrame(scaler.transform(X_train), columns = X_train.columns, index = X_train.index)
    X_test = pd.DataFrame(scaler.transform(X_test), columns = X_test.columns, index = X_test.index)
    
    return X_train, X_test, Y_train, Y_test

In [22]:
X_train, X_test, Y_train, Y_test = preprocess_inputs(data)

In [18]:
print(X.shape, X_train.shape, X_test.shape)

(1584, 6) (1267, 6) (317, 6)


In [23]:
X_train

Unnamed: 0,score,leaderbord,gamers,comp_perc,min_comp_time,max_comp_time
142,0.379779,-0.028632,2.805802,-1.128541,0.647392,0.191844
736,0.445328,0.423457,0.440767,-0.941726,1.522378,0.741739
599,-0.153457,-0.371124,-0.400599,0.102741,-0.472589,-0.303060
632,-0.409763,-0.782114,-0.525393,1.339685,-0.577587,-0.344302
404,-0.438487,-0.754715,-0.544695,1.147209,-0.542587,-0.330555
...,...,...,...,...,...,...
715,0.975618,1.245438,-0.099609,-1.156847,0.647392,0.191844
905,0.620618,0.354959,2.041555,-1.386120,0.122401,-0.014366
1096,-0.985717,0.245361,-0.377204,-1.207796,-0.297592,-0.220576
235,0.038037,-0.124530,0.168426,-0.307686,0.122401,-0.014366


In [28]:
models ={
    "Linear Regression" : LinearRegression(),
    "Lasso" : Lasso(),
    "Ridge" : Ridge(),
    "KNeighbors" : KNeighborsRegressor(),
    "Decision Tree" : DecisionTreeRegressor(),
    "Neural Network" : MLPRegressor(),
    "Random Forest" : RandomForestRegressor(),
    "Gradient Boosting" : GradientBoostingRegressor(),
    "XGBoost" : XGBRegressor(),
    "LGBM" : LGBMRegressor(),
    "Cat Boost" : CatBoostRegressor()
}

for name, model in models.items():
    model.fit(X_train, Y_train)
    print(name + " trained!")

Linear Regression trained!
Lasso trained!
Ridge trained!
KNeighbors trained!
Decision Tree trained!
Neural Network trained!
Random Forest trained!
Gradient Boosting trained!
XGBoost trained!
LGBM trained!
Learning rate set to 0.042503
0:	learn: 0.9517322	total: 163ms	remaining: 2m 42s
1:	learn: 0.9301625	total: 164ms	remaining: 1m 22s
2:	learn: 0.9096351	total: 166ms	remaining: 55.1s
3:	learn: 0.8909098	total: 167ms	remaining: 41.6s
4:	learn: 0.8734182	total: 168ms	remaining: 33.5s
5:	learn: 0.8559385	total: 170ms	remaining: 28.1s
6:	learn: 0.8393756	total: 171ms	remaining: 24.3s
7:	learn: 0.8236243	total: 173ms	remaining: 21.4s
8:	learn: 0.8093484	total: 174ms	remaining: 19.1s
9:	learn: 0.7969200	total: 175ms	remaining: 17.4s
10:	learn: 0.7842151	total: 177ms	remaining: 15.9s
11:	learn: 0.7718614	total: 178ms	remaining: 14.7s
12:	learn: 0.7602499	total: 179ms	remaining: 13.6s
13:	learn: 0.7494831	total: 181ms	remaining: 12.7s
14:	learn: 0.7399478	total: 182ms	remaining: 12s
15:	learn:

250:	learn: 0.5042371	total: 482ms	remaining: 1.44s
251:	learn: 0.5040998	total: 484ms	remaining: 1.44s
252:	learn: 0.5035695	total: 486ms	remaining: 1.43s
253:	learn: 0.5029279	total: 487ms	remaining: 1.43s
254:	learn: 0.5026020	total: 489ms	remaining: 1.43s
255:	learn: 0.5022720	total: 491ms	remaining: 1.43s
256:	learn: 0.5021707	total: 492ms	remaining: 1.42s
257:	learn: 0.5015894	total: 494ms	remaining: 1.42s
258:	learn: 0.5012257	total: 496ms	remaining: 1.42s
259:	learn: 0.5006367	total: 498ms	remaining: 1.42s
260:	learn: 0.5003214	total: 500ms	remaining: 1.42s
261:	learn: 0.5000506	total: 502ms	remaining: 1.41s
262:	learn: 0.4996135	total: 503ms	remaining: 1.41s
263:	learn: 0.4991250	total: 504ms	remaining: 1.4s
264:	learn: 0.4986923	total: 505ms	remaining: 1.4s
265:	learn: 0.4984697	total: 506ms	remaining: 1.4s
266:	learn: 0.4979512	total: 507ms	remaining: 1.39s
267:	learn: 0.4977733	total: 509ms	remaining: 1.39s
268:	learn: 0.4971212	total: 510ms	remaining: 1.39s
269:	learn: 0.4

517:	learn: 0.4186490	total: 812ms	remaining: 756ms
518:	learn: 0.4184925	total: 814ms	remaining: 754ms
519:	learn: 0.4181916	total: 816ms	remaining: 753ms
520:	learn: 0.4177873	total: 818ms	remaining: 752ms
521:	learn: 0.4176023	total: 819ms	remaining: 750ms
522:	learn: 0.4175415	total: 821ms	remaining: 749ms
523:	learn: 0.4173613	total: 823ms	remaining: 747ms
524:	learn: 0.4170864	total: 825ms	remaining: 746ms
525:	learn: 0.4168683	total: 826ms	remaining: 745ms
526:	learn: 0.4166065	total: 828ms	remaining: 743ms
527:	learn: 0.4165111	total: 829ms	remaining: 741ms
528:	learn: 0.4160726	total: 830ms	remaining: 739ms
529:	learn: 0.4159070	total: 831ms	remaining: 737ms
530:	learn: 0.4155875	total: 832ms	remaining: 735ms
531:	learn: 0.4153786	total: 834ms	remaining: 733ms
532:	learn: 0.4152183	total: 835ms	remaining: 731ms
533:	learn: 0.4150985	total: 837ms	remaining: 730ms
534:	learn: 0.4149810	total: 838ms	remaining: 728ms
535:	learn: 0.4146758	total: 839ms	remaining: 726ms
536:	learn: 

766:	learn: 0.3689636	total: 1.14s	remaining: 346ms
767:	learn: 0.3687764	total: 1.14s	remaining: 345ms
768:	learn: 0.3685648	total: 1.14s	remaining: 343ms
769:	learn: 0.3682578	total: 1.14s	remaining: 342ms
770:	learn: 0.3681429	total: 1.15s	remaining: 340ms
771:	learn: 0.3680529	total: 1.15s	remaining: 339ms
772:	learn: 0.3677046	total: 1.15s	remaining: 338ms
773:	learn: 0.3672668	total: 1.15s	remaining: 336ms
774:	learn: 0.3671879	total: 1.15s	remaining: 335ms
775:	learn: 0.3669565	total: 1.16s	remaining: 334ms
776:	learn: 0.3668654	total: 1.16s	remaining: 332ms
777:	learn: 0.3665285	total: 1.16s	remaining: 331ms
778:	learn: 0.3663742	total: 1.16s	remaining: 329ms
779:	learn: 0.3663368	total: 1.16s	remaining: 328ms
780:	learn: 0.3663136	total: 1.16s	remaining: 326ms
781:	learn: 0.3661152	total: 1.16s	remaining: 324ms
782:	learn: 0.3660680	total: 1.16s	remaining: 323ms
783:	learn: 0.3659730	total: 1.17s	remaining: 321ms
784:	learn: 0.3658530	total: 1.17s	remaining: 320ms
785:	learn: 

Cat Boost trained!


In [58]:
for name, model in models.items():
    y_pred = model.predict(X_test)
    rmse = np.sqrt(np.mean(Y_test - y_pred)**2)
    print(name+ "RMSE: {:.4f}".format(rmse))

Linear RegressionRMSE: 0.0618
LassoRMSE: 0.0775
RidgeRMSE: 0.0618
KNeighborsRMSE: 0.0782
Decision TreeRMSE: 0.0407
Neural NetworkRMSE: 0.0628
Random ForestRMSE: 0.0674
Gradient BoostingRMSE: 0.0618
XGBoostRMSE: 0.0512
LGBMRMSE: 0.0664
Cat BoostRMSE: 0.0708


In [47]:
#base model  r2
np.sum(Y_test - Y_test.mean()** 2)

-2159.3549211356476

In [48]:
#my model r2
np.sum((Y_test - y_pred)** 2)

131.66777223610688

In [57]:
#r2 score
for name, model in models.items():
    y_pred = model.predict(X_test)
    r2 = 1 - (np.sum((Y_test - y_pred)**2) / np.sum((Y_test - Y_test.mean())**2))
    print(name+ "R2: {:.5f}".format(r2))
    

Linear RegressionR2: 0.48721
LassoR2: -0.00643
RidgeR2: 0.48756
KNeighborsR2: 0.52412
Decision TreeR2: 0.19840
Neural NetworkR2: 0.57020
Random ForestR2: 0.58133
Gradient BoostingR2: 0.57293
XGBoostR2: 0.45795
LGBMR2: 0.56275
Cat BoostR2: 0.55472
