In [26]:
import pandas as pd
import numpy as np
import requests
import json
import os
import time
import ast
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import r2_score, mean_squared_error, root_mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
import joblib

In [2]:
data = pd.read_csv('../data/rawg_cleaned_games_no_dev_data.csv')

In [3]:
data.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,id,slug,name,released,tba,rating,rating_top,ratings_count,...,PC_platform,PlayStation 5_platform,Xbox Series S/X_platform,PlayStation 4_platform,Xbox One_platform,macOS_platform,Nintendo Switch_platform,Linux_platform,Android_platform,iOS_platform
0,0,0,3498,grand-theft-auto-v,Grand Theft Auto V,2013-09-17,False,4.47,5,7118,...,1,1,1,1,1,0,0,0,0,0
1,1,1,3328,the-witcher-3-wild-hunt,The Witcher 3: Wild Hunt,2015-05-18,False,4.64,5,6913,...,1,1,1,1,1,1,1,0,0,0
2,2,2,4200,portal-2,Portal 2,2011-04-18,False,4.59,5,5908,...,1,0,0,0,1,1,0,1,0,0
3,3,3,4291,counter-strike-global-offensive,Counter-Strike: Global Offensive,2012-08-21,False,3.57,4,3574,...,1,0,0,0,0,0,0,1,0,0
4,4,4,5286,tomb-raider,Tomb Raider (2013),2013-03-05,False,4.06,4,4007,...,1,0,0,1,1,1,0,0,0,0


In [4]:
data = data.drop(columns=['parent_platforms', 'user_game', 'updated', 'suggestions_count', 'community_rating', 'tba','Unnamed: 0.1', 'Unnamed: 0'])

In [5]:
data.columns

Index(['id', 'slug', 'name', 'released', 'rating', 'rating_top',
       'ratings_count', 'reviews_text_count', 'added', 'metacritic',
       ...
       'PC_platform', 'PlayStation 5_platform', 'Xbox Series S/X_platform',
       'PlayStation 4_platform', 'Xbox One_platform', 'macOS_platform',
       'Nintendo Switch_platform', 'Linux_platform', 'Android_platform',
       'iOS_platform'],
      dtype='object', length=109)

In [6]:
data = data[(data['owned'] > 150)]

In [7]:
def remove_string(list_str):
        extracted_list = ast.literal_eval(list_str)
        return extracted_list

## Creating Target

In [9]:
conditions = [
    (data['rating_exceptional'] >= 75),
    (data['rating_exceptional'] < 75) & (data['rating_exceptional'] >= 50),
    (data['rating_exceptional'] < 50) & (data['rating_exceptional'] >= 25),
    (data['rating_exceptional'] < 25)
]
choices = [3,2,1,0]
data['target'] = np.select(conditions, choices, default='NaN')

In [10]:
data

Unnamed: 0,id,slug,name,released,rating,rating_top,ratings_count,reviews_text_count,added,metacritic,...,PlayStation 5_platform,Xbox Series S/X_platform,PlayStation 4_platform,Xbox One_platform,macOS_platform,Nintendo Switch_platform,Linux_platform,Android_platform,iOS_platform,target
0,3498,grand-theft-auto-v,Grand Theft Auto V,2013-09-17,4.47,5,7118,65,22059,92.0,...,1,1,1,1,0,0,0,0,0,2
1,3328,the-witcher-3-wild-hunt,The Witcher 3: Wild Hunt,2015-05-18,4.64,5,6913,79,21702,92.0,...,1,1,1,1,1,1,0,0,0,3
2,4200,portal-2,Portal 2,2011-04-18,4.59,5,5908,38,20576,95.0,...,0,0,0,1,1,0,1,0,0,2
3,4291,counter-strike-global-offensive,Counter-Strike: Global Offensive,2012-08-21,3.57,4,3574,29,18267,81.0,...,0,0,0,0,0,0,1,0,0,0
4,5286,tomb-raider,Tomb Raider (2013),2013-03-05,4.06,4,4007,14,17600,86.0,...,0,0,1,1,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10895,375233,cartoon-network-journeys-vr,Cartoon Network Journeys VR,,0.00,0,1,0,153,,...,0,0,0,0,0,0,0,0,0,
10916,18819,driftwood-the-visual-novel,Driftwood The Visual Novel,2016-08-08,0.00,0,1,0,153,,...,0,0,0,0,1,0,1,0,0,
10921,14152,prism-collider,Prism Collider,2017-01-31,0.00,0,1,0,153,,...,0,0,0,0,0,0,0,0,0,
10943,49986,mytd-wo-de-ta-fang,MyTD 我的塔防,,0.00,0,1,0,152,,...,0,0,0,0,0,0,0,0,0,


In [11]:
data.iloc[:, 12:-1]

Unnamed: 0,esrb_rating,rating_exceptional,rating_recommended,rating_meh,rating_skip,owned,beaten,dropped,playing,Action_genre,...,PC_platform,PlayStation 5_platform,Xbox Series S/X_platform,PlayStation 4_platform,Xbox One_platform,macOS_platform,Nintendo Switch_platform,Linux_platform,Android_platform,iOS_platform
0,Mature,59.00,32.70,6.36,1.94,12714,6254.0,1154.0,753.0,1,...,1,1,1,1,1,0,0,0,0,0
1,Mature,76.74,16.34,4.23,2.69,12714,5091.0,995.0,893.0,1,...,1,1,1,1,1,1,1,0,0,0
2,Everyone 10+,69.33,25.16,3.02,2.50,13016,5716.0,627.0,161.0,0,...,1,0,0,0,1,1,0,1,0,0
3,Mature,16.25,46.57,25.94,11.24,14084,1109.0,2097.0,631.0,0,...,1,0,0,0,0,0,0,1,0,0
4,Mature,25.80,60.44,10.79,2.97,11545,4402.0,558.0,116.0,1,...,1,0,0,1,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10895,,,100.00,,,151,1.0,,,1,...,1,0,0,0,0,0,0,0,0,0
10916,,,,100.00,,151,,1.0,,0,...,1,0,0,0,0,1,0,1,0,0
10921,,,,,100.00,153,,,,1,...,1,0,0,0,0,0,0,0,0,0
10943,,,,100.00,,152,,,,0,...,1,0,0,0,0,0,0,0,0,0


In [12]:
data.iloc[:, 12:-1].drop(columns=['rating_exceptional', 'rating_recommended', 'rating_meh', 'rating_skip','owned', 'beaten', 'dropped', 'playing'])

Unnamed: 0,esrb_rating,Action_genre,RPG_genre,Shooter_genre,Puzzle_genre,Adventure_genre,Indie_genre,Platformer_genre,Massively Multiplayer_genre,Sports_genre,...,PC_platform,PlayStation 5_platform,Xbox Series S/X_platform,PlayStation 4_platform,Xbox One_platform,macOS_platform,Nintendo Switch_platform,Linux_platform,Android_platform,iOS_platform
0,Mature,1,0,0,0,0,0,0,0,0,...,1,1,1,1,1,0,0,0,0,0
1,Mature,1,1,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,0,0,0
2,Everyone 10+,0,0,1,1,0,0,0,0,0,...,1,0,0,0,1,1,0,1,0,0
3,Mature,0,0,1,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,0,0
4,Mature,1,0,0,0,0,0,0,0,0,...,1,0,0,1,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10895,,1,0,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
10916,,0,0,0,0,0,1,0,0,0,...,1,0,0,0,0,1,0,1,0,0
10921,,1,0,0,0,0,1,0,0,0,...,1,0,0,0,0,0,0,0,0,0
10943,,0,0,0,0,0,1,0,0,0,...,1,0,0,0,0,0,0,0,0,0


In [14]:
X = data.iloc[:, 12:-1].drop(columns=['rating_exceptional', 'rating_recommended', 'rating_meh', 'rating_skip','owned', 'beaten', 'dropped', 'playing'])
y = data['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)

In [24]:
imputer = SimpleImputer(strategy='median')
classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=123)
encoder = OneHotEncoder()

pipe = Pipeline([
    ('encoder', encoder),
    ('imputer', imputer),
    ('xgboost', classifier)
]).fit(X_train, y_train)

In [30]:
joblib.dump(pipe, '../models/xgboostclassifier_v1.joblib')

['../models/xgboostclassifier_v1.joblib']