In [1]:
import numpy as np
import pandas as pd
import gradio as gr

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.svm import SVR, SVC
from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score, f1_score
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
path = "cleaned_game_data.csv"
game_dataa = pd.read_csv(path)
game_dataa.head()

Unnamed: 0,game_name,place_in_top,genres,release_date,rated_age_letter,age_restriction,avg_playtime,platforms,developers,publisher,other_games_in_series,tags,metascore_rating,place_in_that_year,place_in_rawq_by_genre,genre_of_this_place,game_link,website
0,The Legend of Zelda: Ocarina of Time,1,"Action, Adventure, RPG","Nov 23, 1998",Rated E,0,7.0,"Nintendo Switch, Nintendo 64",Nintendo,Nintendo,"The Legend of Zelda: Tears of the Kingdom, Zel...",Singleplayer,99,9,385,RPG,https://rawg.io/games/the-legend-of-zelda-ocar...,Not defined
1,SoulCalibur,2,"Action, Fighting","Sep 8, 1999",Rated T,0,6.0,"Xbox One, iOS, Xbox 360, Android, Dreamcast","BANDAI NAMCO Entertainment America, NAMCO, Pro...","Bandai Namco Entertainment, Namco","SOUL CALIBUR, SoulCalibur VI, SoulCalibur: Los...",2 players,98,41,152,Fighting,https://rawg.io/games/soulcalibur,Not defined
2,Grand Theft Auto IV,3,Action,"Apr 29, 2008",Rated M,17,10.0,"Xbox 360, PC, PlayStation 3, Xbox One",Rockstar North,"Capcom, Rockstar Games, Take Two Interactive","Grand Theft Auto VI, Grand Theft Auto V, Grand...","Singleplayer, Multiplayer, Atmospheric, Co-op,...",98,1,23,Action,https://rawg.io/games/grand-theft-auto-iv,http://www.rockstargames.com/iv
3,Super Mario Galaxy,4,Platformer,"Nov 12, 2007",Rated E,0,28.0,Wii,Nintendo,Nintendo,"Super Mario Odyssey, Super Mario Run, Super Ma...","console, planet, Gravity, NES, mario",97,35,129,Platformer,https://rawg.io/games/super-mario-galaxy,http://www.nintendo.com/sites/supermariogalaxy/
4,Super Mario Galaxy 2,5,Platformer,"May 23, 2010",Rated E,0,18.0,"Wii, Wii U",Nintendo,Nintendo,"Super Mario Odyssey, Super Mario Run, Super Ma...","collect, Solo, galaxy, light, castle, Gravity,...",97,120,181,Platformer,https://rawg.io/games/super-mario-galaxy-2,http://supermariogalaxy.com/


In [5]:
game_dataa.columns

Index(['game_name', 'place_in_top', 'genres', 'release_date',
       'rated_age_letter', 'age_restriction', 'avg_playtime', 'platforms',
       'developers', 'publisher', 'other_games_in_series', 'tags',
       'metascore_rating', 'place_in_that_year', 'place_in_rawq_by_genre',
       'genre_of_this_place', 'game_link', 'website'],
      dtype='object')

In [None]:
pip install gradio

In [7]:
print("D tree \n")

columns_to_include = ['game_name', 'metascore_rating', 'genres', 'age_restriction', 'avg_playtime', 'developers', 'publisher']
filtered_game_data = game_dataa[columns_to_include].copy()

filtered_game_data['genres'] = filtered_game_data['genres'].str.lower().str.replace(' ', '').str.split(',')
filtered_game_data['developers'] = filtered_game_data['developers'].str.lower().str.replace(' ', '').str.split(',')
filtered_game_data['publisher'] = filtered_game_data['publisher'].str.lower().str.replace(' ', '').str.split(',')

filtered_game_data = filtered_game_data.explode('genres').explode('developers').explode('publisher')
filtered_game_data = pd.get_dummies(filtered_game_data, columns=['genres', 'developers', 'publisher'], drop_first=True)

game_names = game_dataa['game_name'].unique().tolist()

filtered_game_data = filtered_game_data.drop(columns=['game_name']) 
filtered_game_data = filtered_game_data.astype(int)

X = filtered_game_data.drop(['metascore_rating'], axis=1)
y = filtered_game_data['metascore_rating']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

dt_model = DecisionTreeRegressor()
dt_model.fit(X_train_scaled, y_train)

y_pred_dt = dt_model.predict(X_test_scaled)

print("MAE:", mean_absolute_error(y_test, y_pred_dt))
print("MSE:", mean_squared_error(y_test, y_pred_dt))
print("R^2:", r2_score(y_test, y_pred_dt))

def predict_game_metascore(game_name):
    game_data = game_dataa[game_dataa['game_name'] == game_name].copy()
    game_data = game_data.drop('metascore_rating', axis=1)

    game_data['genres'] = game_data['genres'].str.lower().str.replace(' ', '').str.split(',')
    game_data['developers'] = game_data['developers'].str.lower().str.replace(' ', '').str.split(',')
    game_data['publisher'] = game_data['publisher'].str.lower().str.replace(' ', '').str.split(',')

    game_data = game_data.explode('genres').explode('developers').explode('publisher')
    game_data = pd.get_dummies(game_data, columns=['genres', 'developers', 'publisher'], drop_first=True)

    missing_cols = set(X.columns) - set(game_data.columns)
    for col in missing_cols:
        game_data[col] = 0

    game_data = game_data[X.columns]
    game_data = game_data.astype(int)
    game_data_scaled = scaler.transform([game_data.iloc[0]])
    predicted_metascore = round(dt_model.predict(game_data_scaled)[0])
    real_metascore = game_dataa[game_dataa['game_name'] == game_name]['metascore_rating'].iloc[0]
    return f"Predicted Metascore: {predicted_metascore}", f"Real Metascore: {real_metascore}"

gr_interface = gr.Interface(
    fn=predict_game_metascore,
    inputs=gr.Dropdown(choices=game_names, label="Select a Game"),
    outputs=[gr.Textbox(label="Predicted Metascore"), gr.Textbox(label="Real Metascore")]
)

gr_interface.launch()

D tree 

MAE: 0.7769192736425746
MSE: 3.8160054461824284
R^2: 0.7004756884280199
* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [9]:
print("Random forest \n")

columns_to_include = ['game_name', 'metascore_rating', 'genres', 'age_restriction', 'avg_playtime', 'developers', 'publisher']
filtered_game_data = game_dataa[columns_to_include].copy()

filtered_game_data['genres'] = filtered_game_data['genres'].str.lower().str.replace(' ', '').str.split(',')
filtered_game_data['developers'] = filtered_game_data['developers'].str.lower().str.replace(' ', '').str.split(',')
filtered_game_data['publisher'] = filtered_game_data['publisher'].str.lower().str.replace(' ', '').str.split(',')

filtered_game_data = filtered_game_data.explode('genres').explode('developers').explode('publisher')
filtered_game_data = pd.get_dummies(filtered_game_data, columns=['genres', 'developers', 'publisher'], drop_first=True)

game_names = game_dataa['game_name'].unique().tolist()

filtered_game_data = filtered_game_data.drop(columns=['game_name']) 
filtered_game_data = filtered_game_data.astype(int)

X = filtered_game_data.drop(['metascore_rating'], axis=1)
y = filtered_game_data['metascore_rating']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

rf_model = RandomForestRegressor()
rf_model.fit(X_train_scaled, y_train)

y_pred_rf = rf_model.predict(X_test_scaled)

print("MAE:", mean_absolute_error(y_test, y_pred_rf))
print("MSE:", mean_squared_error(y_test, y_pred_rf))
print("R^2:", r2_score(y_test, y_pred_rf))

def predict_game_metascore(game_name):
    game_data = game_dataa[game_dataa['game_name'] == game_name].copy()
    game_data = game_data.drop('metascore_rating', axis=1)

    game_data['genres'] = game_data['genres'].str.lower().str.replace(' ', '').str.split(',')
    game_data['developers'] = game_data['developers'].str.lower().str.replace(' ', '').str.split(',')
    game_data['publisher'] = game_data['publisher'].str.lower().str.replace(' ', '').str.split(',')

    game_data = game_data.explode('genres').explode('developers').explode('publisher')
    game_data = pd.get_dummies(game_data, columns=['genres', 'developers', 'publisher'], drop_first=True)

    missing_cols = set(X.columns) - set(game_data.columns)
    for col in missing_cols:
        game_data[col] = 0

    game_data = game_data[X.columns]
    game_data = game_data.astype(int)
    game_data_scaled = scaler.transform([game_data.iloc[0]])
    predicted_metascore = round(rf_model.predict(game_data_scaled)[0])
    real_metascore = game_dataa[game_dataa['game_name'] == game_name]['metascore_rating'].iloc[0]
    return f"Predicted Metascore: {predicted_metascore}", f"Real Metascore: {real_metascore}"

gr_interface = gr.Interface(
    fn=predict_game_metascore,
    inputs=gr.Dropdown(choices=game_names, label="Select a Game"),
    outputs=[gr.Textbox(label="Predicted Metascore"), gr.Textbox(label="Real Metascore")]
)

gr_interface.launch()

Random forest 

MAE: 1.0029639047069685
MSE: 3.112124992826385
R^2: 0.7557243800752915
* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




In [11]:
print("KNN \n")

columns_to_include = ['game_name', 'metascore_rating', 'genres', 'age_restriction', 'avg_playtime', 'developers', 'publisher']
filtered_game_data = game_dataa[columns_to_include].copy()

filtered_game_data['genres'] = filtered_game_data['genres'].str.lower().str.replace(' ', '').str.split(',')
filtered_game_data['developers'] = filtered_game_data['developers'].str.lower().str.replace(' ', '').str.split(',')
filtered_game_data['publisher'] = filtered_game_data['publisher'].str.lower().str.replace(' ', '').str.split(',')

filtered_game_data = filtered_game_data.explode('genres').explode('developers').explode('publisher')
filtered_game_data = pd.get_dummies(filtered_game_data, columns=['genres', 'developers', 'publisher'], drop_first=True)

game_names = game_dataa['game_name'].unique().tolist()

filtered_game_data = filtered_game_data.drop(columns=['game_name']) 
filtered_game_data = filtered_game_data.astype(int)

X = filtered_game_data.drop(['metascore_rating'], axis=1)
y = filtered_game_data['metascore_rating']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

knn_model = KNeighborsRegressor()
knn_model.fit(X_train_scaled, y_train)

y_pred_knn = knn_model.predict(X_test_scaled)

print("MAE:", mean_absolute_error(y_test, y_pred_knn))
print("MSE:", mean_squared_error(y_test, y_pred_knn))
print("R^2:", r2_score(y_test, y_pred_knn))

def predict_game_metascore(game_name):
    game_data = game_dataa[game_dataa['game_name'] == game_name].copy()
    game_data = game_data.drop('metascore_rating', axis=1)

    game_data['genres'] = game_data['genres'].str.lower().str.replace(' ', '').str.split(',')
    game_data['developers'] = game_data['developers'].str.lower().str.replace(' ', '').str.split(',')
    game_data['publisher'] = game_data['publisher'].str.lower().str.replace(' ', '').str.split(',')

    game_data = game_data.explode('genres').explode('developers').explode('publisher')
    game_data = pd.get_dummies(game_data, columns=['genres', 'developers', 'publisher'], drop_first=True)

    missing_cols = set(X.columns) - set(game_data.columns)
    for col in missing_cols:
        game_data[col] = 0

    game_data = game_data[X.columns]
    game_data = game_data.astype(int)
    game_data_scaled = scaler.transform([game_data.iloc[0]])
    predicted_metascore = round(knn_model.predict(game_data_scaled)[0])
    real_metascore = game_dataa[game_dataa['game_name'] == game_name]['metascore_rating'].iloc[0]
    return f"Predicted Metascore: {predicted_metascore}", f"Real Metascore: {real_metascore}"

gr_interface = gr.Interface(
    fn=predict_game_metascore,
    inputs=gr.Dropdown(choices=game_names, label="Select a Game"),
    outputs=[gr.Textbox(label="Predicted Metascore"), gr.Textbox(label="Real Metascore")]
)

gr_interface.launch()

KNN 

MAE: 1.9340884573894284
MSE: 7.669018338727077
R^2: 0.39804660377565293
* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




In [17]:
print("SVM \n")

columns_to_include = ['game_name', 'metascore_rating', 'genres', 'age_restriction', 'avg_playtime', 'developers', 'publisher']
filtered_game_data = game_dataa[columns_to_include].copy()

filtered_game_data['genres'] = filtered_game_data['genres'].str.lower().str.replace(' ', '').str.split(',')
filtered_game_data['developers'] = filtered_game_data['developers'].str.lower().str.replace(' ', '').str.split(',')
filtered_game_data['publisher'] = filtered_game_data['publisher'].str.lower().str.replace(' ', '').str.split(',')

filtered_game_data = filtered_game_data.explode('genres').explode('developers').explode('publisher')
filtered_game_data = pd.get_dummies(filtered_game_data, columns=['genres', 'developers', 'publisher'], drop_first=True)

game_names = game_dataa['game_name'].unique().tolist()

filtered_game_data = filtered_game_data.drop(columns=['game_name']) 
filtered_game_data = filtered_game_data.astype(int)

X = filtered_game_data.drop(['metascore_rating'], axis=1)
y = filtered_game_data['metascore_rating']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

svm_model = SVR()
svm_model.fit(X_train_scaled, y_train)

y_pred_svm = svm_model.predict(X_test_scaled)

print("MAE:", mean_absolute_error(y_test, y_pred_svm))
print("MSE:", mean_squared_error(y_test, y_pred_svm))
print("R^2:", r2_score(y_test, y_pred_svm))

def predict_game_metascore(game_name):
    game_data = game_dataa[game_dataa['game_name'] == game_name].copy()
    game_data = game_data.drop('metascore_rating', axis=1)

    game_data['genres'] = game_data['genres'].str.lower().str.replace(' ', '').str.split(',')
    game_data['developers'] = game_data['developers'].str.lower().str.replace(' ', '').str.split(',')
    game_data['publisher'] = game_data['publisher'].str.lower().str.replace(' ', '').str.split(',')

    game_data = game_data.explode('genres').explode('developers').explode('publisher')
    game_data = pd.get_dummies(game_data, columns=['genres', 'developers', 'publisher'], drop_first=True)

    missing_cols = set(X.columns) - set(game_data.columns)
    for col in missing_cols:
        game_data[col] = 0

    game_data = game_data[X.columns]
    game_data = game_data.astype(int)
    game_data_scaled = scaler.transform([game_data.iloc[0]])
    predicted_metascore = round(svm_model.predict(game_data_scaled)[0])
    real_metascore = game_dataa[game_dataa['game_name'] == game_name]['metascore_rating'].iloc[0]
    return f"Predicted Metascore: {predicted_metascore}", f"Real Metascore: {real_metascore}"

gr_interface = gr.Interface(
    fn=predict_game_metascore,
    inputs=gr.Dropdown(choices=game_names, label="Select a Game"),
    outputs=[gr.Textbox(label="Predicted Metascore"), gr.Textbox(label="Real Metascore")]
)

gr_interface.launch()

SVM 

MAE: 1.7781391712833166
MSE: 7.801142247927413
R^2: 0.3876759888738791
* Running on local URL:  http://127.0.0.1:7865

To create a public link, set `share=True` in `launch()`.




In [15]:
print("Linear Regression\n")

columns_to_include = ['game_name', 'metascore_rating', 'place_in_top', 'genres', 'age_restriction', 'avg_playtime', 'developers', 'publisher']
filtered_game_data = game_dataa[columns_to_include].copy()

filtered_game_data['genres'] = filtered_game_data['genres'].str.lower().str.replace(' ', '').str.split(',')
filtered_game_data['developers'] = filtered_game_data['developers'].str.lower().str.replace(' ', '').str.split(',')
filtered_game_data['publisher'] = filtered_game_data['publisher'].str.lower().str.replace(' ', '').str.split(',')

filtered_game_data = filtered_game_data.explode('genres').explode('developers').explode('publisher')
filtered_game_data = pd.get_dummies(filtered_game_data, columns=['genres', 'developers', 'publisher'], drop_first=True)

game_names = game_dataa['game_name'].unique().tolist()

filtered_game_data = filtered_game_data.drop(columns=['game_name']) 
filtered_game_data = filtered_game_data.astype(int)

X = filtered_game_data.drop(['metascore_rating'], axis=1)
y = filtered_game_data['metascore_rating']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

lr_model = LinearRegression()
lr_model.fit(X_train_scaled, y_train)

y_pred_lr = lr_model.predict(X_test_scaled)

print("MAE:", mean_absolute_error(y_test, y_pred_lr))
print("MSE:", mean_squared_error(y_test, y_pred_lr))
print("R^2:", r2_score(y_test, y_pred_lr))

def predict_game_metascore(game_name):
    game_data = game_dataa[game_dataa['game_name'] == game_name].copy()
    game_data = game_data.drop('metascore_rating', axis=1)

    game_data['genres'] = game_data['genres'].str.lower().str.replace(' ', '').str.split(',')
    game_data['developers'] = game_data['developers'].str.lower().str.replace(' ', '').str.split(',')
    game_data['publisher'] = game_data['publisher'].str.lower().str.replace(' ', '').str.split(',')

    game_data = game_data.explode('genres').explode('developers').explode('publisher')
    game_data = pd.get_dummies(game_data, columns=['genres', 'developers', 'publisher'], drop_first=True)

    missing_cols = set(X.columns) - set(game_data.columns)
    for col in missing_cols:
        game_data[col] = 0

    game_data = game_data[X.columns]
    game_data = game_data.astype(int)
    game_data_scaled = scaler.transform([game_data.iloc[0]])
    predicted_metascore = round(lr_model.predict(game_data_scaled)[0])
    real_metascore = game_dataa[game_dataa['game_name'] == game_name]['metascore_rating'].iloc[0]
    return f"Predicted Metascore: {predicted_metascore}", f"Real Metascore: {real_metascore}"

gr_interface = gr.Interface(
    fn=predict_game_metascore,
    inputs=gr.Dropdown(choices=game_names, label="Select a Game"),
    outputs=[gr.Textbox(label="Predicted Metascore"), gr.Textbox(label="Real Metascore")]
)

gr_interface.launch()

Linear Regression

MAE: 2468309528595.1187
MSE: 1.1284861059050769e+27
R^2: -8.857666184096001e+25
* Running on local URL:  http://127.0.0.1:7864

To create a public link, set `share=True` in `launch()`.


