In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter
import dataframe_image as dfi

In [3]:
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

In [4]:
# Load the game_clean_df.csv dataset.
game_df = pd.read_csv("../Resources/game_clean_df.csv")
game_df.head()

Unnamed: 0.1,Unnamed: 0,Name,Platform,Year_of_Release,Genre,Publisher,Global_Sales,Critic_Score,Developer,Rating
0,42,Grand Theft Auto V,PS4,2014,Action,Take-Two Interactive,12610000,97,Rockstar North,M
1,62,New Super Mario Bros. 2,3DS,2012,Platform,Nintendo,9900000,78,Nintendo,E
2,73,Animal Crossing: New Leaf,3DS,2012,Simulation,Nintendo,9160000,88,Nintendo,E
3,77,FIFA 16,PS4,2015,Sports,Electronic Arts,8570000,82,EA Sports,E
4,92,Call of Duty: Advanced Warfare,PS4,2014,Shooter,Activision,7660000,83,Sledgehammer Games,M


In [5]:
# Drop the Unnamed: 0 column.
game_df = game_df.drop(columns=["Unnamed: 0"])

print(game_df.shape)
game_df.head(10)

(867, 9)


Unnamed: 0,Name,Platform,Year_of_Release,Genre,Publisher,Global_Sales,Critic_Score,Developer,Rating
0,Grand Theft Auto V,PS4,2014,Action,Take-Two Interactive,12610000,97,Rockstar North,M
1,New Super Mario Bros. 2,3DS,2012,Platform,Nintendo,9900000,78,Nintendo,E
2,Animal Crossing: New Leaf,3DS,2012,Simulation,Nintendo,9160000,88,Nintendo,E
3,FIFA 16,PS4,2015,Sports,Electronic Arts,8570000,82,EA Sports,E
4,Call of Duty: Advanced Warfare,PS4,2014,Shooter,Activision,7660000,83,Sledgehammer Games,M
5,FIFA 17,PS4,2016,Sports,Electronic Arts,7590000,85,"EA Sports, EA Vancouver",E
6,Fallout 4,PS4,2015,Role-Playing,Bethesda Softworks,7160000,87,Bethesda Game Studios,M
7,Mario Kart 8,WiiU,2014,Racing,Nintendo,7090000,88,Nintendo,E
8,FIFA 15,PS4,2014,Sports,Electronic Arts,6080000,82,EA Sports,E
9,Destiny,PS4,2014,Shooter,Activision,5640000,76,"Bungie Software, Bungie",T


# Balanced Random Forest Classifier

### Balanced Random Forest Classifier By Platform

In [6]:
target = ["Platform"]

# Create our features
X = pd.get_dummies(game_df.drop(columns='Platform').copy())

# Create our target
y = game_df[target].copy()

In [7]:
X.describe()

Unnamed: 0,Year_of_Release,Global_Sales,Critic_Score,Name_7 Days to Die,Name_7th Dragon III Code: VFD,Name_Adventure Time: The Secret of the Nameless Kingdom,Name_Aegis of Earth: Protonovus Assault,Name_Agatha Christie's The ABC Murders,Name_Alan Wake,Name_Alien: Isolation,...,Developer_Zipper Interactive,Developer_Zoe Mode,Developer_h.a.n.d. Inc.,Developer_id Software,Developer_n-Space,Developer_syn Sophia,Rating_E,Rating_E10+,Rating_M,Rating_T
count,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0,...,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0
mean,2014.296424,691672.4,72.959631,0.002307,0.001153,0.001153,0.001153,0.001153,0.001153,0.00346,...,0.001153,0.002307,0.001153,0.001153,0.00346,0.001153,0.222607,0.186851,0.320646,0.269896
std,1.362149,1237520.0,12.209563,0.048001,0.033962,0.033962,0.033962,0.033962,0.033962,0.058756,...,0.033962,0.048001,0.033962,0.033962,0.058756,0.033962,0.416236,0.390017,0.466994,0.444162
min,2012.0,10000.0,19.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2013.0,90000.0,67.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,2014.0,240000.0,75.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,2015.0,685000.0,81.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
max,2016.0,12610000.0,97.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [8]:
# Check the balance of our target values
y["Platform"].value_counts()

PS4     239
PC      174
XOne    159
PSV     106
3DS     100
WiiU     89
Name: Platform, dtype: int64

In [9]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [10]:
# Resample the training data with the BalancedRandomForestClassifier
from imblearn.ensemble import BalancedRandomForestClassifier

brfc = BalancedRandomForestClassifier(n_estimators=100, random_state=1)

brfc.fit(X_train, y_train)

BalancedRandomForestClassifier(random_state=1)

In [11]:
# Calculated the balanced accuracy score
y_pred = brfc.predict(X_test)

balanced_accuracy_score(y_test, y_pred)

0.35324419317980343

In [12]:
# Display the confusion matrix
confusion_matrix(y_test, y_pred)

array([[ 5,  0,  0,  4, 10,  0],
       [ 0, 22,  5,  3,  3, 14],
       [ 0,  7,  4, 16,  4, 32],
       [ 1,  1,  3, 15,  4,  2],
       [ 6,  2,  1,  1, 11,  3],
       [ 1,  5, 17,  0,  4, 11]], dtype=int64)

In [13]:
# Create a DataFrame from the confusion matrix.
BRFC_cm = confusion_matrix(y_test, y_pred)

BRFC_cm_df = pd.DataFrame(
    BRFC_cm, 
    index=["Actual PS4", "Actual PC", "Actual XOne", "Actual WiiU", "Actual 3DS", "Actual PSV"], 
    columns=["Predicted PS4", "Predicted PC", "Predicted XOne", "Predicted WiiU", "Predicted 3DS", "Predicted PSV"])

BRFC_cm_df

Unnamed: 0,Predicted PS4,Predicted PC,Predicted XOne,Predicted WiiU,Predicted 3DS,Predicted PSV
Actual PS4,5,0,0,4,10,0
Actual PC,0,22,5,3,3,14
Actual XOne,0,7,4,16,4,32
Actual WiiU,1,1,3,15,4,2
Actual 3DS,6,2,1,1,11,3
Actual PSV,1,5,17,0,4,11


In [14]:
# Save the BRFC dataframe
dfi.export(BRFC_cm_df, "analysis/BRFC By Platform.png")

C:\Program Files\Google\Chrome\Application\chrome.exe
C:\Program Files\Google\Chrome\Application\chrome.exe


In [15]:
# Print the imbalanced classification report
print("Balanced Random Forest Classifier By Platform")
print(f"Accuracy: { balanced_accuracy_score(y_test, y_pred) * 100:.2f}% \n\n")
print(classification_report_imbalanced(y_test, y_pred))

Balanced Random Forest Classifier By Platform
Accuracy: 35.32% 


                   pre       rec       spe        f1       geo       iba       sup

        3DS       0.38      0.26      0.96      0.31      0.50      0.23        19
         PC       0.59      0.47      0.91      0.52      0.65      0.41        47
        PS4       0.13      0.06      0.83      0.09      0.23      0.05        63
        PSV       0.38      0.58      0.87      0.46      0.71      0.49        26
       WiiU       0.31      0.46      0.87      0.37      0.63      0.38        24
       XOne       0.18      0.29      0.72      0.22      0.45      0.20        38

avg / total       0.31      0.31      0.85      0.30      0.49      0.26       217



### Balanced Random Forest Classifier By Rating

In [16]:
target = ["Rating"]

# Create our features
X = pd.get_dummies(game_df.drop(columns='Rating').copy())

# Create our target
y = game_df[target].copy()

In [17]:
X.describe()

Unnamed: 0,Year_of_Release,Global_Sales,Critic_Score,Name_7 Days to Die,Name_7th Dragon III Code: VFD,Name_Adventure Time: The Secret of the Nameless Kingdom,Name_Aegis of Earth: Protonovus Assault,Name_Agatha Christie's The ABC Murders,Name_Alan Wake,Name_Alien: Isolation,...,Developer_Yager,Developer_Yuke's,Developer_ZeniMax Media,Developer_Zerodiv,Developer_Zipper Interactive,Developer_Zoe Mode,Developer_h.a.n.d. Inc.,Developer_id Software,Developer_n-Space,Developer_syn Sophia
count,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0,...,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0
mean,2014.296424,691672.4,72.959631,0.002307,0.001153,0.001153,0.001153,0.001153,0.001153,0.00346,...,0.001153,0.00692,0.002307,0.002307,0.001153,0.002307,0.001153,0.001153,0.00346,0.001153
std,1.362149,1237520.0,12.209563,0.048001,0.033962,0.033962,0.033962,0.033962,0.033962,0.058756,...,0.033962,0.082949,0.048001,0.048001,0.033962,0.048001,0.033962,0.033962,0.058756,0.033962
min,2012.0,10000.0,19.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2013.0,90000.0,67.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,2014.0,240000.0,75.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,2015.0,685000.0,81.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,2016.0,12610000.0,97.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [18]:
# Check the balance of our target values
y["Rating"].value_counts()

M       278
T       234
E       193
E10+    162
Name: Rating, dtype: int64

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [20]:
brfc = BalancedRandomForestClassifier(n_estimators=100, random_state=1)

brfc.fit(X_train, y_train)

BalancedRandomForestClassifier(random_state=1)

In [21]:
# Calculated the balanced accuracy score
y_pred = brfc.predict(X_test)

balanced_accuracy_score(y_test, y_pred)

0.7241995955851377

In [22]:
# Display the confusion matrix
confusion_matrix(y_test, y_pred)

array([[41, 13,  0,  1],
       [ 6, 28,  3,  3],
       [ 2,  2, 63, 16],
       [ 4,  2,  6, 27]], dtype=int64)

In [23]:
# Create a DataFrame from the confusion matrix.
BRFC_cm = confusion_matrix(y_test, y_pred)

BRFC_cm_df = pd.DataFrame(
    BRFC_cm, 
    index=["Actual M", "Actual T", "Actual E", "Actual E10+"], 
    columns=["Predicted M", "Predicted T", "Predicted E", "Predicted E10+"])

BRFC_cm_df

Unnamed: 0,Predicted M,Predicted T,Predicted E,Predicted E10+
Actual M,41,13,0,1
Actual T,6,28,3,3
Actual E,2,2,63,16
Actual E10+,4,2,6,27


In [24]:
# Save the BRFC dataframe
dfi.export(BRFC_cm_df, "analysis/BRFC By Rating.png")

C:\Program Files\Google\Chrome\Application\chrome.exe


In [25]:
# Print the imbalanced classification report
print("Balanced Random Forest Classifier By Rating")
print(f"Accuracy: { balanced_accuracy_score(y_test, y_pred) * 100:.2f}% \n\n")
print(classification_report_imbalanced(y_test, y_pred))

Balanced Random Forest Classifier By Rating
Accuracy: 72.42% 


                   pre       rec       spe        f1       geo       iba       sup

          E       0.77      0.75      0.93      0.76      0.83      0.68        55
       E10+       0.62      0.70      0.90      0.66      0.80      0.62        40
          M       0.88      0.76      0.93      0.81      0.84      0.70        83
          T       0.57      0.69      0.89      0.63      0.78      0.60        39

avg / total       0.75      0.73      0.92      0.74      0.82      0.66       217



### Balanced Random Forest Classifier By Genre

In [26]:
target = ["Genre"]

# Create our features
X = pd.get_dummies(game_df.drop(columns='Genre').copy())

# Create our target
y = game_df[target].copy()

In [27]:
X.describe()

Unnamed: 0,Year_of_Release,Global_Sales,Critic_Score,Name_7 Days to Die,Name_7th Dragon III Code: VFD,Name_Adventure Time: The Secret of the Nameless Kingdom,Name_Aegis of Earth: Protonovus Assault,Name_Agatha Christie's The ABC Murders,Name_Alan Wake,Name_Alien: Isolation,...,Developer_Zipper Interactive,Developer_Zoe Mode,Developer_h.a.n.d. Inc.,Developer_id Software,Developer_n-Space,Developer_syn Sophia,Rating_E,Rating_E10+,Rating_M,Rating_T
count,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0,...,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0,867.0
mean,2014.296424,691672.4,72.959631,0.002307,0.001153,0.001153,0.001153,0.001153,0.001153,0.00346,...,0.001153,0.002307,0.001153,0.001153,0.00346,0.001153,0.222607,0.186851,0.320646,0.269896
std,1.362149,1237520.0,12.209563,0.048001,0.033962,0.033962,0.033962,0.033962,0.033962,0.058756,...,0.033962,0.048001,0.033962,0.033962,0.058756,0.033962,0.416236,0.390017,0.466994,0.444162
min,2012.0,10000.0,19.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2013.0,90000.0,67.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,2014.0,240000.0,75.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,2015.0,685000.0,81.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
max,2016.0,12610000.0,97.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [28]:
# Check the balance of our target values
y["Genre"].value_counts()

Action          290
Role-Playing    111
Shooter         109
Sports           94
Platform         50
Racing           49
Misc             44
Adventure        37
Fighting         31
Simulation       22
Strategy         21
Puzzle            9
Name: Genre, dtype: int64

In [29]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [30]:
brfc = BalancedRandomForestClassifier(n_estimators=100, random_state=1)

brfc.fit(X_train, y_train)

BalancedRandomForestClassifier(random_state=1)

In [31]:
# Calculated the balanced accuracy score
y_pred = brfc.predict(X_test)

balanced_accuracy_score(y_test, y_pred)

0.3999349242842394

In [32]:
# Display the confusion matrix
confusion_matrix(y_test, y_pred)

array([[ 5,  5,  9,  3,  0, 11,  0,  4, 26,  4,  1,  5],
       [ 0,  2,  0,  0,  0,  0,  0,  0,  4,  0,  0,  1],
       [ 0,  0,  0,  1,  0,  0,  0,  0,  2,  0,  0,  0],
       [ 0,  1,  0,  7,  0,  1,  1,  0,  1,  0,  2,  0],
       [ 0,  0,  2,  0,  4,  2,  2,  0,  0,  0,  1,  0],
       [ 0,  0,  0,  0,  0,  1,  0,  0,  0,  2,  0,  0],
       [ 0,  0,  0,  2,  0,  0,  9,  0,  0,  0,  1,  1],
       [ 0,  4,  6,  2,  0,  1,  0,  6,  5,  0,  0,  1],
       [ 1,  0,  0,  1,  2,  0,  0,  2, 24,  1,  0,  2],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1],
       [ 0,  0,  1,  2,  1,  0,  1,  0,  0,  1, 24,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  3]], dtype=int64)

In [33]:
# Create a DataFrame from the confusion matrix.
BRFC_cm = confusion_matrix(y_test, y_pred)

BRFC_cm_df = pd.DataFrame(
    BRFC_cm, 
    index=["Actual Action", "Actual Shooter", "Actual Sports", "Actual Role-Playing", "Actual Racing", "Actual Platform", "Actual Misc", "Actual Adventure", "Actual Fighting", "Actual Strategy", "Actual Simulation", "Actual Puzzle"], 
    columns=["Predicted Action", "Predicted Shooter", "Predicted Sports", "Predicted Role-Playing", "Predicted Racing", "Predicted Platform", "Predicted Misc", "Predicted Adventure", "Predicted Fighting", "Predicted Strategy", "Predicted Simulation", "Predicted Puzzle"])

BRFC_cm_df

Unnamed: 0,Predicted Action,Predicted Shooter,Predicted Sports,Predicted Role-Playing,Predicted Racing,Predicted Platform,Predicted Misc,Predicted Adventure,Predicted Fighting,Predicted Strategy,Predicted Simulation,Predicted Puzzle
Actual Action,5,5,9,3,0,11,0,4,26,4,1,5
Actual Shooter,0,2,0,0,0,0,0,0,4,0,0,1
Actual Sports,0,0,0,1,0,0,0,0,2,0,0,0
Actual Role-Playing,0,1,0,7,0,1,1,0,1,0,2,0
Actual Racing,0,0,2,0,4,2,2,0,0,0,1,0
Actual Platform,0,0,0,0,0,1,0,0,0,2,0,0
Actual Misc,0,0,0,2,0,0,9,0,0,0,1,1
Actual Adventure,0,4,6,2,0,1,0,6,5,0,0,1
Actual Fighting,1,0,0,1,2,0,0,2,24,1,0,2
Actual Strategy,0,0,0,0,0,0,0,0,0,0,1,1


In [34]:
dfi.export(BRFC_cm_df, "analysis/BRFC By Genre.png")

C:\Program Files\Google\Chrome\Application\chrome.exe
C:\Program Files\Google\Chrome\Application\chrome.exe
C:\Program Files\Google\Chrome\Application\chrome.exe
C:\Program Files\Google\Chrome\Application\chrome.exe
C:\Program Files\Google\Chrome\Application\chrome.exe
C:\Program Files\Google\Chrome\Application\chrome.exe


In [35]:
# Print the imbalanced classification report
print("Balanced Random Forest Classifier By Genre")
print(f"Accuracy: { balanced_accuracy_score(y_test, y_pred) * 100:.2f}% \n\n")
print(classification_report_imbalanced(y_test, y_pred))

Balanced Random Forest Classifier By Genre
Accuracy: 39.99% 


                    pre       rec       spe        f1       geo       iba       sup

      Action       0.83      0.07      0.99      0.13      0.26      0.06        73
   Adventure       0.17      0.29      0.95      0.21      0.52      0.25         7
    Fighting       0.00      0.00      0.92      0.00      0.00      0.00         3
        Misc       0.39      0.54      0.95      0.45      0.71      0.49        13
    Platform       0.57      0.36      0.99      0.44      0.60      0.34        11
      Puzzle       0.06      0.33      0.93      0.11      0.56      0.29         3
      Racing       0.69      0.69      0.98      0.69      0.82      0.66        13
Role-Playing       0.50      0.24      0.97      0.32      0.48      0.22        25
     Shooter       0.38      0.73      0.79      0.50      0.76      0.57        33
  Simulation       0.00      0.00      0.96      0.00      0.00      0.00         2
      Sports

# Easy Ensemble AdaBoost Classifier

### Easy Ensemble AdaBoost Classifier by Platform

In [36]:
target = ["Platform"]

# Create our features
X = pd.get_dummies(game_df.drop(columns='Platform').copy())

# Create our target
y = game_df[target].copy()

In [37]:
# Check the balance of our target values
y["Platform"].value_counts()

PS4     239
PC      174
XOne    159
PSV     106
3DS     100
WiiU     89
Name: Platform, dtype: int64

In [38]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [39]:
# Train the EasyEnsembleClassifier
from imblearn.ensemble import EasyEnsembleClassifier

eeac = EasyEnsembleClassifier(n_estimators=100, random_state=1)

eeac.fit(X_train, y_train)

EasyEnsembleClassifier(n_estimators=100, random_state=1)

In [40]:
# Calculated the balanced accuracy score
y_pred = eeac.predict(X_test)

balanced_accuracy_score(y_test, y_pred)

0.4174514983585533

In [41]:
# Display the confusion matrix
confusion_matrix(y_test, y_pred)

array([[ 6,  0,  0,  3, 10,  0],
       [ 0, 15,  6, 13,  3, 10],
       [ 1, 10, 21,  7,  3, 21],
       [ 2,  0,  3, 16,  3,  2],
       [ 6,  1,  0,  1, 12,  4],
       [ 1,  7, 11,  1,  2, 16]], dtype=int64)

In [42]:
# Create a DataFrame from the confusion matrix.
EEAC_cm = confusion_matrix(y_test, y_pred)
EEAC_cm_df = pd.DataFrame(
    EEAC_cm, 
    index=["Actual PS4", "Actual PC", "Actual XOne", "Actual WiiU", "Actual 3DS", "Actual PSV"], 
    columns=["Predicted PS4", "Predicted PC", "Predicted XOne", "Predicted WiiU", "Predicted 3DS", "Predicted PSV"])

EEAC_cm_df

Unnamed: 0,Predicted PS4,Predicted PC,Predicted XOne,Predicted WiiU,Predicted 3DS,Predicted PSV
Actual PS4,6,0,0,3,10,0
Actual PC,0,15,6,13,3,10
Actual XOne,1,10,21,7,3,21
Actual WiiU,2,0,3,16,3,2
Actual 3DS,6,1,0,1,12,4
Actual PSV,1,7,11,1,2,16


In [43]:
dfi.export(EEAC_cm_df, "analysis/EEAC By Platform.png")

C:\Program Files\Google\Chrome\Application\chrome.exe
C:\Program Files\Google\Chrome\Application\chrome.exe


In [44]:
# Print the imbalanced classification report
print("Easy Ensemble AdaBoost Classifier By Platform")
print(f"Accuracy: { balanced_accuracy_score(y_test, y_pred) * 100:.2f}% \n\n")
print(classification_report_imbalanced(y_test, y_pred))

Easy Ensemble AdaBoost Classifier By Platform
Accuracy: 41.75% 


                   pre       rec       spe        f1       geo       iba       sup

        3DS       0.38      0.32      0.95      0.34      0.55      0.28        19
         PC       0.45      0.32      0.89      0.38      0.53      0.27        47
        PS4       0.51      0.33      0.87      0.40      0.54      0.27        63
        PSV       0.39      0.62      0.87      0.48      0.73      0.52        26
       WiiU       0.36      0.50      0.89      0.42      0.67      0.43        24
       XOne       0.30      0.42      0.79      0.35      0.58      0.32        38

avg / total       0.42      0.40      0.87      0.39      0.58      0.33       217



### Easy Ensemble AdaBoost Classifier by Rating

In [45]:
target = ["Rating"]

# Create our features
X = pd.get_dummies(game_df.drop(columns='Rating').copy())

# Create our target
y = game_df[target].copy()

In [46]:
# Check the balance of our target values
y["Rating"].value_counts()

M       278
T       234
E       193
E10+    162
Name: Rating, dtype: int64

In [47]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [48]:
eeac = EasyEnsembleClassifier(n_estimators=100, random_state=1)

eeac.fit(X_train, y_train)

EasyEnsembleClassifier(n_estimators=100, random_state=1)

In [49]:
# Calculated the balanced accuracy score
y_pred = eeac.predict(X_test)

balanced_accuracy_score(y_test, y_pred)

0.5016910298536805

In [50]:
# Display the confusion matrix
confusion_matrix(y_test, y_pred)

array([[21, 19,  2, 13],
       [ 4, 14,  6, 16],
       [ 1,  6, 59, 17],
       [ 3,  2, 12, 22]], dtype=int64)

In [51]:
# Create a DataFrame from the confusion matrix.
EEAC_cm = confusion_matrix(y_test, y_pred)

EEAC_cm_df = pd.DataFrame(
    EEAC_cm, 
    index=["Actual M", "Actual T", "Actual E", "Actual E10+"], 
    columns=["Predicted M", "Predicted T", "Predicted E", "Predicted E10+"])

EEAC_cm_df

Unnamed: 0,Predicted M,Predicted T,Predicted E,Predicted E10+
Actual M,21,19,2,13
Actual T,4,14,6,16
Actual E,1,6,59,17
Actual E10+,3,2,12,22


In [52]:
dfi.export(EEAC_cm_df, "analysis/EEAC By Rating.png")

C:\Program Files\Google\Chrome\Application\chrome.exe


In [53]:
# Print the imbalanced classification report
print("Easy Ensemble AdaBoost Classifier By Rating")
print(f"Accuracy: { balanced_accuracy_score(y_test, y_pred) * 100:.2f}% \n\n")
print(classification_report_imbalanced(y_test, y_pred))

Easy Ensemble AdaBoost Classifier By Rating
Accuracy: 50.17% 


                   pre       rec       spe        f1       geo       iba       sup

          E       0.72      0.38      0.95      0.50      0.60      0.34        55
       E10+       0.34      0.35      0.85      0.35      0.54      0.28        40
          M       0.75      0.71      0.85      0.73      0.78      0.60        83
          T       0.32      0.56      0.74      0.41      0.65      0.41        39

avg / total       0.59      0.53      0.86      0.54      0.67      0.44       217



### Easy Ensemble AdaBoost Classifier by Genre

In [54]:
target = ["Genre"]

# Create our features
X = pd.get_dummies(game_df.drop(columns='Genre').copy())

# Create our target
y = game_df[target].copy()

In [55]:
# Check the balance of our target values
y["Genre"].value_counts()

Action          290
Role-Playing    111
Shooter         109
Sports           94
Platform         50
Racing           49
Misc             44
Adventure        37
Fighting         31
Simulation       22
Strategy         21
Puzzle            9
Name: Genre, dtype: int64

In [56]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [57]:
eeac = EasyEnsembleClassifier(n_estimators=100, random_state=1)

eeac.fit(X_train, y_train)

EasyEnsembleClassifier(n_estimators=100, random_state=1)

In [58]:
# Calculated the balanced accuracy score
y_pred = eeac.predict(X_test)

balanced_accuracy_score(y_test, y_pred)

0.289713638720488

In [59]:
# Display the confusion matrix
confusion_matrix(y_test, y_pred)

array([[23,  4,  2,  1,  0, 29,  1,  5,  3,  2,  0,  3],
       [ 4,  2,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0],
       [ 2,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0],
       [ 1,  0,  0,  2,  0,  9,  0,  1,  0,  0,  0,  0],
       [ 0,  0,  2,  0,  0,  6,  2,  0,  0,  0,  0,  1],
       [ 0,  0,  0,  0,  0,  3,  0,  0,  0,  0,  0,  0],
       [ 1,  0,  0,  0,  0,  5,  4,  0,  0,  2,  0,  1],
       [10,  1,  3,  3,  0,  6,  0,  1,  0,  0,  0,  1],
       [23,  0,  0,  2,  0,  3,  0,  1,  3,  0,  0,  1],
       [ 0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0],
       [ 4,  0,  0,  0,  0,  8,  0,  0,  0,  2, 16,  0],
       [ 1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  3]], dtype=int64)

In [60]:
# Create a DataFrame from the confusion matrix.
EEAC_cm = confusion_matrix(y_test, y_pred)

EEAC_cm_df = pd.DataFrame(
    EEAC_cm, 
    index=["Actual Action", "Actual Shooter", "Actual Sports", "Actual Role-Playing", "Actual Racing", "Actual Platform", "Actual Misc", "Actual Adventure", "Actual Fighting", "Actual Strategy", "Actual Simulation", "Actual Puzzle"], 
    columns=["Predicted Action", "Predicted Shooter", "Predicted Sports", "Predicted Role-Playing", "Predicted Racing", "Predicted Platform", "Predicted Misc", "Predicted Adventure", "Predicted Fighting", "Predicted Strategy", "Predicted Simulation", "Predicted Puzzle"])

EEAC_cm_df

Unnamed: 0,Predicted Action,Predicted Shooter,Predicted Sports,Predicted Role-Playing,Predicted Racing,Predicted Platform,Predicted Misc,Predicted Adventure,Predicted Fighting,Predicted Strategy,Predicted Simulation,Predicted Puzzle
Actual Action,23,4,2,1,0,29,1,5,3,2,0,3
Actual Shooter,4,2,0,0,0,1,0,0,0,0,0,0
Actual Sports,2,0,0,0,0,1,0,0,0,0,0,0
Actual Role-Playing,1,0,0,2,0,9,0,1,0,0,0,0
Actual Racing,0,0,2,0,0,6,2,0,0,0,0,1
Actual Platform,0,0,0,0,0,3,0,0,0,0,0,0
Actual Misc,1,0,0,0,0,5,4,0,0,2,0,1
Actual Adventure,10,1,3,3,0,6,0,1,0,0,0,1
Actual Fighting,23,0,0,2,0,3,0,1,3,0,0,1
Actual Strategy,0,0,0,0,0,2,0,0,0,0,0,0


In [61]:
dfi.export(EEAC_cm_df, "analysis/EEAC By Genre.png")

C:\Program Files\Google\Chrome\Application\chrome.exe
C:\Program Files\Google\Chrome\Application\chrome.exe
C:\Program Files\Google\Chrome\Application\chrome.exe
C:\Program Files\Google\Chrome\Application\chrome.exe
C:\Program Files\Google\Chrome\Application\chrome.exe
C:\Program Files\Google\Chrome\Application\chrome.exe


In [62]:
# Print the imbalanced classification report
print("Easy Ensemble AdaBoost Classifier By Genre")
print(f"Accuracy: { balanced_accuracy_score(y_test, y_pred) * 100:.2f}% \n\n")
print(classification_report_imbalanced(y_test, y_pred))

Easy Ensemble AdaBoost Classifier By Genre
Accuracy: 28.97% 


                    pre       rec       spe        f1       geo       iba       sup

      Action       0.33      0.32      0.68      0.32      0.46      0.21        73
   Adventure       0.29      0.29      0.98      0.29      0.53      0.26         7
    Fighting       0.00      0.00      0.97      0.00      0.00      0.00         3
        Misc       0.25      0.15      0.97      0.19      0.39      0.14        13
    Platform       0.00      0.00      1.00      0.00      0.00      0.00        11
      Puzzle       0.04      1.00      0.67      0.08      0.82      0.69         3
      Racing       0.57      0.31      0.99      0.40      0.55      0.28        13
Role-Playing       0.12      0.04      0.96      0.06      0.20      0.03        25
     Shooter       0.50      0.09      0.98      0.15      0.30      0.08        33
  Simulation       0.00      0.00      0.97      0.00      0.00      0.00         2
      Sports