# Deep Learning A Z Regressão mais valores - Games

In [1]:
import pandas as pd
import tensorflow as tf
import sklearn

In [2]:
pd.__version__, tf.__version__, sklearn.__version__

('2.2.2', '2.17.0', '1.5.1')

In [3]:
from tensorflow.keras.layers import Dense, Dropout, Activation, Input
from tensorflow.keras.models import Model # Utilizaremos a Model ao invés de Sequential
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

In [4]:
# https://www.kaggle.com/datasets/gregorut/videogamesales
base = pd.read_csv('games.csv')
base

Unnamed: 0,Name,Platform,Year_of_Release,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,Critic_Score,Critic_Count,User_Score,User_Count,Developer,Rating
0,Wii Sports,Wii,2006.0,Sports,Nintendo,41.36,28.96,3.77,8.45,82.53,76.0,51.0,8,322.0,Nintendo,E
1,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24,,,,,,
2,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.68,12.76,3.79,3.29,35.52,82.0,73.0,8.3,709.0,Nintendo,E
3,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.61,10.93,3.28,2.95,32.77,80.0,73.0,8,192.0,Nintendo,E
4,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.00,31.37,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16714,Samurai Warriors: Sanada Maru,PS3,2016.0,Action,Tecmo Koei,0.00,0.00,0.01,0.00,0.01,,,,,,
16715,LMA Manager 2007,X360,2006.0,Sports,Codemasters,0.00,0.01,0.00,0.00,0.01,,,,,,
16716,Haitaka no Psychedelica,PSV,2016.0,Adventure,Idea Factory,0.00,0.00,0.01,0.00,0.01,,,,,,
16717,Spirits & Spells,GBA,2003.0,Platform,Wanadoo,0.01,0.00,0.00,0.00,0.01,,,,,,


In [5]:
# Removendo colunas que não serão utilizadas
base = base.drop('Other_Sales', axis = 1)
base = base.drop('Global_Sales', axis = 1)
base = base.drop('Developer', axis = 1)

In [6]:
base.shape

(16719, 13)

# Processamento da base de dados

In [7]:
base.isnull().sum()

Name                  2
Platform              0
Year_of_Release     269
Genre                 2
Publisher            54
NA_Sales              0
EU_Sales              0
JP_Sales              0
Critic_Score       8582
Critic_Count       8582
User_Score         6704
User_Count         9129
Rating             6769
dtype: int64

In [8]:
base = base.dropna(axis = 0) # Removendo linhas com valores nulos

In [9]:
base.shape # Reduziu de 16.719 para 6825 linhas (não é muito recomendado remover linhas, ainda mais que representam 59% dos dados)

(6825, 13)

In [10]:
base.isnull().sum()

Name               0
Platform           0
Year_of_Release    0
Genre              0
Publisher          0
NA_Sales           0
EU_Sales           0
JP_Sales           0
Critic_Score       0
Critic_Count       0
User_Score         0
User_Count         0
Rating             0
dtype: int64

In [11]:
base['Name'].value_counts()

Name
Need for Speed: Most Wanted                  8
Madden NFL 07                                8
LEGO Star Wars II: The Original Trilogy      8
The Sims 2                                   7
Terraria                                     7
                                            ..
Castlevania: Portrait of Ruin                1
Suzuki TT Superbikes                         1
Rumble Roses                                 1
Sherlock Holmes: The Mystery of the Mummy    1
STORM: Frontline Nation                      1
Name: count, Length: 4377, dtype: int64

In [14]:
base = base.drop('Name', axis = 1) # Iremos remover a coluna Name, pois ela não é útil para o modelo

In [12]:
base.shape

(6825, 13)

In [15]:
base.columns

Index(['Platform', 'Year_of_Release', 'Genre', 'Publisher', 'NA_Sales',
       'EU_Sales', 'JP_Sales', 'Critic_Score', 'Critic_Count', 'User_Score',
       'User_Count', 'Rating'],
      dtype='object')

In [16]:
X = base.iloc[:, [0, 1, 2, 3, 7, 8, 9, 10, 11]].values # Features ou atributos previsores
X

array([['Wii', 2006.0, 'Sports', ..., '8', 322.0, 'E'],
       ['Wii', 2008.0, 'Racing', ..., '8.3', 709.0, 'E'],
       ['Wii', 2009.0, 'Sports', ..., '8', 192.0, 'E'],
       ...,
       ['PC', 2014.0, 'Action', ..., '7.6', 412.0, 'M'],
       ['PC', 2011.0, 'Shooter', ..., '5.8', 43.0, 'T'],
       ['PC', 2011.0, 'Strategy', ..., '7.2', 13.0, 'E10+']], dtype=object)

In [18]:
y_na = base.iloc[:, 4].values # Atributo a serem previstos
y_eu = base.iloc[:, 5].values
y_jp = base.iloc[:, 6].values

In [19]:
y_na

array([4.136e+01, 1.568e+01, 1.561e+01, ..., 0.000e+00, 1.000e-02,
       0.000e+00])

In [20]:
y_eu

array([2.896e+01, 1.276e+01, 1.093e+01, ..., 1.000e-02, 0.000e+00,
       1.000e-02])

In [21]:
y_jp

array([3.77, 3.79, 3.28, ..., 0.  , 0.  , 0.  ])

In [22]:
# PS2  1 0 0 0 0 ...
# X360 0 1 0 0 0 ...
base['Platform'].value_counts()

Platform
PS2     1140
X360     858
PS3      769
PC       651
XB       565
Wii      479
DS       464
PSP      390
GC       348
PS4      239
GBA      237
XOne     159
3DS      155
PS       150
PSV      118
WiiU      89
DC        14
Name: count, dtype: int64

In [23]:
base.columns

Index(['Platform', 'Year_of_Release', 'Genre', 'Publisher', 'NA_Sales',
       'EU_Sales', 'JP_Sales', 'Critic_Score', 'Critic_Count', 'User_Score',
       'User_Count', 'Rating'],
      dtype='object')

In [24]:
onehotencoder = ColumnTransformer(transformers=[("OneHot", OneHotEncoder(), [0, 2, 3, 8])], remainder='passthrough') # Passamos as colunas CATEGÓRICAS para o OneHotEncoder
X = onehotencoder.fit_transform(X).toarray()

In [25]:
X.shape # 6825 linhas e 303 colunas

(6825, 303)

In [26]:
X[0]

array([0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 

In [None]:
(303 + 3) / 2

153.0

In [27]:
# Criação da rede neural a partir da biblioteca Model
camada_entrada = Input(shape = (303,)) # Quantidade de neurônios é sempre igual a quantidade de colunas (303)
camada_oculta1 = Dense(units = 153, activation='relu')(camada_entrada) # Adicionamos uma camada oculta com 153 neurônios conectada à camada de entrada
camada_oculta2 = Dense(units = 153, activation='relu')(camada_oculta1)
camada_saida1 = Dense(units = 1, activation='linear')(camada_oculta2) # As camadas de saída estarão conectadas à última camada oculta (camada_oculta2)
camada_saida2 = Dense(units = 1, activation='linear')(camada_oculta2)
camada_saida3 = Dense(units = 1, activation='linear')(camada_oculta2)

In [28]:
regressor = Model(inputs = camada_entrada, outputs = [camada_saida1, camada_saida2, camada_saida3])

In [29]:
regressor.compile(optimizer='adam', loss = 'mse') # A função de loss agora é o mse (mean squared error)
# O mse é melhor por conta de penalizar erros maiores

In [30]:
regressor.fit(X, [y_na, y_eu, y_jp], epochs=500, batch_size=100)

Epoch 1/500
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - dense_2_loss: 857.7544 - dense_3_loss: 2977.0461 - dense_4_loss: 1113.3972 - loss: 4948.6855
Epoch 2/500
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - dense_2_loss: 6.0444 - dense_3_loss: 4.4705 - dense_4_loss: 3.8063 - loss: 14.3229
Epoch 3/500
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - dense_2_loss: 1.1097 - dense_3_loss: 1.5113 - dense_4_loss: 0.4063 - loss: 3.0277
Epoch 4/500
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - dense_2_loss: 1.5233 - dense_3_loss: 1.5513 - dense_4_loss: 0.4074 - loss: 3.4824
Epoch 5/500
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - dense_2_loss: 0.9439 - dense_3_loss: 1.0535 - dense_4_loss: 0.3135 - loss: 2.3098
Epoch 6/500
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - dense_2_loss: 3.3397 - dense_3_loss: 1.6016 - dense_4_lo

<keras.src.callbacks.history.History at 0x2a0502a9390>

In [31]:
previsao_na, previsao_eu, previsao_jp = regressor.predict(X) # Testando o modelo com os próprios dados de treinamento

[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [32]:
previsao_na, previsao_na.mean()

(array([[ 6.183502  ],
        [ 5.241153  ],
        [ 4.1079545 ],
        ...,
        [-0.29109827],
        [ 0.08104491],
        [ 0.0444997 ]], dtype=float32),
 0.47349498)

In [33]:
y_na, y_na.mean()

(array([4.136e+01, 1.568e+01, 1.561e+01, ..., 0.000e+00, 1.000e-02,
        0.000e+00]),
 0.3944835164835165)

In [34]:
from sklearn.metrics import mean_absolute_error

In [35]:
mean_absolute_error(y_na, previsao_na) # Quando a rede neural fizer uma previsão, ela errará em média 0.32 para a previsão da América do Norte

0.3225100472481696

In [36]:
previsao_eu, previsao_eu.mean()

(array([[4.2102094 ],
        [3.499757  ],
        [2.5971322 ],
        ...,
        [0.01210006],
        [0.03780045],
        [0.014507  ]], dtype=float32),
 0.2892797)

In [37]:
y_eu, y_eu.mean()

(array([2.896e+01, 1.276e+01, 1.093e+01, ..., 1.000e-02, 0.000e+00,
        1.000e-02]),
 0.23608937728937732)

In [38]:
mean_absolute_error(y_eu, previsao_eu)

0.22454100766815108

In [39]:
previsao_jp, previsao_jp.mean()

(array([[ 1.659661  ],
        [ 1.3950778 ],
        [ 1.1259577 ],
        ...,
        [-0.0136299 ],
        [ 0.00764441],
        [-0.00773841]], dtype=float32),
 0.10616828)

In [40]:
y_jp, y_jp.mean()

(array([3.77, 3.79, 3.28, ..., 0.  , 0.  , 0.  ]), 0.06415824175824175)

In [42]:
mean_absolute_error(y_jp, previsao_jp)

0.1189478561075179