# Video games sales prediction on region

In [27]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
from tensorflow import keras

In [28]:
data = pd.read_csv('./data/Video_Games.csv')
new_data = data.drop(['Name', 'Year_of_Release', 'Developer', 'Rating', 'Critic_Count', 'Critic_Score', 'User_Count', 'User_Score'], axis=1)

In [29]:
# split data into train and test and keep 20% for test
train, test = train_test_split(new_data, test_size=0.2)
print(f"Train size: {len(train)}")
#show first 5 rows
train.head()

Train size: 13375


Unnamed: 0,Platform,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
2151,PC,Action,Namco Bandai Games,0.25,0.56,0.0,0.14,0.96
3811,PS,Misc,Aruze Corp,0.0,0.0,0.49,0.03,0.53
5944,PC,Simulation,Electronic Arts,0.02,0.24,0.0,0.04,0.29
15620,XOne,Action,Ubisoft,0.0,0.02,0.0,0.0,0.02
4842,PS,Adventure,Sony Computer Entertainment,0.0,0.0,0.37,0.03,0.4


# RN : correction

In [80]:
# Convertir les données d'entrée et de sortie en tenseurs NumPy
X_train = np.array(train[['Genre', 'Platform']])
y_train = np.array(train[['NA_Sales', 'EU_Sales', 'JP_Sales']])

X_test = np.array(test[['Genre', 'Platform']])
y_test = np.array(test[['NA_Sales', 'EU_Sales', 'JP_Sales']])

# Encoder les données d'entrée en utilisant OneHotEncoder
encoder = OneHotEncoder(handle_unknown='ignore')
X_train = encoder.fit_transform(X_train)
X_test = encoder.transform(X_test)

# Construction du modèle
model = keras.Sequential([
    keras.layers.Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    keras.layers.Dense(3, activation='softmax')
])

# Compilation du modèle
model.compile(optimizer='RMSprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Entraînement du modèle
model.fit(X_train, y_train, epochs=25, batch_size=50)

# évaluation du modèle
test_loss, test_acc = model.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)

# prédiction sur de nouvelles données
new_game = np.array([['Action', 'PS4']])
new_game = encoder.transform(new_game) # Encoder la nouvelle donnée
region_probs = model.predict(new_game)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Test accuracy: 0.6578947305679321


In [89]:
def predict(game="Mario Kart 7"):
    # prédiction sur un jeu existant: game dans la base de données Video_Games
    game_data = data[data['Name'] == game]
    game_data = game_data.drop(['Name', 'Year_of_Release', 'Developer', 'Rating', 'Critic_Count', 'Critic_Score', 'User_Count', 'User_Score'], axis=1)

    # compléter les données manquantes
    new_game = np.array([game_data.iloc[0][['Genre', 'Platform']]])
    new_game = encoder.transform(new_game) # Encoder la nouvelle donnée
    region_probs = model.predict(new_game)

    print(f"Region probabilities for {game}: {region_probs}")

    # afficher les résultats
    print(f"North America: {region_probs[0][0]}")
    print(f"Europe: {region_probs[0][1]}")
    print(f"Japan: {region_probs[0][2]}")

In [93]:
predict() # Mario Kart 7
predict("Call of Duty: Black Ops II") # Call of Duty: Black Ops II
predict("Grand Theft Auto V") # GTA5
predict("Assassin's Creed Syndicate") # Assassin's Creed IV: Black Flag

Region probabilities for Mario Kart 7: [[0.42379454 0.40726084 0.16894463]]
North America: 0.42379453778266907
Europe: 0.40726083517074585
Japan: 0.16894462704658508
Region probabilities for Call of Duty: Black Ops II: [[0.5322652  0.42862785 0.03910702]]
North America: 0.5322651863098145
Europe: 0.4286278486251831
Japan: 0.03910702094435692
Region probabilities for Grand Theft Auto V: [[0.5086752  0.4333967  0.05792809]]
North America: 0.508675217628479
Europe: 0.43339669704437256
Japan: 0.05792808532714844
Region probabilities for Assassin's Creed Syndicate: [[0.4280832  0.5127239  0.05919292]]
North America: 0.428083211183548
Europe: 0.5127239227294922
Japan: 0.059192921966314316


# FROM SCRATCH

In [None]:
input_size = X_train.shape[1]
output_size = y_train.shape[1]
hidden_size = 32 # number of neurons in the hidden layer

w1 = np.random.randn(input_size, hidden_size)
w2 = np.random.randn(hidden_size, output_size)

b1 = np.zeros((1, hidden_size))
b2 = np.zeros((1, output_size))

In [None]:
def relu(x):
    return np.maximum(0, x)

def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)

In [None]:
# Convertir les données d'entrée et de sortie en tenseurs NumPy
X_train = np.array(train[['Genre', 'Platform']])
y_train = np.array(train[['NA_Sales', 'EU_Sales', 'JP_Sales']])

X_test = np.array(test[['Genre', 'Platform']])
y_test = np.array(test[['NA_Sales', 'EU_Sales', 'JP_Sales']])

# Encoder les données en utilisant OneHotEncoder
encoder = OneHotEncoder(handle_unknown='ignore')
X_train = encoder.fit_transform(X_train)
X_test = encoder.transform(X_test)

In [None]:
# forward propagation
def forward():
    pass