In [1]:
import numpy as np
import pandas as pd
from missingno import matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelBinarizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
%matplotlib inline
%config Completer.use_jedi=  False
%config IPCompleter.use_jedi= False

In [2]:
tr_data = pd.read_csv("../input/video-games-rating-by-esrb/Video_games_esrb_rating.csv")
ts_data = pd.read_csv("../input/video-games-rating-by-esrb/test_esrb.csv")

In [3]:
X = tr_data.copy()
X.drop(["title", "esrb_rating"], axis=1, inplace=True)

y = tr_data["esrb_rating"]
y

0        E
1       ET
2        M
3       ET
4        T
        ..
1890     M
1891     T
1892     E
1893     T
1894     E
Name: esrb_rating, Length: 1895, dtype: object

In [4]:
X_test = ts_data.copy()
X_test.drop(['title', 'esrb_rating'], axis=1, inplace=True)

y_test = ts_data['esrb_rating'].copy()
y_test

0       T
1       E
2       T
3       E
4      ET
       ..
495     M
496     M
497     M
498     T
499     T
Name: esrb_rating, Length: 500, dtype: object

In [5]:
y_total = y.append(y_test, ignore_index=True)
print(y_total.unique())

['E' 'ET' 'M' 'T']


In [6]:
y_encoder = LabelBinarizer()
y_transform = y_encoder.fit_transform(y_total)
y_transform

array([[1, 0, 0, 0],
       [0, 1, 0, 0],
       [0, 0, 1, 0],
       ...,
       [0, 0, 1, 0],
       [0, 0, 0, 1],
       [0, 0, 0, 1]])

In [7]:
y = y_transform[:1895]
y_test = y_transform[1895:]

In [8]:
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=.25)

In [9]:
es = EarlyStopping(monitor="val_accuracy", mode='max', patience=25, restore_best_weights=True, verbose=2)
rl = ReduceLROnPlateau(monitor="val_accuracy", mode="max", patience=3, min_lr=0.001, factor=.2, verbose=2)
mc = ModelCheckpoint("./models/Weights-{epoch:03d}--{val_accuracy:.5f}.hdf5", monitor='val_accuracy', verbose = 2, save_best_only = True, mode ='max')

model = Sequential()
model.add(Input(shape=X_train.shape[1:], name="input_layer"))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(4, activation='sigmoid'))
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
histo = model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=500, batch_size=16, callbacks=[es, rl, mc])

Epoch 1/500

Epoch 00001: val_accuracy improved from -inf to 0.72996, saving model to ./models/Weights-001--0.72996.hdf5
Epoch 2/500

Epoch 00002: val_accuracy improved from 0.72996 to 0.80802, saving model to ./models/Weights-002--0.80802.hdf5
Epoch 3/500

Epoch 00003: val_accuracy improved from 0.80802 to 0.83755, saving model to ./models/Weights-003--0.83755.hdf5
Epoch 4/500

Epoch 00004: val_accuracy improved from 0.83755 to 0.86709, saving model to ./models/Weights-004--0.86709.hdf5
Epoch 5/500

Epoch 00005: val_accuracy did not improve from 0.86709
Epoch 6/500

Epoch 00006: val_accuracy improved from 0.86709 to 0.87131, saving model to ./models/Weights-006--0.87131.hdf5
Epoch 7/500

Epoch 00007: val_accuracy did not improve from 0.87131
Epoch 8/500

Epoch 00008: val_accuracy did not improve from 0.87131
Epoch 9/500

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.001.

Epoch 00009: val_accuracy did not improve from 0.87131
Epoch 10/500

Epoch 00010: val_accuracy did no