### **Import libraries and datasets**

In [None]:
import numpy as np 
import pandas as pd 
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import GridSearchCV
import seaborn as sns
import joblib
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

import os
# Print the full path of every file found under the Kaggle input directory
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

In [None]:
# Training data: read the CSV that the model is fitted on and preview the first rows
train_csv_path = "/kaggle/input/video-games-rating-by-esrb/Video_games_esrb_rating.csv"
df = pd.read_csv(train_csv_path)
df.head()

In [None]:
# Held-out data: read the CSV used later to evaluate the model and preview the first rows
test_csv_path = "/kaggle/input/video-games-rating-by-esrb/test_esrb.csv"
df_result = pd.read_csv(test_csv_path)
df_result.head()

### **Data Visualization**

**We'll only explore the data used to train our model.**

In [None]:
# (rows, columns) of the training dataset
df.shape

In [None]:
# Number of missing values in each column of the training dataset
df.isna().sum()

In [None]:
# Correlation between the numeric attributes only.
# The frame also contains non-numeric (object) columns — "title" and
# "esrb_rating" are dropped / used as the label later on — and pandas >= 2.0
# raises when DataFrame.corr() meets them, so select numeric/bool columns first.
numeric_corr = df.select_dtypes(include=["number", "bool"]).corr()

plt.figure(figsize=(30, 15))

sns.heatmap(numeric_corr, annot=True, cmap="RdBu")

In [None]:
# Names of all columns in the training dataset
df.columns

In [None]:
# Per-column dtype and non-null count summary
df.info()

In [None]:
# Names of every non-object (i.e. numeric) column in the training dataset
feature = [column for column in df.columns if df[column].dtype != 'O']

# One count plot per numeric attribute, laid out on a two-column grid.
# The grid height is derived from the number of attributes so the figure
# neither overflows nor wastes space, and the figure is sized per row so each
# panel stays readable.
if feature:
    n_cols = 2
    n_rows = (len(feature) + n_cols - 1) // n_cols  # ceiling division
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 3 * n_rows), squeeze=False)
    flat_axes = axes.ravel()

    for ax, column in zip(flat_axes, feature):
        # x must be passed by keyword: seaborn >= 0.12 no longer accepts it positionally
        sns.countplot(x=column, data=df, ax=ax)

    # Hide the unused panel when the number of attributes is odd
    for ax in flat_axes[len(feature):]:
        ax.set_visible(False)

    fig.tight_layout()
    plt.show()

### **Creating our model**

In [None]:
# Columns that are not predictors: the game title and the class label itself
non_feature_cols = ["title", "esrb_rating"]

# Predictors (x) and class (y) for training
x_train, y_train = df.drop(columns=non_feature_cols), df["esrb_rating"]

# Predictors (x) and class (y) for testing
x_test, y_test = df_result.drop(columns=non_feature_cols), df_result["esrb_rating"]

In [None]:
# Baseline model: Extra Trees with default hyperparameters.
# random_state is fixed so the baseline scores are reproducible across runs
# and comparable with the tuned model, which is also seeded with 0. The grid
# search later uses this estimator as its template, so it becomes
# deterministic as well.
et_classifier = ExtraTreesClassifier(random_state=0)
et_classifier.fit(x_train, y_train)

# Predict on the rows the model was fitted on and on the held-out test rows
predictions_train = et_classifier.predict(x_train)
predictions_test = et_classifier.predict(x_test)

# Fraction of correctly predicted ratings on each set
accuracy_train = accuracy_score(y_train, predictions_train)
accuracy_test = accuracy_score(y_test, predictions_test)

In [None]:
# Accuracy of the model on the training data (the same rows it was fitted on)
accuracy_train

In [None]:
# Accuracy of the model on the separate test dataset
accuracy_test

### **Tuning our hyperparameters with Grid Search**

In [None]:
# Candidate values to try: n_estimators in 25, 50, ..., 200 and max_features in 1..5
n_estimators = np.arange(25, 201, 25)
max_features = np.arange(1, 6, 1)

# Search space handed to the grid search, keyed by hyperparameter name
param_grid = {
    "n_estimators": n_estimators,
    "max_features": max_features,
}

# Try every combination in the grid with 5-fold cross-validation on the training data
grid = GridSearchCV(et_classifier, param_grid, cv=5)
grid.fit(x_train, y_train)

In [None]:
# Best hyperparameter combination found by the grid search, and its
# cross-validated score
grid.best_params_, grid.best_score_

In [None]:
# Rebuild the model with the hyperparameters selected by the grid search.
# They are read from grid.best_params_ rather than typed in by hand, so this
# cell always matches the search result even when the data or grid changes.
best_params = grid.best_params_
et_classifier = ExtraTreesClassifier(random_state=0, **best_params)
et_classifier.fit(x_train, y_train)

# Predict on the rows the model was fitted on and on the held-out test rows
predictions_train = et_classifier.predict(x_train)
predictions_test = et_classifier.predict(x_test)

# Fraction of correctly predicted ratings on each set
accuracy_train = accuracy_score(y_train, predictions_train)
accuracy_test = accuracy_score(y_test, predictions_test)

In [None]:
# Accuracy of the tuned model on the training data (the same rows it was fitted on)
accuracy_train

In [None]:
# Accuracy of the tuned model on the separate test dataset
accuracy_test

In [None]:
# Confusion matrix of the tuned model on the test set.
# The result gets its own name: assigning it to `confusion_matrix` would
# shadow the imported sklearn function and make this cell fail when re-run.
class_labels = et_classifier.classes_
conf_matrix = confusion_matrix(y_test, predictions_test, labels=class_labels)

fig, ax = plt.subplots(figsize=(10, 5))

# Rows are the true ratings and columns the predicted ones; fmt="d" prints the
# counts as plain integers instead of the default scientific-style format.
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="RdBu",
            xticklabels=class_labels, yticklabels=class_labels, ax=ax)
ax.set(xlabel="Predicted rating", ylabel="True rating",
       title="Confusion matrix (test set)")
plt.show()

In [None]:
# Persist the fitted classifier to disk with joblib.
# NOTE: files like this should only be loaded back from trusted sources,
# since unpickling can execute arbitrary code.
joblib.dump(et_classifier, "video_games_category.pkl")