# Import libraries

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import joblib

import import_ipynb

# Import data

In [6]:
import data_preprocessing

class CreateModel:
    """Model-building pipeline; later notebook cells extend this class with more steps."""

    def load_data(self, print_debug=False):
        """Load the games dataset and store it on the instance as ``self.data``.

        The dataset object is created by ``data_preprocessing.Data`` with
        ``tags_or_genres='genres'``.

        Args:
            print_debug: When truthy, print the head of the games table and
                the shape of its ``description`` column.
        """
        dataset = data_preprocessing.Data(tags_or_genres='genres')
        self.data = dataset

        # Nothing else to do unless debug output was requested.
        if not print_debug:
            return

        games = dataset.games
        print(games.head())
        print(games['description'].shape)

# Input preprocessing
Convert the complex input (game description) into a simple input for the model  
The resulting input variable will be named `X`

We will use the [scikit-learn library](https://scikit-learn.org/1.5/modules/feature_extraction.html) to perform the feature extraction.

In [7]:
import input_preprocessing

class CreateModel(CreateModel):
    def preprocess_input(self, input_preprocessor, print_debug=False, max_features=10000):
        """Convert the game descriptions into the model input ``self.X``.

        Args:
            input_preprocessor: Callable invoked as
                ``input_preprocessor(descriptions, max_features=...)``.
                Candidates noted for this step are
                ``input_preprocessing.bag_of_words``,
                ``input_preprocessing.tfidf`` and
                ``input_preprocessing.hashing``.
            print_debug: When truthy, print the shape of ``self.X``.
            max_features: Value forwarded to the preprocessor as
                ``max_features``. Defaults to 10000, the value that used to
                be hard-coded here; the notes for the hashing preprocessor
                used 100 instead, which is why it is now configurable.
        """
        self.X = input_preprocessor(
            self.data.games['description'],
            max_features=max_features,
        )

        if print_debug:
            print(self.X.shape)

# Output preprocessing
Convert the complex output (game tags or genres) into a simple output for the model (multi-label classification)  
The resulting output variable will be named `y`

In [8]:
import output_preprocessing

class CreateModel(CreateModel):
    def preprocess_output(self, output_preprocessor, print_debug=False):
        """Convert the ``genres`` column into the model target ``self.y``.

        Args:
            output_preprocessor: Callable applied to the ``genres`` column of
                the games table; ``output_preprocessing.multilabel_binarizer``
                is the candidate noted for this step.
            print_debug: When truthy, print the shape of ``self.y``.
        """
        genres = self.data.games['genres']
        self.y = output_preprocessor(genres)

        # Skip the debug print unless it was asked for.
        if not print_debug:
            return

        print(self.y.shape)

# Evaluation methods
We want to create a function `evaluate(y_pred, y_true)` that evaluates the model's performance

In [9]:
import evaluation_methods

class CreateModel(CreateModel):
    def create_evaluate(self, evaluation_method):
        """Remember the evaluation callable on the instance as ``self.evaluate``.

        Args:
            evaluation_method: The evaluation function to store, e.g. one of
                the functions in ``evaluation_methods``.
        """
        scorer = evaluation_method
        self.evaluate = scorer

# evaluate = evaluation_methods.recall
# evaluate = evaluation_methods.f1score

# Model
Use the given preprocessed input `X`, output `y`, and evaluation function `evaluate(y_pred, y_true)` to train a chosen model (and save the model)  
The resulting model will be a class named `Model` with methods:  
* fit(X, y) - train the model
* predict(X) - predict the output for given input
* save_model(path) - save the model to the given path
* load_model(path) - load the model from the given path
* evaluate() - evaluate the model performance

In [10]:
import models

class CreateModel(CreateModel):
    def create_model(self, model):
        """Instantiate the chosen model and store it as ``self.model``.

        Args:
            model: Callable (typically a class from ``models``) invoked as
                ``model(X, y, evaluate)`` with the attributes prepared on
                this instance.
        """
        features, targets, scorer = self.X, self.y, self.evaluate
        self.model = model(features, targets, scorer)
        
# model = models.KNN(X, y, evaluate)
# model = models.LogisticRegression(X, y, evaluate)
# model = models.DecisionTree(X, y, evaluate)
# model = models.RandomForest(X, y, evaluate)
# model = models.NaiveBayes(X, y, evaluate)
# model = models.SVM(X, y, evaluate)
# model = models.MLP(X, y, evaluate)

# Model training and evaluation

In [11]:
class CreateModel(CreateModel):
    def fit(self):
        """Train the stored model by delegating to ``self.model.fit()``."""
        wrapped = self.model
        wrapped.fit()

    def evaluate_model(self):
        """Return the result of ``self.model.evaluate()`` unchanged."""
        outcome = self.model.evaluate()
        return outcome

# model.fit()
# print(model.evaluate())

# Class constructor

In [12]:
class CreateModel(CreateModel):
    def __init__(self, input_preprocessor, output_preprocessor, model, evaluation_method, print_debug=False):
        """Run the full pipeline on construction: load, preprocess, build, fit.

        Args:
            input_preprocessor: Passed to ``preprocess_input``.
            output_preprocessor: Passed to ``preprocess_output``.
            model: Passed to ``create_model``.
            evaluation_method: Passed to ``create_evaluate``.
            print_debug: Forwarded to the loading and preprocessing steps.
        """
        debug = {'print_debug': print_debug}

        # Data loading and feature/target preparation.
        self.load_data(**debug)
        self.preprocess_input(input_preprocessor, **debug)
        self.preprocess_output(output_preprocessor, **debug)

        # The evaluation callable must be set before the model is created,
        # because create_model hands self.evaluate to the model.
        self.create_evaluate(evaluation_method)
        self.create_model(model)

        # The model is trained immediately as part of construction.
        self.fit()