# Model
Use the given preprocessed input `X`, output `y`, and evaluation function `evaluate(y_pred, y_true)` to train a chosen model (and save the model).  
The resulting model will be a class named `Model` with the following methods:  
* fit(X, y) - train the model
* predict(X) - predict the output for given input
* save_model(path) - save the model to the given path
* load_model(path) - load the model from the given path
* evaluate() - evaluate the model performance

### KNN (K Nearest Neighbors)
https://scikit-learn.org/1.5/modules/generated/sklearn.neighbors.KNeighborsClassifier.html

In [9]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import joblib

class KNN:
    """K-nearest-neighbours classifier bundled with its own hold-out split.

    The constructor splits ``X``/``y`` into 80% training and 20% test data
    with ``train_test_split`` and creates a ``KNeighborsClassifier`` that
    uses ``k_value`` neighbours. ``evaluate`` is the scoring callable; it is
    invoked as ``evaluate(y_pred, y_true)``.
    """

    def __init__(self, X, y, evaluate, k_value=10):
        self.evaluate_ = evaluate
        split = train_test_split(X, y, test_size=0.2)
        self.X_train, self.X_test, self.y_train, self.y_test = split
        self.knn = KNeighborsClassifier(n_neighbors=k_value)

    def fit(self):
        """Train the classifier on the training split."""
        features, targets = self.X_train, self.y_train
        self.knn.fit(features, targets)

    def predict(self, X):
        """Return the classifier's predictions for ``X``."""
        predictions = self.knn.predict(X)
        return predictions

    def evaluate(self):
        """Score the predictions for the held-out split with the stored callable."""
        return self.evaluate_(self.predict(self.X_test), self.y_test)

    def save_model(self, path):
        """Persist the underlying classifier to ``path`` with joblib."""
        joblib.dump(self.knn, path)

    def load_model(self, path):
        """Replace the underlying classifier with the one stored at ``path``."""
        # joblib.load unpickles the file, so only load paths you trust.
        restored = joblib.load(path)
        self.knn = restored

### Logistic Regression
https://scikit-learn.org/1.5/modules/generated/sklearn.linear_model.LogisticRegression.html

In [10]:
import joblib
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression as LR
from sklearn.multioutput import MultiOutputClassifier

class LogisticRegression:
    """Multi-output logistic regression that records a score per training round.

    The constructor splits ``X``/``y`` into 80% training and 20% test data and
    wraps a scikit-learn logistic regression in ``MultiOutputClassifier``.

    Parameters:
        X, y: preprocessed inputs and targets handed to ``train_test_split``.
        evaluate: scoring callable, invoked as ``evaluate(y_pred, y_true)``.
        max_iter: number of training rounds run by ``fit``; round ``n`` caps
            the solver at ``n`` iterations.
        regularization: forwarded as the ``penalty`` of the base estimator.
        solver: forwarded as the ``solver`` of the base estimator.

    Attributes:
        history_: one score per round of the most recent ``fit`` call.
    """

    def __init__(self, X, y, evaluate, max_iter=10, regularization='l2', solver='lbfgs'):
        self.evaluate_ = evaluate
        self.max_iter = max_iter
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.2)
        self.lr = MultiOutputClassifier(
            LR(penalty=regularization, max_iter=1, warm_start=True, solver=solver),
            n_jobs=-1,
        )
        self.history_ = []

    def fit(self):
        """Train with an iteration cap growing from 1 to ``max_iter``.

        After every round the model is scored on the held-out split and the
        score is appended to ``history_``.
        """
        # Every call retrains from the first round, so start a fresh curve
        # instead of appending to the one left by a previous fit().
        self.history_ = []
        for iteration_cap in range(1, self.max_iter + 1):
            # NOTE(review): MultiOutputClassifier.fit clones the base estimator,
            # so each round is expected to train from scratch up to the new cap
            # rather than resume (warm_start on the template has no effect).
            self.lr.estimator.set_params(max_iter=iteration_cap)
            self.lr.fit(self.X_train, self.y_train)
            self.history_.append(self.evaluate())

    def predict(self, X):
        """Return the wrapped classifier's predictions for ``X``."""
        return self.lr.predict(X)

    def evaluate(self):
        """Score the predictions for the held-out split with the stored callable."""
        y_pred = self.predict(self.X_test)
        return self.evaluate_(y_pred, self.y_test)

    def save_model(self, path):
        """Persist the wrapped classifier to ``path`` with joblib."""
        joblib.dump(self.lr, path)

    def load_model(self, path):
        """Replace the wrapped classifier with the one stored at ``path``."""
        # joblib.load unpickles the file, so only load paths you trust.
        self.lr = joblib.load(path)


### Decision Trees
https://scikit-learn.org/1.5/modules/tree.html

In [11]:
from sklearn import tree 

class DecisionTree:
    """Decision-tree classifier whose depth is chosen by a small sweep.

    The constructor splits ``X``/``y`` into 80% training and 20% test data.

    Parameters:
        X, y: preprocessed inputs and targets handed to ``train_test_split``.
        evaluate: scoring callable, invoked as ``evaluate(y_pred, y_true)``;
            larger return values are treated as better.
        max_depth_range: ``(lowest, highest)`` depths to try, both inclusive.

    Attributes:
        history_: ``{'max_depth', 'score'}`` entries of the most recent sweep.
        tree: the selected fitted tree, or ``None`` before ``fit``.
    """

    def __init__(self, X, y, evaluate, max_depth_range=(1, 10)):
        self.evaluate_ = evaluate
        self.max_depth_range = max_depth_range
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.2)
        self.history_ = []
        self.tree = None

    def fit(self):
        """Fit one tree per depth and keep the best-scoring one.

        The depth is picked on the same held-out split that ``evaluate``
        reports on, so that reported score is optimistic for the chosen depth.

        Raises:
            ValueError: if ``max_depth_range`` contains no depth to try.
        """
        lowest, highest = self.max_depth_range[0], self.max_depth_range[1]

        # Start a fresh sweep so entries from an earlier fit() cannot win.
        self.history_ = []
        best_tree = None
        best_score = None
        for max_depth in range(lowest, highest + 1):
            candidate = tree.DecisionTreeClassifier(max_depth=max_depth)
            candidate.fit(self.X_train, self.y_train)

            y_pred = candidate.predict(self.X_test)
            score = self.evaluate_(y_pred, self.y_test)
            self.history_.append({'max_depth': max_depth, 'score': score})

            # Strict ">" keeps the shallowest tree on ties, like max() did.
            if best_tree is None or score > best_score:
                best_tree, best_score = candidate, score

        if best_tree is None:
            raise ValueError(
                f"max_depth_range {self.max_depth_range!r} contains no depth to try"
            )

        # Keep the exact tree that earned the best score: an unseeded
        # DecisionTreeClassifier is not guaranteed to rebuild the same tree
        # when refit, so refitting could store a model that was never scored.
        self.tree = best_tree

    def predict(self, X):
        """Return the selected tree's predictions for ``X``."""
        return self.tree.predict(X)

    def evaluate(self):
        """Score the predictions for the held-out split with the stored callable."""
        y_pred = self.predict(self.X_test)
        return self.evaluate_(y_pred, self.y_test)

    def save_model(self, path):
        """Persist the selected tree to ``path`` with joblib."""
        joblib.dump(self.tree, path)

    def load_model(self, path):
        """Replace the selected tree with the one stored at ``path``."""
        # joblib.load unpickles the file, so only load paths you trust.
        self.tree = joblib.load(path)

### Random Forest

https://scikit-learn.org/1.5/modules/generated/sklearn.ensemble.RandomForestClassifier.html

In [12]:
from sklearn.ensemble import RandomForestClassifier

class RandomForest:
    """Random-forest classifier bundled with its own hold-out split.

    The constructor splits ``X``/``y`` into 80% training and 20% test data
    with ``train_test_split`` and creates a ``RandomForestClassifier`` with
    ``n_jobs=-1``. ``evaluate`` is the scoring callable; it is invoked as
    ``evaluate(y_pred, y_true)``.
    """

    def __init__(self, X, y, evaluate):
        self.evaluate_ = evaluate
        split = train_test_split(X, y, test_size=0.2)
        self.X_train, self.X_test, self.y_train, self.y_test = split
        self.rfc = RandomForestClassifier(n_jobs=-1)

    def fit(self):
        """Train the forest on the training split."""
        features, targets = self.X_train, self.y_train
        self.rfc.fit(features, targets)

    def predict(self, X):
        """Return the forest's predictions for ``X``."""
        predictions = self.rfc.predict(X)
        return predictions

    def evaluate(self):
        """Score the predictions for the held-out split with the stored callable."""
        return self.evaluate_(self.predict(self.X_test), self.y_test)

    def save_model(self, path):
        """Persist the forest to ``path`` with joblib."""
        joblib.dump(self.rfc, path)

    def load_model(self, path):
        """Replace the forest with the one stored at ``path``."""
        # joblib.load unpickles the file, so only load paths you trust.
        restored = joblib.load(path)
        self.rfc = restored

### Naive Bayes
https://scikit-learn.org/1.5/modules/naive_bayes.html

In [13]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.multioutput import MultiOutputClassifier

class NaiveBayes:
    """Multi-output multinomial naive Bayes with a small ``alpha`` sweep.

    The constructor splits ``X``/``y`` into 80% training and 20% test data.

    Parameters:
        X, y: preprocessed inputs and targets handed to ``train_test_split``.
        evaluate: scoring callable, invoked as ``evaluate(y_pred, y_true)``;
            larger return values are treated as better.
        alpha_values: smoothing values to try; ``None`` selects
            ``[0.1, 0.5, 1.0]``.

    Attributes:
        history_: ``{'alpha', 'score'}`` entries of the most recent sweep.
        nb: the selected fitted model, or ``None`` before ``fit``.
    """

    def __init__(self, X, y, evaluate, alpha_values=None):
        self.evaluate_ = evaluate
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.2)
        self.alpha_values = alpha_values if alpha_values is not None else [0.1, 0.5, 1.0]
        self.nb = None
        self.history_ = []

    def fit(self):
        """Fit one model per ``alpha`` and keep the best-scoring one.

        The value is picked on the same held-out split that ``evaluate``
        reports on, so that reported score is optimistic for the chosen value.

        Raises:
            ValueError: if ``alpha_values`` yields no value to try.
        """
        # Start a fresh sweep so entries from an earlier fit() cannot win.
        self.history_ = []
        best_model = None
        best_score = None
        for alpha in self.alpha_values:
            candidate = MultiOutputClassifier(MultinomialNB(alpha=alpha))
            candidate.fit(self.X_train, self.y_train)

            y_pred = candidate.predict(self.X_test)
            score = self.evaluate_(y_pred, self.y_test)
            self.history_.append({'alpha': alpha, 'score': score})

            # Strict ">" keeps the first alpha on ties, like max() did.
            if best_model is None or score > best_score:
                best_model, best_score = candidate, score

        if best_model is None:
            raise ValueError("alpha_values contains no value to try")

        # The winning candidate is already fitted on the training split, so it
        # is kept as-is instead of being trained a second time.
        self.nb = best_model

    def predict(self, X):
        """Return the selected model's predictions for ``X``."""
        return self.nb.predict(X)

    def evaluate(self):
        """Score the predictions for the held-out split with the stored callable."""
        y_pred = self.predict(self.X_test)
        return self.evaluate_(y_pred, self.y_test)

    def save_model(self, path):
        """Persist the selected model to ``path`` with joblib."""
        joblib.dump(self.nb, path)

    def load_model(self, path):
        """Replace the selected model with the one stored at ``path``."""
        # joblib.load unpickles the file, so only load paths you trust.
        self.nb = joblib.load(path)

### SVM (Support Vector Machine)
https://scikit-learn.org/1.5/modules/generated/sklearn.svm.SVC.html

In [14]:
from sklearn.svm import SVC
from sklearn.multioutput import MultiOutputClassifier

class SVM:
    """Multi-output support-vector classifier with its own hold-out split.

    The constructor splits ``X``/``y`` into 80% training and 20% test data
    with ``train_test_split`` and wraps ``SVC(C=C_value)`` in a
    ``MultiOutputClassifier`` with ``n_jobs=-1``. ``evaluate`` is the scoring
    callable; it is invoked as ``evaluate(y_pred, y_true)``.
    """

    def __init__(self, X, y, evaluate, C_value=1.0):
        self.evaluate_ = evaluate
        split = train_test_split(X, y, test_size=0.2)
        self.X_train, self.X_test, self.y_train, self.y_test = split
        base_estimator = SVC(C=C_value)
        self.svm = MultiOutputClassifier(base_estimator, n_jobs=-1)
        # Not written by any method of this class; kept as a public attribute.
        self.history_ = []

    def fit(self):
        """Train the wrapped classifier on the training split."""
        features, targets = self.X_train, self.y_train
        self.svm.fit(features, targets)

    def predict(self, X):
        """Return the wrapped classifier's predictions for ``X``."""
        predictions = self.svm.predict(X)
        return predictions

    def evaluate(self):
        """Score the predictions for the held-out split with the stored callable."""
        return self.evaluate_(self.predict(self.X_test), self.y_test)

    def save_model(self, path):
        """Persist the wrapped classifier to ``path`` with joblib."""
        joblib.dump(self.svm, path)

    def load_model(self, path):
        """Replace the wrapped classifier with the one stored at ``path``."""
        # joblib.load unpickles the file, so only load paths you trust.
        restored = joblib.load(path)
        self.svm = restored

### Neural Network (Multi-layer Perceptron)
https://scikit-learn.org/1.5/modules/neural_networks_supervised.html

https://scikit-learn.org/dev/modules/generated/sklearn.neural_network.MLPClassifier.html

In [15]:
from sklearn.neural_network import MLPClassifier

# TODO: need to test

class MLP:
    """Multi-layer perceptron trained one round at a time with a score log.

    The constructor splits ``X``/``y`` into 80% training and 20% test data and
    creates an ``MLPClassifier`` with ``max_iter=1`` and ``warm_start=True``.

    Parameters:
        X, y: preprocessed inputs and targets handed to ``train_test_split``.
        evaluate: scoring callable, invoked as ``evaluate(y_pred, y_true)``.
        max_iter: number of training rounds run by each ``fit`` call.
        hidden_layer_sizes: forwarded to ``MLPClassifier``.

    Attributes:
        history_: ``{'iteration', 'score', 'loss'}`` entries, one per round,
            accumulated over every ``fit`` call on this instance.
    """

    def __init__(self, X, y, evaluate, max_iter=30, hidden_layer_sizes=(100,)):
        self.evaluate_ = evaluate
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.2)
        self.max_iter = max_iter
        self.hidden_layer_sizes = hidden_layer_sizes
        self.nn = MLPClassifier(hidden_layer_sizes=self.hidden_layer_sizes, max_iter=1, warm_start=True)
        self.history_ = []

    def fit(self):
        """Run ``max_iter`` training rounds, logging score and loss after each.

        NOTE(review): with ``warm_start=True`` each ``self.nn.fit`` call is
        expected to resume from the previous weights, so calling ``fit`` again
        continues training. The ``'iteration'`` label therefore keeps counting
        across calls instead of restarting at 1.
        """
        for _ in range(self.max_iter):
            self.nn.fit(self.X_train, self.y_train)
            self.history_.append({
                'iteration': len(self.history_) + 1,
                'score': self.evaluate(),
                'loss': self.nn.loss_,
            })

    def predict(self, X):
        """Return the network's predictions for ``X``."""
        return self.nn.predict(X)

    def evaluate(self):
        """Score the predictions for the held-out split with the stored callable."""
        y_pred = self.predict(self.X_test)
        return self.evaluate_(y_pred, self.y_test)

    def save_model(self, path):
        """Persist the network to ``path`` with joblib."""
        joblib.dump(self.nn, path)

    def load_model(self, path):
        """Replace the network with the one stored at ``path``."""
        # joblib.load unpickles the file, so only load paths you trust.
        self.nn = joblib.load(path)