In [1]:
from sklearn.model_selection import train_test_split

from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer

# from sklearn.svm import SVC, LinearSVC
# from sklearn.linear_model import SGDClassifier, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

import pandas as pd
import tensorflow as tf
from tensorflow import keras as k

from utility import *

Init Plugin
Init Graph Optimizer
Init Kernel


In [2]:
data = load_data()
X, y = create_XY(data)

enc = OneHotEncoder(sparse=False)
y = enc.fit_transform(y)
X_imputed = SimpleImputer().fit_transform(X)

In [3]:
x_train, x_test, y_train, y_test = train_test_split(X_imputed, y, test_size=test_size)

# RandomForestClassifier

In [4]:
forest = RandomForestClassifier(n_estimators=2, random_state=2)
forest = forest.fit(x_train, y_train)


#Forest Model Metrics
print("Forest Classifier")
print("Train Score: ", forest.score(x_train, y_train))
print("Test Score: ", forest.score(x_test, y_test))

Forest Classifier
Train Score:  0.6319078947368421
Test Score:  0.22017543859649122


In [5]:
n = 10
m = 10
max_depth = 10
forests = []
grid = [{"n_estimators": list(range(1, n)), "random_state": list(range(0, m)), "max_depth": list(range(1, max_depth))}]
gridSearch = GridSearchCV(RandomForestClassifier(), param_grid=grid, n_jobs=10, return_train_score=True, scoring='accuracy')
gridSearch.fit(x_train, y_train)

In [6]:
print("Forest Classifiers Best Score: ", gridSearch.best_score_)
print("Forest Classifiers Best Params: ", gridSearch.best_params_)
print("Forest Classifiers Best Params: ", gridSearch.best_estimator_)
print("Forest Classifiers Best estimator train evaluation: ", gridSearch.best_estimator_.score(x_train, y_train))
print("Forest Classifiers Best estimator test evaluation: ", gridSearch.best_estimator_.score(x_test, y_test))

Forest Classifiers Best Score:  0.4358552631578947
Forest Classifiers Best Params:  {'max_depth': 3, 'n_estimators': 1, 'random_state': 5}
Forest Classifiers Best Params:  RandomForestClassifier(max_depth=3, n_estimators=1, random_state=5)
Forest Classifiers Best estimator train evaluation:  0.2832236842105263
Forest Classifiers Best estimator test evaluation:  0.2614035087719298


# Neural Network

In [7]:
x_train_nn, x_test_nn, y_train_nn, y_test_nn = train_test_split(X_imputed, y, shuffle=False)

In [13]:
nn = k.models.Sequential([
    k.layers.Flatten(),
    k.layers.Dense(300, activation='relu'),
    k.layers.Dense(100, activation='relu'),
    k.layers.Dropout(0.3),
    k.layers.Dense(3, activation='softmax'),
])

In [14]:
learning_rate=0.01

nn.compile(
    loss='categorical_crossentropy',
    optimizer=k.optimizers.Adam(learning_rate),
    metrics=['accuracy']
)
nn(x_train_nn)
nn.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (3135, 94)                0         
_________________________________________________________________
dense_3 (Dense)              (3135, 300)               28500     
_________________________________________________________________
dense_4 (Dense)              (3135, 100)               30100     
_________________________________________________________________
dropout_1 (Dropout)          (3135, 100)               0         
_________________________________________________________________
dense_5 (Dense)              (3135, 3)                 303       
Total params: 58,903
Trainable params: 58,903
Non-trainable params: 0
_________________________________________________________________


In [15]:
epochs=500
batch_size=50
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)
nn.fit(x_train_nn, y_train_nn, epochs=epochs, batch_size=batch_size, callbacks=[callback])

Epoch 1/500
 5/63 [=>............................] - ETA: 0s - loss: 10.2537 - accuracy: 0.3760

2022-12-22 17:11:05.719339: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500


<tensorflow.python.keras.callbacks.History at 0x2b6f49a30>

In [16]:
report1(nn, x_train_nn, y_train_nn, "train", enc)
report1(nn, x_test_nn, y_test_nn, "test", enc)

2022-12-22 17:12:01.283442: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


TRAIN REPORT
              precision    recall  f1-score   support

           A      0.402     0.905     0.557       896
           D      0.000     0.000     0.000       809
           H      0.735     0.575     0.645      1430

    accuracy                          0.521      3135
   macro avg      0.379     0.493     0.401      3135
weighted avg      0.450     0.521     0.453      3135

--------------------------------------------------
TEST REPORT
              precision    recall  f1-score   support

           A      0.420     0.908     0.575       316
           D      0.000     0.000     0.000       228
           H      0.740     0.535     0.621       501

    accuracy                          0.531      1045
   macro avg      0.387     0.481     0.399      1045
weighted avg      0.482     0.531     0.472      1045

--------------------------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
