In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import numpy as np
import pandas as pd
import cv2
from time import time
from sklearn.model_selection import train_test_split
from scipy.sparse import csr_matrix
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import BernoulliNB, ComplementNB, MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn import model_selection

TRAIN_PATH = '/content/drive/My Drive/comp551/data/train_max_x'
TARGETS_PATH = '/content/drive/My Drive/comp551/data/train_max_y.csv'
THRESH = 240

print('Standard models:')
# load images as a numpy array
train_dataset = np.array(np.load(TRAIN_PATH, allow_pickle=True))
train_dataset = np.array([cv2.threshold(i, THRESH, 255, cv2.THRESH_BINARY)[1] for i in train_dataset])
train_dataset = train_dataset / 255.0

nsamples, nx, ny = train_dataset.shape
train_dataset = train_dataset.reshape((nsamples,nx*ny)) # reshape the array into a 2d

targets = pd.read_csv(TARGETS_PATH, delimiter=',', skipinitialspace=True)
targets = targets.to_numpy()
# remove id column
targets = targets[:, 1]
targets = targets.astype(int)

X_train, X_test, y_train, y_test = train_test_split(train_dataset, targets, test_size=0.2, random_state=42)
X_train = csr_matrix(X_train)
X_test = csr_matrix(X_test)
# Clean memory
train_dataset = None
targets = None

LR = LogisticRegression(random_state=0, solver='saga', multi_class='multinomial').fit(X_train, y_train)
y_pred = LR.predict(X_test)
print("Logistic Regression")
print(confusion_matrix(y_test, y_pred))
print("Accuracy = ", accuracy_score(y_test, y_pred))
print("f1_score = ", f1_score(y_test, y_pred, average='micro'))
print("-------------------------------")

MB = MultinomialNB(alpha=0.4, fit_prior=True, class_prior=None).fit(X_train, y_train)
y_pred = MB.predict(X_test)
print("Multinomial NB")
print(confusion_matrix(y_test, y_pred))
print("Accuracy = ", accuracy_score(y_test, y_pred))
print("f1_score = ", f1_score(y_test, y_pred, average='micro'))
print("-------------------------------")

CNB = ComplementNB(alpha=4.0, class_prior=None, fit_prior=True, norm=False).fit(X_train, y_train)
y_pred = CNB.predict(X_test)
print("Multinomial NB")
print(confusion_matrix(y_test, y_pred))
print("Accuracy = ", accuracy_score(y_test, y_pred))
print("f1_score = ", f1_score(y_test, y_pred, average='micro'))
print("-------------------------------")

RF = RandomForestClassifier(n_estimators=100).fit(X_train, y_train)
y_pred = RF.predict(X_test)
print("Random Forest")
print(confusion_matrix(y_test, y_pred))
print("Accuracy = ", accuracy_score(y_test, y_pred))
print("f1_score = ", f1_score(y_test, y_pred, average='micro'))
print("-------------------------------")

LSVC = LinearSVC().fit(X_train, y_train)
y_pred = LSVC.predict(X_test)
print("Linear SVC")
print(confusion_matrix(y_test, y_pred))
print("Accuracy = ", accuracy_score(y_test, y_pred))
print("f1_score = ", f1_score(y_test, y_pred, average='micro'))
print("-------------------------------")

models = [
        ('RF', RandomForestClassifier(n_estimators=100)),
        ('LR', LogisticRegression(random_state=0, solver='saga', multi_class='multinomial')),
        ('CNB', ComplementNB(alpha=4.0, class_prior=None, fit_prior=True, norm=False)),
        ('MNB', MultinomialNB(alpha=0.4, fit_prior=True, class_prior=None))
    ]
ensemble = VotingClassifier(models).ensemble.fit(X, y)
y_pred = ensemble.predict(X_test)
print('Ensemble of RF, LR, NB, MNB')
print(confusion_matrix(y_test, y_pred))
print("Accuracy = ", accuracy_score(y_test, y_pred))
print("f1_score = ", f1_score(y_test, y_pred, average='micro'))
print("-------------------------------")

Standard models:




Logistic Regression
[[  0   0   0   0   2   1   1   1   2   3]
 [  0   1   0   2   2   6  11  11  22  20]
 [  0   0   1   7   7   8  19  56  55  57]
 [  0   1   4   5  14  29  68  80  91 127]
 [  0   0   4  13  33  48  78 107 158 210]
 [  0   2   9  16  37  47 101 164 192 255]
 [  0   3  14  23  47  82 156 236 295 354]
 [  0   3  11  61  62 120 221 308 448 536]
 [  0   0  15  51  97 140 223 404 526 684]
 [  0   3  21  62 109 202 309 500 651 835]]
Accuracy =  0.1912
f1_score =  0.1912
-------------------------------
Multinomial NB
[[  0   0   0   1   0   1   1   2   3   2]
 [  5   8  11   8   8   5   9   8  11   2]
 [ 13  15  21  22  19  29  33  24  19  15]
 [ 22  29  47  58  54  32  62  49  37  29]
 [ 22  62  60  81  85  57 123  76  43  42]
 [ 39  70  97  85 109  83 110  84  79  67]
 [ 38 119 107 131 143 131 199 125 107 110]
 [ 92 156 187 200 218 151 247 202 154 163]
 [108 179 219 212 291 211 332 244 160 184]
 [116 239 266 262 361 274 367 301 228 278]]
Accuracy =  0.1094
f1_score =  0.



Linear SVC
[[  0   0   1   1   2   1   1   2   1   1]
 [  2   2   1   2   2   4  17  12  20  13]
 [  0   2   2  11  11  18  26  49  45  46]
 [  0   3   6  10  21  52  69  83  87  88]
 [  1   3  15  30  42  66  87 118 137 152]
 [  1   7  18  21  66  70 119 164 165 192]
 [  2  11  31  24  80 115 176 273 250 248]
 [  4  11  35  64 111 174 266 321 380 404]
 [  1  12  31  78 140 211 333 378 454 502]
 [  4  17  33  94 180 249 411 512 574 618]]
Accuracy =  0.1695
f1_score =  0.1695
-------------------------------


AttributeError: ignored