In [6]:
import pandas as pd 
import numpy as np
import cv2
import time

from sklearn.metrics import confusion_matrix

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.dummy import DummyClassifier

In [7]:
train = pd.read_csv('train_sift.csv')
test = pd.read_csv('test_sift.csv')

train.drop(columns='Unnamed: 0',inplace = True)
test.drop(columns='Unnamed: 0',inplace = True)

In [8]:
X_train = np.array(train.iloc[:,:-1])
y_train = np.array(train['y'])

X_test = np.array(test.iloc[:,:-1])
y_test = np.array(test['y'])

In [9]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((198, 150), (198,), (150, 150), (150,))

In [10]:
def study(model):
    start = time.time()
    
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    matrix = confusion_matrix(y_test,pred)
    
    # 0 - cup, 1 - nothing, 2 - anticeptik  
    print(matrix)
    print("Type 1 error (cup):" ,(sum(matrix[0]) - matrix[0][0]) / sum(matrix[0]))
    print("Type 1 error (anticeptik):" ,(sum(matrix[2]) - matrix[2][2]) / sum(matrix[2]))
    print("Type 2 error:" ,(sum(matrix[1]) - matrix[1][1]) / sum(matrix[1]))
    print("Accuracy:", (matrix[0][0] + matrix[1][1] + matrix[2][2]) / len(y_test))
    print(str(round(time.time() - start, 5)) + ' sec\n\n')

In [20]:
classifiers = [
    DummyClassifier(),
    SVC(gamma='scale'),
    KNeighborsClassifier(3),
    LogisticRegression(),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    #GaussianNB(),
    #LinearDiscriminantAnalysis(),
    #QuadraticDiscriminantAnalysis(),
]

for clf in classifiers:
    print(clf.__class__.__name__)
    study(clf)

DummyClassifier
[[22  9 19]
 [20  7 23]
 [23  4 23]]
Type 1 error (cup): 0.56
Type 1 error (anticeptik): 0.54
Type 2 error: 0.86
Accuracy: 0.3466666666666667
0.00416 sec


SVC
[[42  0  8]
 [ 8  0 42]
 [ 3  0 47]]
Type 1 error (cup): 0.16
Type 1 error (anticeptik): 0.06
Type 2 error: 1.0
Accuracy: 0.5933333333333334
0.01876 sec


KNeighborsClassifier
[[41  1  8]
 [12  3 35]
 [ 5  0 45]]
Type 1 error (cup): 0.18
Type 1 error (anticeptik): 0.1
Type 2 error: 0.94
Accuracy: 0.5933333333333334
0.01871 sec


LogisticRegression
[[39  5  6]
 [ 4 13 33]
 [ 3  5 42]]
Type 1 error (cup): 0.22
Type 1 error (anticeptik): 0.16
Type 2 error: 0.74
Accuracy: 0.6266666666666667
0.05324 sec


DecisionTreeClassifier
[[42  1  7]
 [ 5 25 20]
 [ 2 10 38]]
Type 1 error (cup): 0.16
Type 1 error (anticeptik): 0.24
Type 2 error: 0.5
Accuracy: 0.7
0.00651 sec


RandomForestClassifier


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


[[45  0  5]
 [ 4  5 41]
 [ 3  0 47]]
Type 1 error (cup): 0.1
Type 1 error (anticeptik): 0.06
Type 2 error: 0.9
Accuracy: 0.6466666666666666
0.16495 sec


AdaBoostClassifier
[[42  2  6]
 [ 1 12 37]
 [ 2  9 39]]
Type 1 error (cup): 0.16
Type 1 error (anticeptik): 0.22
Type 2 error: 0.76
Accuracy: 0.62
0.10571 sec


GradientBoostingClassifier
[[45  1  4]
 [ 5 16 29]
 [ 1  2 47]]
Type 1 error (cup): 0.1
Type 1 error (anticeptik): 0.06
Type 2 error: 0.68
Accuracy: 0.72
0.86954 sec




In [21]:
train = pd.read_csv('train_brisk.csv')
test = pd.read_csv('test_brisk.csv')

train.drop(columns='Unnamed: 0',inplace = True)
test.drop(columns='Unnamed: 0',inplace = True)

X_train = np.array(train.iloc[:,:-1])
y_train = np.array(train['y'])

X_test = np.array(test.iloc[:,:-1])
y_test = np.array(test['y'])

In [24]:
for clf in classifiers:
    print(clf.__class__.__name__)
    study(clf)

DummyClassifier
[[19  7 24]
 [18  4 28]
 [24 12 14]]
Type 1 error (cup): 0.62
Type 1 error (anticeptik): 0.72
Type 2 error: 0.92
Accuracy: 0.24666666666666667
0.00313 sec


SVC
[[45  0  5]
 [ 8  0 42]
 [10  0 40]]
Type 1 error (cup): 0.1
Type 1 error (anticeptik): 0.2
Type 2 error: 1.0
Accuracy: 0.5666666666666667
0.02145 sec


KNeighborsClassifier
[[45  0  5]
 [ 4  8 38]
 [ 5  1 44]]
Type 1 error (cup): 0.1
Type 1 error (anticeptik): 0.12
Type 2 error: 0.84
Accuracy: 0.6466666666666666
0.01411 sec


LogisticRegression
[[42  1  7]
 [ 5 14 31]
 [ 4  3 43]]
Type 1 error (cup): 0.16
Type 1 error (anticeptik): 0.14
Type 2 error: 0.72
Accuracy: 0.66
0.04109 sec


DecisionTreeClassifier
[[39  1 10]
 [ 4  8 38]
 [ 6  6 38]]
Type 1 error (cup): 0.22
Type 1 error (anticeptik): 0.24
Type 2 error: 0.84
Accuracy: 0.5666666666666667
0.00713 sec


RandomForestClassifier


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


[[44  0  6]
 [ 1  1 48]
 [ 8  0 42]]
Type 1 error (cup): 0.12
Type 1 error (anticeptik): 0.16
Type 2 error: 0.98
Accuracy: 0.58
0.16306 sec


AdaBoostClassifier
[[40  0 10]
 [ 1 11 38]
 [ 2  2 46]]
Type 1 error (cup): 0.2
Type 1 error (anticeptik): 0.08
Type 2 error: 0.78
Accuracy: 0.6466666666666666
0.10766 sec


GradientBoostingClassifier
[[44  1  5]
 [ 3  6 41]
 [ 5  2 43]]
Type 1 error (cup): 0.12
Type 1 error (anticeptik): 0.14
Type 2 error: 0.88
Accuracy: 0.62
0.90281 sec


