In [1]:
import pandas as pd 
import numpy as np
import cv2
import time

from sklearn.metrics import confusion_matrix

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.dummy import DummyClassifier

In [2]:
train = pd.read_csv('train_sift.csv')
test = pd.read_csv('test_sift.csv')

train.drop(columns='Unnamed: 0',inplace = True)
test.drop(columns='Unnamed: 0',inplace = True)

In [3]:
X_train = np.array(train.iloc[:,:-1])
y_train = np.array(train['y'])

X_test = np.array(test.iloc[:,:-1])
y_test = np.array(test['y'])

In [4]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((198, 150), (198,), (68, 150), (68,))

In [5]:
def study(model):
    start = time.time()
    
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    matrix = confusion_matrix(y_test,pred)
    
    # 0 - cup, 1 - nothing, 2 - anticeptik  
    print(matrix)
    print("Type 1 error (cup):" ,(sum(matrix[0]) - matrix[0][0]) / sum(matrix[0]))
    print("Type 1 error (anticeptik):" ,(sum(matrix[2]) - matrix[2][2]) / sum(matrix[2]))
    print("Type 2 error:" ,(sum(matrix[1]) - matrix[1][1]) / sum(matrix[1]))
    print("Accuracy:", (matrix[0][0] + matrix[1][1] + matrix[2][2]) / len(y_test))
    print(str(round(time.time() - start, 5)) + ' sec\n\n')

In [6]:
classifiers = [
    DummyClassifier(),
    SVC(gamma='scale'),
    KNeighborsClassifier(3),
    LogisticRegression(),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    #GaussianNB(),
    #LinearDiscriminantAnalysis(),
    #QuadraticDiscriminantAnalysis(),
]

for clf in classifiers:
    print(clf.__class__.__name__)
    study(clf)

DummyClassifier
[[12  5  7]
 [ 9  1  7]
 [12  3 12]]
Type 1 error (cup): 0.5
Type 1 error (anticeptik): 0.5555555555555556
Type 2 error: 0.9411764705882353
Accuracy: 0.36764705882352944
0.00569 sec


SVC
[[17  0  7]
 [ 3  0 14]
 [ 2  0 25]]
Type 1 error (cup): 0.2916666666666667
Type 1 error (anticeptik): 0.07407407407407407
Type 2 error: 1.0
Accuracy: 0.6176470588235294
0.01413 sec


KNeighborsClassifier
[[17  0  7]
 [ 6  3  8]
 [ 3  0 24]]
Type 1 error (cup): 0.2916666666666667
Type 1 error (anticeptik): 0.1111111111111111
Type 2 error: 0.8235294117647058
Accuracy: 0.6470588235294118
0.00842 sec


LogisticRegression
[[17  1  6]
 [ 1  6 10]
 [ 2  3 22]]
Type 1 error (cup): 0.2916666666666667
Type 1 error (anticeptik): 0.18518518518518517
Type 2 error: 0.6470588235294118
Accuracy: 0.6617647058823529
0.06153 sec


DecisionTreeClassifier
[[19  2  3]
 [ 1 10  6]
 [ 1  7 19]]
Type 1 error (cup): 0.20833333333333334
Type 1 error (anticeptik): 0.2962962962962963
Type 2 error: 0.4117647058823

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


[[19  1  4]
 [ 3  2 12]
 [ 3  0 24]]
Type 1 error (cup): 0.20833333333333334
Type 1 error (anticeptik): 0.1111111111111111
Type 2 error: 0.8823529411764706
Accuracy: 0.6617647058823529
0.17407 sec


AdaBoostClassifier
[[17  2  5]
 [ 0  4 13]
 [ 1  6 20]]
Type 1 error (cup): 0.2916666666666667
Type 1 error (anticeptik): 0.25925925925925924
Type 2 error: 0.7647058823529411
Accuracy: 0.6029411764705882
0.10207 sec


GradientBoostingClassifier
[[19  1  4]
 [ 2  7  8]
 [ 1  2 24]]
Type 1 error (cup): 0.20833333333333334
Type 1 error (anticeptik): 0.1111111111111111
Type 2 error: 0.5882352941176471
Accuracy: 0.7352941176470589
0.84625 sec




In [7]:
train = pd.read_csv('train_brisk.csv')
test = pd.read_csv('test_brisk.csv')

train.drop(columns='Unnamed: 0',inplace = True)
test.drop(columns='Unnamed: 0',inplace = True)

X_train = np.array(train.iloc[:,:-1])
y_train = np.array(train['y'])

X_test = np.array(test.iloc[:,:-1])
y_test = np.array(test['y'])

In [8]:
for clf in classifiers:
    print(clf.__class__.__name__)
    study(clf)

DummyClassifier
[[ 9  2 13]
 [ 7  3  7]
 [12  2 13]]
Type 1 error (cup): 0.625
Type 1 error (anticeptik): 0.5185185185185185
Type 2 error: 0.8235294117647058
Accuracy: 0.36764705882352944
0.0029 sec


SVC
[[19  0  5]
 [ 2  0 15]
 [ 7  0 20]]
Type 1 error (cup): 0.20833333333333334
Type 1 error (anticeptik): 0.25925925925925924
Type 2 error: 1.0
Accuracy: 0.5735294117647058
0.01669 sec


KNeighborsClassifier
[[19  0  5]
 [ 2  1 14]
 [ 5  1 21]]
Type 1 error (cup): 0.20833333333333334
Type 1 error (anticeptik): 0.2222222222222222
Type 2 error: 0.9411764705882353
Accuracy: 0.6029411764705882
0.00898 sec


LogisticRegression
[[17  1  6]
 [ 1  6 10]
 [ 1  2 24]]
Type 1 error (cup): 0.2916666666666667
Type 1 error (anticeptik): 0.1111111111111111
Type 2 error: 0.6470588235294118
Accuracy: 0.6911764705882353
0.04086 sec


DecisionTreeClassifier
[[13  2  9]
 [ 1  4 12]
 [ 5  3 19]]
Type 1 error (cup): 0.4583333333333333
Type 1 error (anticeptik): 0.2962962962962963
Type 2 error: 0.764705882352

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


[[19  0  5]
 [ 1  1 15]
 [ 7  0 20]]
Type 1 error (cup): 0.20833333333333334
Type 1 error (anticeptik): 0.25925925925925924
Type 2 error: 0.9411764705882353
Accuracy: 0.5882352941176471
0.15924 sec


AdaBoostClassifier
[[17  0  7]
 [ 1  5 11]
 [ 1  1 25]]
Type 1 error (cup): 0.2916666666666667
Type 1 error (anticeptik): 0.07407407407407407
Type 2 error: 0.7058823529411765
Accuracy: 0.6911764705882353
0.11187 sec


GradientBoostingClassifier
[[18  0  6]
 [ 1  2 14]
 [ 4  1 22]]
Type 1 error (cup): 0.25
Type 1 error (anticeptik): 0.18518518518518517
Type 2 error: 0.8823529411764706
Accuracy: 0.6176470588235294
0.93334 sec


