In [1]:
from Classes.KNumNeighborsClassifier import KNumNeighborsClassifier
from Classes.KNumNeighborsTeacher import KNumNeighborsTeacher
from Classes.NaiveBayesianClassificator import NaiveBayesianClassificator
from Classes.NaiveBayesianTeacher import NaiveBayesianTeacher

from Functions.distance_functions import euclid_distance
from Functions.functions_of_priority import max_count_class
from Functions.spreading_functions import normal_spread
from Functions.metrics import *


from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

In [2]:
# Read the prepared dataset and hold out 10% of the rows for testing.
# `hazardous` is the target column; every other column is used as a feature.
models = pd.read_csv('../Data/neo_task_smoted.csv')
X_train, X_test, y_train, y_test = train_test_split(
    models.drop(columns='hazardous'),
    models['hazardous'],
    test_size=0.1,
    random_state=42,  # fixed seed so the split is reproducible
)

In [3]:
# Display the held-out target labels produced by train_test_split.
y_test

30724     0.0
35732     0.0
118948    1.0
76136     0.0
36621     0.0
         ... 
41761     0.0
19297     0.0
149944    1.0
103037    1.0
154557    1.0
Name: hazardous, Length: 16400, dtype: float64

In [4]:
# Display the training feature frame produced by train_test_split.
X_train

Unnamed: 0,est_diameter_max,relative_velocity,miss_distance,absolute_magnitude
161658,-0.993405,-0.382161,0.718675,0.034654
46173,-0.999696,-0.956341,-0.981829,0.574468
124370,-0.994114,-0.839733,-0.699950,0.094784
109476,-0.978060,-0.331272,0.952452,-0.182535
106322,-0.989715,-0.730527,0.273584,-0.045514
...,...,...,...,...
119879,-0.981295,-0.635733,-0.532257,-0.153718
103694,-0.984032,-0.714345,0.317797,-0.125175
131932,-0.992591,-0.636823,-0.081927,0.013857
146867,-0.989026,-0.652895,-0.262361,-0.057075


In [5]:
# Sanity check: shapes of the test labels, training labels and training features.
y_test.shape, y_train.shape, X_train.shape

((16400,), (147592,), (147592, 4))

In [6]:
# Build the project's KNN classifier and fit it on the training split.
# NOTE(review): the arguments are presumably (number of neighbours, distance
# function, class-selection rule) - confirm against KNumNeighborsClassifier.
x = KNumNeighborsClassifier(5, euclid_distance, max_count_class)

# The teacher receives plain NumPy arrays rather than pandas objects.
KNumNeighborsTeacher(x).teach(
    np.array(X_train),
    np.array(y_train),
)

In [7]:
# Predict labels for the first 100 rows of the test features with the fitted KNN model.
x.predict(np.array(X_test[0:100]))

array([1., 0., 1., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 1., 1., 0., 0.,
       0., 1., 0., 0., 0., 1., 1., 0., 1., 1., 1., 0., 0., 1., 1., 1., 1.,
       1., 0., 1., 1., 0., 1., 1., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0.,
       0., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 0., 1., 0., 1., 1., 0.,
       1., 1., 1., 0., 1., 1., 0., 1., 1., 0., 1., 0., 0., 0., 0., 1., 1.,
       1., 0., 1., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 0., 1.])

In [8]:
# Evaluate the KNN classifier `x` on the first 100 test rows.
# The prediction is computed once and reused for every metric; the original
# re-ran x.predict(...) inside each f-string, i.e. eight identical passes
# over the same 100 rows.
knn_eval_pred = x.predict(np.array(X_test[0:100]))
knn_eval_true = y_test[0:100]  # matching slice of the held-out labels

# Argument order (predictions first, labels second) is the same as in the
# original calls. Label strings are left exactly as they were so the recorded
# output still matches.
print(
    f'TP : {TruePositive(knn_eval_pred, knn_eval_true)}',
    f'TN : {TrueNegative(knn_eval_pred, knn_eval_true)}',
    f'FN : {FalseNegative(knn_eval_pred, knn_eval_true)}',
    f'FP : {FalsePositive(knn_eval_pred, knn_eval_true)}',
    f'Accuracy : {accuracy(knn_eval_pred, knn_eval_true)}',
    f'Specificity : {specificity(knn_eval_pred, knn_eval_true)}',
    f'Prcision : {precision(knn_eval_pred, knn_eval_true)}',
    f'Recall : {recall(knn_eval_pred, knn_eval_true)}',
    sep='\n'
    )

TP : 47
TN : 43
FN : 0
FP : 10
Accuracy : 0.9
Specificity : 0.8113207547169812
Prcision : 0.8245614035087719
Recall : 1.0


In [9]:
# Build the project's naive Bayes classifier and fit it on the training split.
# NOTE(review): the lambda `a > 0` is presumably the decision rule applied to
# the model's score - confirm against NaiveBayesianClassificator.
k = NaiveBayesianClassificator(lambda a: a > 0)

# The same spreading function is passed four times (X_train has four columns);
# presumably one entry per feature - confirm against NaiveBayesianTeacher.teach.
NaiveBayesianTeacher(k).teach(
    np.array(X_train),
    np.array(y_train),
    [normal_spread] * 4,
)


In [10]:
# Evaluate the naive Bayes classifier `k` on the first 100 test rows.
# The prediction is computed once and reused for every metric; the original
# re-ran k.predict(...) inside each f-string, i.e. eight identical passes
# over the same 100 rows.
nb_eval_pred = k.predict(np.array(X_test[0:100]))
nb_eval_true = y_test[0:100]  # matching slice of the held-out labels

# Argument order (predictions first, labels second) is the same as in the
# original calls. Label strings are left exactly as they were so the recorded
# output still matches.
print(
    f'TP : {TruePositive(nb_eval_pred, nb_eval_true)}',
    f'TN : {TrueNegative(nb_eval_pred, nb_eval_true)}',
    f'FN : {FalseNegative(nb_eval_pred, nb_eval_true)}',
    f'FP : {FalsePositive(nb_eval_pred, nb_eval_true)}',
    f'Accuracy : {accuracy(nb_eval_pred, nb_eval_true)}',
    f'Specificity : {specificity(nb_eval_pred, nb_eval_true)}',
    f'Prcision : {precision(nb_eval_pred, nb_eval_true)}',
    f'Recall : {recall(nb_eval_pred, nb_eval_true)}',
    sep='\n'
    )

TP : 41
TN : 39
FN : 6
FP : 14
Accuracy : 0.8
Specificity : 0.7358490566037735
Prcision : 0.7454545454545455
Recall : 0.8723404255319149
