# Импорт.

In [1]:
from Classes.KNumNeighborsClassifier import KNumNeighborsClassifier
from Classes.KNumNeighborsTeacher import KNumNeighborsTeacher
from Classes.NaiveBayesianClassificator import NaiveBayesianClassificator
from Classes.NaiveBayesianTeacher import NaiveBayesianTeacher

from Functions.distance_functions import euclid_distance
from Functions.functions_of_priority import max_count_class
from Functions.spreading_functions import normal_spread
from Functions.metrics import *

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np

# Загрузка Dataset'а.

In [2]:
models = pd.read_csv('../Data/DataLaba2/neo_task_smoted.csv')
X_train, X_test, y_train, y_test = train_test_split(models.drop('hazardous', axis=1), models['hazardous'], test_size=0.1, random_state=42)

In [3]:
y_test

30724     0.0
35732     0.0
118948    1.0
76136     0.0
36621     0.0
         ... 
41761     0.0
19297     0.0
149944    1.0
103037    1.0
154557    1.0
Name: hazardous, Length: 16400, dtype: float64

In [4]:
X_train

Unnamed: 0,est_diameter_max,relative_velocity,miss_distance,absolute_magnitude
161658,-0.993405,-0.382161,0.718675,0.034654
46173,-0.999696,-0.956341,-0.981829,0.574468
124370,-0.994114,-0.839733,-0.699950,0.094784
109476,-0.978060,-0.331272,0.952452,-0.182535
106322,-0.989715,-0.730527,0.273584,-0.045514
...,...,...,...,...
119879,-0.981295,-0.635733,-0.532257,-0.153718
103694,-0.984032,-0.714345,0.317797,-0.125175
131932,-0.992591,-0.636823,-0.081927,0.013857
146867,-0.989026,-0.652895,-0.262361,-0.057075


In [5]:
y_test.shape, y_train.shape, X_train.shape

((16400,), (147592,), (147592, 4))

# KNN

In [6]:
knn = KNumNeighborsClassifier(k=5, 
                            distance=euclid_distance, 
                            function_of_priority=max_count_class)
knn_teacher = KNumNeighborsTeacher(classifier=knn)
knn_teacher.teach(np.array(X_train), np.array(y_train))
knn_predict = knn.predict(np.array(X_test))

In [8]:
print(
    f'TP : {TruePositive(knn_predict, y_test)}',
    f'TN : {TrueNegative(knn_predict, y_test)}',
    f'FN : {FalseNegative(knn_predict, y_test)}',
    f'FP : {FalsePositive(knn_predict, y_test)}',
    f'Accuracy : {accuracy(knn_predict, y_test)}',
    f'Specificity : {specificity(knn_predict, y_test)}',
    f'Prcision : {precision(knn_predict, y_test)}',
    f'Recall : {recall(knn_predict, y_test)}',
    f'F1 : {f1(knn_predict, y_test)}',
    sep='\n'
    )

TP : 8148
TN : 6830
FN : 105
FP : 1317
Accuracy : 0.9132926829268293
Specificity : 0.8383454032159077
Prcision : 0.8608557844690967
Recall : 0.9872773536895675
F1 : 0.9197426346088724


In [10]:
print(classification_report(knn_predict, y_test))

              precision    recall  f1-score   support

         0.0       0.84      0.98      0.91      6935
         1.0       0.99      0.86      0.92      9465

    accuracy                           0.91     16400
   macro avg       0.91      0.92      0.91     16400
weighted avg       0.92      0.91      0.91     16400



# Naive Bayesian

In [12]:
nbc = NaiveBayesianClassificator(lambda a: a > 0)
NaiveBayesianTeacher(nbc).teach(np.array(X_train), np.array(y_train), [normal_spread, normal_spread, normal_spread, normal_spread])
nbc_predict = nbc.predict(np.array(X_test))


In [14]:
print(
    f'TP : {TruePositive(nbc_predict, y_test)}',
    f'TN : {TrueNegative(nbc_predict, y_test)}',
    f'FN : {FalseNegative(nbc_predict, y_test)}',
    f'FP : {FalsePositive(nbc_predict, y_test)}',
    f'Accuracy : {accuracy(nbc_predict, y_test)}',
    f'Specificity : {specificity(nbc_predict, y_test)}',
    f'Prcision : {precision(nbc_predict, y_test)}',
    f'Recall : {recall(nbc_predict, y_test)}',
    f'F1 : {f1(nbc_predict, y_test)}',
    sep='\n'
    )

TP : 6954
TN : 6371
FN : 1299
FP : 1776
Accuracy : 0.8125
Specificity : 0.7820056462501535
Prcision : 0.7965635738831615
Recall : 0.8426026899309342
F1 : 0.8189365836424659


In [16]:
print(classification_report(nbc_predict, y_test))

              precision    recall  f1-score   support

       False       0.78      0.83      0.81      7670
        True       0.84      0.80      0.82      8730

    accuracy                           0.81     16400
   macro avg       0.81      0.81      0.81     16400
weighted avg       0.81      0.81      0.81     16400

