In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [2]:
file_names = ['at1.csv', 'at2.csv', 'at4.csv', 'at8.csv', 'at16.csv']
dataset_path = "dataset/" 

In [3]:
data = pd.concat([pd.read_csv(dataset_path + file) for file in file_names], ignore_index=True)

In [4]:
print("Dataset Info:")
data.info()

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 512434 entries, 0 to 512433
Data columns (total 18 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   sendtime_1    512434 non-null  float64
 1   sender_1      512434 non-null  float64
 2   messageID     512434 non-null  float64
 3   pos-x1        512434 non-null  float64
 4   pos-y1        512434 non-null  float64
 5   pos-z1        512434 non-null  float64
 6   spd-x1        512434 non-null  float64
 7   spd-y1        512434 non-null  float64
 8   spd-z1        512434 non-null  float64
 9   AttackerType  512434 non-null  int64  
 10  sendtime_2    512434 non-null  float64
 11  sender_2      512434 non-null  float64
 12  pos-x2        512434 non-null  float64
 13  pos-y2        512434 non-null  float64
 14  pos-z2        512434 non-null  float64
 15  spd-x2        512434 non-null  float64
 16  spd-y2        512434 non-null  float64
 17  spd-z2        512434 non-null  flo

In [5]:
print("\nDataset Head:")
print(data.head())


Dataset Head:
     sendtime_1  sender_1  messageID       pos-x1       pos-y1  pos-z1  \
0  25200.437587       7.0    26096.0  3588.822555  5912.419649   1.895   
1  25201.437587       7.0    91142.0  3590.480099  5942.298376   1.895   
2  25202.437587       7.0   155702.0  3592.700818  5972.122750   1.895   
3  25203.437587       7.0   221221.0  3594.920543  6001.933773   1.895   
4  25204.437587       7.0   285613.0  3597.878662  6031.726375   1.895   

     spd-x1     spd-y1  spd-z1  AttackerType    sendtime_2  sender_2  \
0  1.277829  30.012756     0.0             0  25201.437587       7.0   
1  2.228684  29.931341     0.0             0  25202.437587       7.0   
2  2.224379  29.873523     0.0             0  25203.437587       7.0   
3  2.226374  29.900320     0.0             0  25204.437587       7.0   
4  3.156679  29.892922     0.0             0  25205.437587       7.0   

        pos-x2       pos-y2  pos-z2    spd-x2     spd-y2  spd-z2  
0  3590.480099  5942.298376   1.895  2.2

In [6]:
print("\nDescriptive Statistics:")
print(data.describe())


Descriptive Statistics:
          sendtime_1       sender_1     messageID         pos-x1  \
count  512434.000000  512434.000000  5.124340e+05  512434.000000   
mean    25249.552158    1218.164505  3.084429e+06    5026.993652   
std        28.552058     704.023988  1.799226e+06    1485.275792   
min     25200.000055       7.000000  9.680000e+02       0.258571   
25%     25224.766982     631.000000  1.517130e+06    3852.798057   
50%     25249.686924    1147.000000  3.081699e+06    5143.945281   
75%     25274.081683    1753.000000  4.647444e+06    6051.056013   
max     25298.999854    3097.000000  6.474055e+06   13639.479228   

              pos-y1        pos-z1         spd-x1         spd-y1    spd-z1  \
count  512434.000000  5.124340e+05  512434.000000  512434.000000  512434.0   
mean     5579.264790  1.895000e+00       1.428893      -0.555232       0.0   
std       847.617694  1.110224e-15      10.280654      13.823938       0.0   
min         0.844663  1.895000e+00     -37.688940 

In [7]:
print("\nMissing Values:")
print(data.isnull().sum())


Missing Values:
sendtime_1      0
sender_1        0
messageID       0
pos-x1          0
pos-y1          0
pos-z1          0
spd-x1          0
spd-y1          0
spd-z1          0
AttackerType    0
sendtime_2      0
sender_2        0
pos-x2          0
pos-y2          0
pos-z2          0
spd-x2          0
spd-y2          0
spd-z2          0
dtype: int64


In [None]:
data.drop(columns=[])

In [8]:
features = ['sendtime_1', 'sender_1', 'pos-x1', 'pos-y1', 'spd-x1', 'spd-y1',
            'sendtime_2', 'sender_2', 'pos-x2', 'pos-y2', 'spd-x2', 'spd-y2']
target = 'AttackerType'

X = data[features]
y = data[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [9]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
k = 6
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train_scaled, y_train)

In [11]:
# Model Evaluation
y_pred = knn.predict(X_test_scaled)

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("\nAccuracy Score:")
print(accuracy_score(y_test, y_pred))



Confusion Matrix:
[[108854     20     16      0      7     39]
 [   445   8743      0      0      3     18]
 [  1277      9   7716      0     27     89]
 [    26     17     26   8928     90     12]
 [  5710    149    954      0   1195    718]
 [  3384    163    150      0     50   4896]]

Classification Report:
              precision    recall  f1-score   support

           0       0.91      1.00      0.95    108936
           1       0.96      0.95      0.95      9209
           2       0.87      0.85      0.86      9118
           4       1.00      0.98      0.99      9099
           8       0.87      0.14      0.24      8726
          16       0.85      0.57      0.68      8643

    accuracy                           0.91    153731
   macro avg       0.91      0.75      0.78    153731
weighted avg       0.91      0.91      0.89    153731


Accuracy Score:
0.9128412616843707
