In [23]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [24]:
networkpatterns = pd.read_csv('network_traffic_data_extended.csv')
networkpatterns.head(10)

Unnamed: 0,timestamp,bandwidth_in (kb),bandwidth_out (kb),jenis_probo,ping,label
0,2024-12-01 00:00:00,1924,509,ftp,28.23,Normal
1,2024-12-01 00:00:15,2386,1779,ftp,74.33,Normal
2,2024-12-01 00:01:10,1524,9774,http,4.15,Flood
3,2024-12-01 00:00:18,3682,3911,https,60.6,Normal
4,2024-12-01 00:02:24,3357,11831,ftp,70.43,Flood
5,2024-12-01 00:02:15,3711,7459,https,28.54,Normal
6,2024-12-01 00:05:36,206,2715,ftp,42.84,Normal
7,2024-12-01 00:02:06,2647,3627,dns,34.32,Normal
8,2024-12-01 00:00:48,6324,1684,icmp,84.9,Normal
9,2024-12-01 00:05:51,4433,811,ftp,46.48,Normal


In [25]:
networkpatterns.tail(10)

Unnamed: 0,timestamp,bandwidth_in (kb),bandwidth_out (kb),jenis_probo,ping,label
740,2024-12-03 00:30:20,5297,7826,tcp,90.32,Normal
741,2024-12-03 01:22:15,643,10540,dns,41.03,Flood
742,2024-12-03 00:09:28,3655,3410,udp,97.51,Normal
743,2024-12-03 01:52:01,7871,5299,udp,55.17,Normal
744,2024-12-03 01:21:36,7754,3004,icmp,8.81,Normal
745,2024-12-03 02:25:00,11347,9177,dns,55.16,Flood
746,2024-12-03 00:46:14,9003,4030,http,22.99,Flood
747,2024-12-03 01:40:27,6534,466,dns,57.67,Normal
748,2024-12-03 00:56:44,8543,4959,icmp,7.1,Flood
749,2024-12-03 02:09:08,4883,8167,tcp,12.9,Flood


In [26]:
networkpatterns.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 750 entries, 0 to 749
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   timestamp           750 non-null    object 
 1   bandwidth_in (kb)   750 non-null    int64  
 2   bandwidth_out (kb)  750 non-null    int64  
 3   jenis_probo         750 non-null    object 
 4   ping                750 non-null    float64
 5   label               750 non-null    object 
dtypes: float64(1), int64(2), object(3)
memory usage: 35.3+ KB


In [27]:
networkpatterns['label'] = networkpatterns['label'].map({'Normal': 0, 'Flood': 1}) 
networkpatterns

Unnamed: 0,timestamp,bandwidth_in (kb),bandwidth_out (kb),jenis_probo,ping,label
0,2024-12-01 00:00:00,1924,509,ftp,28.23,0
1,2024-12-01 00:00:15,2386,1779,ftp,74.33,0
2,2024-12-01 00:01:10,1524,9774,http,4.15,1
3,2024-12-01 00:00:18,3682,3911,https,60.60,0
4,2024-12-01 00:02:24,3357,11831,ftp,70.43,1
...,...,...,...,...,...,...
745,2024-12-03 02:25:00,11347,9177,dns,55.16,1
746,2024-12-03 00:46:14,9003,4030,http,22.99,1
747,2024-12-03 01:40:27,6534,466,dns,57.67,0
748,2024-12-03 00:56:44,8543,4959,icmp,7.10,1


In [29]:
# Cek nilai unik dalam label
print("Unique values before cleaning:", networkpatterns['label'].unique())

# Mapping nilai
mapping = {'Normal': 0, 'Flood': 1}
networkpatterns['label'] = networkpatterns['label'].map(mapping)

# Cek nilai setelah mapping
print("Unique values after mapping:", networkpatterns['label'].unique())

# Tangani nilai NaN
if networkpatterns['label'].isna().sum() > 0:
    print("Rows with NaN values in 'label':")
    print(networkpatterns[networkpatterns['label'].isna()])


Unique values before cleaning: [nan]
Unique values after mapping: [nan]
Rows with NaN values in 'label':
               timestamp  bandwidth_in (kb)  bandwidth_out (kb) jenis_probo  \
0    2024-12-01 00:00:00               1924                 509         ftp   
1    2024-12-01 00:00:15               2386                1779         ftp   
2    2024-12-01 00:01:10               1524                9774        http   
3    2024-12-01 00:00:18               3682                3911       https   
4    2024-12-01 00:02:24               3357               11831         ftp   
..                   ...                ...                 ...         ...   
745  2024-12-03 02:25:00              11347                9177         dns   
746  2024-12-03 00:46:14               9003                4030        http   
747  2024-12-03 01:40:27               6534                 466         dns   
748  2024-12-03 00:56:44               8543                4959        icmp   
749  2024-12-03 02:09:08  