In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
SAMPLE_SIZE = 200000

def clean_columns(df):
    df.columns = df.columns.str.strip()
    return df

In [25]:
print("PortScan Verisi Yükleniyor...")

df_portscan = pd.read_csv("../data/CICIDS_2017/PortScan.csv")
df_portscan = clean_columns(df_portscan)

# portscan saldırılarını topluyoruz
ps_attacks = df_portscan[df_portscan['Label'] == 'PortScan'].copy()
# normal trafiği de alıyoruz
ps_benign = df_portscan[df_portscan['Label'] == 'BENIGN'].copy()
ps_benign['Label'] = 'Benign'

if len(ps_attacks) > SAMPLE_SIZE:
    ps_attacks = ps_attacks.sample(n = SAMPLE_SIZE, random_state = 0)
else:
    print(f"Örneklem almak için yeterli veri bulunamadı maksimum şu kadar veri alınıyor: {len(ps_attacks)}")

print(f"PortScan Verileri Hazır -> Toplam {len(ps_attacks)} satır.")

PortScan Verisi Yükleniyor...
Örneklem almak için yeterli veri bulunamadı maksimum şu kadar veri alınıyor: 158930
PortScan Verileri Hazır -> Toplam 158930 satır.


In [26]:
print("BruteForce Verisi Yükleniyor...")

df_bruteforce_ssh = pd.read_csv("../data/CICIDS_2018/BruteForce_SSH.csv")
df_bruteforce_ftp = pd.read_csv("../data/CICIDS_2018/BruteForce_FTP.csv")

df_bruteforce_ssh = clean_columns(df_bruteforce_ssh)
df_bruteforce_ftp = clean_columns(df_bruteforce_ftp)

df_bruteforce_raw = pd.concat([df_bruteforce_ssh, df_bruteforce_ftp])

bf_attacks = df_bruteforce_raw[df_bruteforce_raw['Label'].isin(['FTP-BruteForce', 'SSH-Bruteforce'])].copy()
bf_benign = df_bruteforce_raw[df_bruteforce_raw['Label'] == 'Benign'].copy()
bf_benign['Label'] = 'Benign'

if len(bf_attacks) > SAMPLE_SIZE:
    bf_attacks = bf_attacks.sample(n = SAMPLE_SIZE, random_state = 0)
else:
    print(f"Örneklem almak için yeterli veri bulunamadı!\nMaksimum şu kadar veri alınıyor: {len(bf_attacks)}")

print(f"BruteForce Verileri Hazır -> Toplam {len(bf_attacks)} satır.")

BruteForce Verisi Yükleniyor...
BruteForce Verileri Hazır -> Toplam 200000 satır.


In [46]:
print("DDoS Verisi Yükleniyor...")

ddos_file_path = "../data/Hybrid_IDS2017_2018_2019/final_dataset.csv"

# RAMi patlatmak istemiyorum :)
chunk_size = 100000
ddos_chunks = []
benign_chunks = []

for chunk in pd.read_csv(ddos_file_path, chunksize=chunk_size, low_memory=False):
    chunk = clean_columns(chunk)
    
    filtered_chunk = chunk[chunk['Label'] == 'ddos']
    filtered_benign = chunk[chunk['Label'] == 'Benign']
    
    if ((not (filtered_chunk.empty)) and (len(filtered_chunk)) <= SAMPLE_SIZE):
        ddos_chunks.append(filtered_chunk)

    if ((not (filtered_benign.empty)) and (len(filtered_benign)) <= SAMPLE_SIZE):
        benign_chunks.append(filtered_benign)

    if (len(ddos_chunks) > SAMPLE_SIZE * 3) and (len(benign_chunks) > SAMPLE_SIZE * 3):
        break

print("Parçalar birleştiriliyor...")

if ddos_chunks:
    ddos_attacks = pd.concat(ddos_chunks, axis=0)
    
    if len(ddos_attacks) > SAMPLE_SIZE:
        ddos_attacks = ddos_attacks.sample(n = SAMPLE_SIZE, random_state = 0)
    else:
        print(f"Uyarı: İstenen sayıya ulaşılamadı. Toplam DDoS verisi: {len(ddos_attacks)}")
        
    print(f"DDoS Verileri Hazır -> Final Boyut: {len(ddos_attacks)} satır.")
else:
    print("Hata: Hiç DDoS verisi bulunamadı!")

if benign_chunks:
    ddos_benign = pd.concat(benign_chunks, axis = 0)
    if len(benign_chunks) > SAMPLE_SIZE:
        ddos_benign = ddos_benign.sample(n = SAMPLE_SIZE, random_state = 0)
    else:
        print(f"Uyarı: İstenen sayıya ulaşılamadı. Toplam Benign verisi: {len(ddos_benign)}")

    print(f"Benign Verileri Hazır -> Final Boyut: {len(ddos_benign)} satır.")
else:
    print("Hata: Hiç Benign verisi bulunamadı!")

DDoS Verisi Yükleniyor...
Parçalar birleştiriliyor...
DDoS Verileri Hazır -> Final Boyut: 200000 satır.
Uyarı: İstenen sayıya ulaşılamadı. Toplam Benign verisi: 6321980
Benign Verileri Hazır -> Final Boyut: 6321980 satır.


In [29]:
print(f"PortScan Benign Verisi: {len(ps_benign)}")
print(f"BruteForce Bening Verisi: {len(bf_benign)}")

PortScan Benign Verisi: 127537
BruteForce Bening Verisi: 907680


In [30]:
"""
print("Normal Trafik Verisi Hazırlanıyor...")

all_benign = pd.concat([ps_benign, bf_benign])

if len(all_benign) > SAMPLE_SIZE:
    all_benign = all_benign.sample(n = SAMPLE_SIZE, random_state = 0)

print(f"Normal Trafik Verisi Hazır -> Toplam {len(all_benign)} satır.")
"""

Normal Trafik Verisi Hazırlanıyor...
Normal Trafik Verisi Hazır -> Toplam 200000 satır.


In [32]:
all_benign['Label'].value_counts()
print(f"all_bening sütun sayısı: {len(all_benign.columns)}")

all_bening sütun sayısı: 130


In [33]:
all_benign['Label'] = 0 # Benign değerler 0 olarak kullanılacak
ddos_attacks['Label'] = 1 # DDoS 1 numaralı saldırı olarak kullanılacak
ps_attacks['Label'] = 2 # PortScan 2 numaralı saldırı olarak kullanılacak
bf_attacks['Label'] = 3 # BruteForce 3 numaralı saldırı olarak kullanılacak

In [34]:
print("Birleştirme İşlemine Geçiliyor...")

# Ortak sütunları bulmalıyız
common_cols = list(set(ps_attacks.columns) & set(bf_attacks.columns) & set(ddos_attacks.columns))

# Label sütununu en sona almak için listeden çıkarıp sona ekliyoruz
if 'Label' in common_cols:
    common_cols.remove('Label')
common_cols.append('Label')

# Tüm parçaları birleştiriyorum
final_df = pd.concat([
    all_benign[common_cols],
    ddos_attacks[common_cols],
    ps_attacks[common_cols],
    bf_attacks[common_cols]
], axis=0)

# Veri setini karıştırıyoruz
final_df = final_df.sample(frac = 1, random_state = 0).reset_index(drop = True)

# Sonsuz ve NaN değerleri temizliyoruz
final_df.replace([np.inf, -np.inf], np.nan, inplace=True)
final_df.dropna(inplace = True)

print("İşlem Tamamlandı!")
print(f"Final Veri Seti Boyutu: {final_df.shape}")
print(final_df['Label'].value_counts())

final_df.to_csv("../data/final_hybrid_data/final_dataset.csv", index = False)

Birleştirme İşlemine Geçiliyor...
İşlem Tamamlandı!
Final Veri Seti Boyutu: (758930, 28)
Label
0    200000
1    200000
3    200000
2    158930
Name: count, dtype: int64


In [35]:
final_df.head(10)

Unnamed: 0,Active Mean,Bwd IAT Max,Active Max,Flow Duration,Fwd PSH Flags,Idle Max,Bwd IAT Std,Fwd IAT Mean,Flow IAT Min,Idle Std,...,CWE Flag Count,Idle Mean,Active Min,Bwd IAT Mean,Active Std,Fwd IAT Min,Flow IAT Mean,Down/Up Ratio,Fwd IAT Max,Label
0,0.0,0.0,0.0,8,0,0.0,0.0,0.0,8.0,0.0,...,0,0.0,0.0,0.0,0.0,0.0,8.0,1.0,0.0,2
1,0.0,0.0,0.0,36,0,0.0,0.0,36.0,36.0,0.0,...,0,0.0,0.0,0.0,0.0,36.0,36.0,0.0,36.0,0
2,0.0,1527391.0,0.0,1527799,0,0.0,881723.0,184.5,2.0,0.0,...,0,0.0,0.0,509265.0,0.0,2.0,254633.1667,1.0,367.0,1
3,0.0,0.0,0.0,42,0,0.0,0.0,0.0,42.0,0.0,...,0,0.0,0.0,0.0,0.0,0.0,42.0,1.0,0.0,2
4,0.0,0.0,0.0,841,0,0.0,0.0,0.0,841.0,0.0,...,0,0.0,0.0,0.0,0.0,0.0,841.0,1.0,0.0,0
5,0.0,5362945.0,0.0,5452581,0,5362945.0,3079790.0,29876.666667,6.0,0.0,...,0,5362945.0,0.0,1806863.0,0.0,48.0,778940.142857,1.0,57644.0,1
6,0.0,0.0,0.0,201110,0,0.0,0.0,100555.0,1.0,0.0,...,0,0.0,0.0,0.0,0.0,1.0,67036.666667,0.0,201109.0,0
7,0.0,0.0,0.0,17,0,0.0,0.0,0.0,17.0,0.0,...,0,0.0,0.0,0.0,0.0,0.0,17.0,1.0,0.0,3
8,0.0,4231805.0,0.0,4332296,0,0.0,2423467.0,33495.0,6.0,0.0,...,0,0.0,0.0,1433461.0,0.0,29.0,618899.428571,1.0,68591.0,1
9,0.0,0.0,0.0,1770,0,0.0,0.0,0.0,1770.0,0.0,...,0,0.0,0.0,0.0,0.0,0.0,1770.0,1.0,0.0,1


In [36]:
ddos_attacks.columns

Index(['Unnamed: 0', 'Flow ID', 'Src IP', 'Src Port', 'Dst IP', 'Dst Port',
       'Protocol', 'Timestamp', 'Flow Duration', 'Tot Fwd Pkts',
       'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max',
       'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std',
       'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean',
       'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean',
       'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot',
       'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min',
       'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max',
       'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags',
       'Bwd URG Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s',
       'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean',
       'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt',
       'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt',
       'CWE Flag

In [37]:
ps_attacks.columns

Index(['Destination Port', 'Flow Duration', 'Total Fwd Packets',
       'Total Backward Packets', 'Total Length of Fwd Packets',
       'Total Length of Bwd Packets', 'Fwd Packet Length Max',
       'Fwd Packet Length Min', 'Fwd Packet Length Mean',
       'Fwd Packet Length Std', 'Bwd Packet Length Max',
       'Bwd Packet Length Min', 'Bwd Packet Length Mean',
       'Bwd Packet Length Std', 'Flow Bytes/s', 'Flow Packets/s',
       'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min',
       'Fwd IAT Total', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max',
       'Fwd IAT Min', 'Bwd IAT Total', 'Bwd IAT Mean', 'Bwd IAT Std',
       'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags',
       'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Length',
       'Bwd Header Length', 'Fwd Packets/s', 'Bwd Packets/s',
       'Min Packet Length', 'Max Packet Length', 'Packet Length Mean',
       'Packet Length Std', 'Packet Length Variance', 'FIN Flag Count',
       'SYN Flag Co

In [38]:
bf_attacks.columns

Index(['Dst Port', 'Protocol', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts',
       'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max',
       'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std',
       'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean',
       'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean',
       'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot',
       'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min',
       'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max',
       'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags',
       'Bwd URG Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s',
       'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean',
       'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt',
       'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt',
       'CWE Flag Count', 'ECE Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg',
       'Fwd Seg Siz

In [39]:
final_df.columns

Index(['Active Mean', 'Bwd IAT Max', 'Active Max', 'Flow Duration',
       'Fwd PSH Flags', 'Idle Max', 'Bwd IAT Std', 'Fwd IAT Mean',
       'Flow IAT Min', 'Idle Std', 'Fwd URG Flags', 'Bwd IAT Min', 'Idle Min',
       'Bwd URG Flags', 'Fwd IAT Std', 'Flow IAT Max', 'Flow IAT Std',
       'Bwd PSH Flags', 'CWE Flag Count', 'Idle Mean', 'Active Min',
       'Bwd IAT Mean', 'Active Std', 'Fwd IAT Min', 'Flow IAT Mean',
       'Down/Up Ratio', 'Fwd IAT Max', 'Label'],
      dtype='object')

In [40]:
target_features = [
    'Destination Port', 'Flow Duration', 
    'Total Fwd Packets', 'Total Backward Packets', 
    'Total Length of Fwd Packets', 'Total Length of Bwd Packets', 
    'Fwd Packet Length Max', 'Fwd Packet Length Min', 'Fwd Packet Length Mean', 'Fwd Packet Length Std',
    'Bwd Packet Length Max', 'Bwd Packet Length Min', 'Bwd Packet Length Mean', 'Bwd Packet Length Std',
    'Flow Bytes/s', 'Flow Packets/s', 
    'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min',
    'Fwd IAT Total', 'Bwd IAT Total', 
    'Fwd Header Length', 'Bwd Header Length', 
    'Fwd Packets/s', 'Bwd Packets/s', 
    'Min Packet Length', 'Max Packet Length', 'Packet Length Mean', 'Packet Length Std', 'Packet Length Variance',
    # Flags
    'FIN Flag Count', 'SYN Flag Count', 'RST Flag Count', 'PSH Flag Count', 
    'ACK Flag Count', 'URG Flag Count', 'ECE Flag Count', 
    'Down/Up Ratio', 'Average Packet Size', 
    'Init_Win_bytes_forward', 'Init_Win_bytes_backward', 
    'Label'
]

column_mapping = {
    'Dst Port': 'Destination Port',
    'Tot Fwd Pkts': 'Total Fwd Packets',
    'Tot Bwd Pkts': 'Total Backward Packets',
    'TotLen Fwd Pkts': 'Total Length of Fwd Packets',
    'TotLen Bwd Pkts': 'Total Length of Bwd Packets',
    'Fwd Pkt Len Max': 'Fwd Packet Length Max',
    'Fwd Pkt Len Min': 'Fwd Packet Length Min',
    'Fwd Pkt Len Mean': 'Fwd Packet Length Mean',
    'Fwd Pkt Len Std': 'Fwd Packet Length Std',
    'Bwd Pkt Len Max': 'Bwd Packet Length Max',
    'Bwd Pkt Len Min': 'Bwd Packet Length Min',
    'Bwd Pkt Len Mean': 'Bwd Packet Length Mean',
    'Bwd Pkt Len Std': 'Bwd Packet Length Std',
    'Flow Byts/s': 'Flow Bytes/s',
    'Flow Pkts/s': 'Flow Packets/s',
    'Fwd IAT Tot': 'Fwd IAT Total',
    'Bwd IAT Tot': 'Bwd IAT Total',
    'Fwd Header Len': 'Fwd Header Length',
    'Bwd Header Len': 'Bwd Header Length',
    'Fwd Pkts/s': 'Fwd Packets/s',
    'Bwd Pkts/s': 'Bwd Packets/s',
    # Paket Boyutları
    'Pkt Len Min': 'Min Packet Length',
    'Pkt Len Max': 'Max Packet Length',
    'Pkt Len Mean': 'Packet Length Mean',
    'Pkt Len Std': 'Packet Length Std',
    'Pkt Len Var': 'Packet Length Variance',
    'Pkt Size Avg': 'Average Packet Size',
    'FIN Flag Cnt': 'FIN Flag Count',
    'SYN Flag Cnt': 'SYN Flag Count',
    'RST Flag Cnt': 'RST Flag Count',
    'PSH Flag Cnt': 'PSH Flag Count',
    'ACK Flag Cnt': 'ACK Flag Count',
    'URG Flag Cnt': 'URG Flag Count',
    'ECE Flag Cnt': 'ECE Flag Count',
    'CWE Flag Count': 'CWE Flag Count', # eklemekten zarar gelmez
    'Init Fwd Win Byts': 'Init_Win_bytes_forward',
    'Init Bwd Win Byts': 'Init_Win_bytes_backward'
}

In [41]:
def standardize_and_select(df, features, mapping):
    df.columns = df.columns.str.strip()
    
    # Sadece sözlükteki isimleri değiştir
    df.rename(columns = mapping, inplace = True)
    
    # Sadece hedef listemizde olan sütunları seç
    # Eğer dataframe'de hedef sütun yoksa, hata vermemesi için kontrol etmeliyiz
    available_features = [c for c in features if c in df.columns]
    
    # Eksik sütunları raporla
    missing = set(features) - set(available_features)
    if missing:
        print(f"UYARI: Şu sütunlar bu dosyada bulunamadı ve atlanacak: {missing}")
    
    return df[available_features].copy()

In [48]:
print(f"Standartlaştırmadan önce ps_benign sütun sayısı: {len(ps_benign.columns)}")
ps_benign = standardize_and_select(ps_benign, target_features, column_mapping)
print(f"Standartlaştırmadan sonra ps_benign sütun sayısı: {len(ps_benign.columns)}")

print(f"Standartlaştırmadan önce bf_benign sütun sayısı: {len(bf_benign.columns)}")
bf_benign = standardize_and_select(bf_benign, target_features, column_mapping)
print(f"Standartlaştırmadan sonra bf_benign sütun sayısı: {len(bf_benign.columns)}")

Standartlaştırmadan önce ps_benign sütun sayısı: 79
Standartlaştırmadan sonra ps_benign sütun sayısı: 43
Standartlaştırmadan önce bf_benign sütun sayısı: 79
Standartlaştırmadan sonra bf_benign sütun sayısı: 43


In [49]:
print(f"Birleştirmeden önce ps_benign değerleri sayısı: {len(ps_benign['Label'])}")
print(f"Birleştirmeden önce bf_benign değerleri sayısı: {len(bf_benign['Label'])}")
all_benign = pd.concat([ps_benign, bf_benign])
print(f"Birleştirmeden sonra all_benign değerleri sayısı: {len(all_benign['Label'])}")

Birleştirmeden önce ps_benign değerleri sayısı: 127537
Birleştirmeden önce bf_benign değerleri sayısı: 907680
Birleştirmeden sonra all_benign değerleri sayısı: 1035217


In [50]:
all_benign = all_benign.sample(n = SAMPLE_SIZE, random_state = 0)
print(f"all_benign değerleri sayısı: {len(all_benign['Label'])}")

all_benign değerleri sayısı: 200000


In [54]:
all_benign['Label'] = 0
ddos_attacks['Label'] = 1
ps_attacks['Label'] = 2
bf_attacks['Label'] = 3

In [55]:
print("Veriler Standartlaştırılıyor...")

# DDoS Verisi (Chunk ile okuduğunu varsayıyorum, örnek olarak ddos_attacks değişkeni)
ddos_std = standardize_and_select(ddos_attacks, target_features, column_mapping)

# PortScan Verisi
ps_std = standardize_and_select(ps_attacks, target_features, column_mapping)

# BruteForce Verisi
bf_std = standardize_and_select(bf_attacks, target_features, column_mapping)

# Benign (Normal) Verisi
benign_std = standardize_and_select(all_benign, target_features, column_mapping)

Veriler Standartlaştırılıyor...


In [56]:
print("Birleştiriliyor...")

def remove_duplicate_columns(df):
    return df.loc[:, ~df.columns.duplicated()]

# Her bir dataframe için temizlik yapalım
ddos_std = remove_duplicate_columns(ddos_std)
ps_std = remove_duplicate_columns(ps_std)
bf_std = remove_duplicate_columns(bf_std)
benign_std = remove_duplicate_columns(benign_std)

final_cols = list(set(ddos_std.columns) & set(ps_std.columns) & set(bf_std.columns) & set(benign_std.columns))

# Label'ı en sona al
if 'Label' in final_cols:
    final_cols.remove('Label')
final_cols.append('Label')

final_df = pd.concat([
    benign_std[final_cols],
    ddos_std[final_cols],
    ps_std[final_cols],
    bf_std[final_cols]
], axis=0)

# Temizlik
final_df = final_df.sample(frac = 1, random_state = 0).reset_index(drop = True)
final_df.replace([np.inf, -np.inf], np.nan, inplace = True)
final_df.dropna(inplace = True)

print(f"İşlem Tamam! Sütun Sayısı: {final_df.shape[1]}")
print(f"Sütunlar: {final_df.columns.tolist()}")

# Kaydet
final_df.to_csv("../data/final_hybrid_data/final_dataset_selected_features.csv", index = False)

Birleştiriliyor...
İşlem Tamam! Sütun Sayısı: 43
Sütunlar: ['Packet Length Std', 'Flow Duration', 'Fwd Packet Length Std', 'Fwd Packet Length Max', 'Bwd Packets/s', 'ECE Flag Count', 'Fwd Header Length', 'Fwd IAT Total', 'Init_Win_bytes_forward', 'Flow Bytes/s', 'Total Backward Packets', 'Fwd Packets/s', 'Packet Length Variance', 'Flow IAT Min', 'FIN Flag Count', 'Flow Packets/s', 'Fwd Packet Length Mean', 'Min Packet Length', 'Total Length of Fwd Packets', 'Total Fwd Packets', 'ACK Flag Count', 'Average Packet Size', 'Flow IAT Max', 'Init_Win_bytes_backward', 'Flow IAT Std', 'Bwd Packet Length Max', 'Bwd Packet Length Min', 'SYN Flag Count', 'RST Flag Count', 'Bwd Header Length', 'Bwd Packet Length Std', 'Destination Port', 'Max Packet Length', 'Packet Length Mean', 'Total Length of Bwd Packets', 'Bwd Packet Length Mean', 'PSH Flag Count', 'Flow IAT Mean', 'Down/Up Ratio', 'URG Flag Count', 'Bwd IAT Total', 'Fwd Packet Length Min', 'Label']


In [57]:
final_df['Label'].value_counts()

Label
3    200000
1    199999
0    199962
2    158804
Name: count, dtype: int64