In [19]:
# Imports
import numpy as np # Support for large arrays and matrices, along with high-level mathematical functions.
import seaborn as sns # Graphing/Plotting module.
import pandas as pd # CSV handling with operations on tabular data.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
from ast import literal_eval # Transform/Parse a string-list into a proper list.
import requests # GET requets to scrap a url/website
from utils import save_classification_report

In [20]:
# Read Data
''' 
There are 2 datasets available for you to use - 
1. 'mta' 
2. 'ustc'
Make sure that 
the path to the .csv files is correct. the following code loads the data from .csv file
into a DataFrame.
'''

dataset_type = 'mta' # or 'ustc'

filepath = f'./datasets/{dataset_type}/xy_train.csv'
df = pd.read_csv(filepath)
df

Unnamed: 0,expiration_id,src_ip,src_mac,src_oui,src_port,dst_ip,dst_mac,dst_oui,dst_port,protocol,...,user_agent,content_type,udps.n_bytes,udps.protocol_header_fields,udps.handshake_packets_duration,udps.n_bytes_per_packet,udps.stnn_image,file_name,label,malware_family
0,0,fe80::68e8:c1b0:4cf3:8cf7,5c:f9:dd:6d:16:82,5c:f9:dd,546,ff02::1:2,33:33:00:01:00:02,33:33:00,547,17,...,,,"[0.00392156862745098, 0.30196078431372547, 0.6...","[[0, 88, 0, 0], [0, 88, 997, 0], [0, 88, 2001,...",1.000,"[[0.00392156862745098, 0.30196078431372547, 0....","[[0.0, 364018.0, 53595.688, 119621.35, 2.23710...",nonvpn-voip-facebook-16.pcap.UDP_fe80--68e8-c1...,benign,benign
1,0,10.6.5.102,00:08:02:1c:47:ae,00:08:02,55391,106.10.248.83,20:e5:2a:b6:93:f1,20:e5:2a,25,6,...,,,"[0.19607843137254902, 0.19607843137254902, 0.1...","[[0, 0, 0, 8192], [1, 0, 75, 64240], [0, 0, 1,...",0.075,"[[0.19607843137254902, 0.19607843137254902, 0....","[[0.0, 229.0, 26.90625, 50.289715, 2.2989116, ...",2018-06-05-malware-emotet-malspam-infection-tr...,malware,emotet
2,0,10.0.0.11,ac:87:a3:39:05:cc,ac:87:a3,53334,173.194.132.52,c0:ff:d4:7f:d7:4d,c0:ff:d4,443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[[0, 0, 0, 65535], [1, 0, 146, 28400], [0, 0, ...",0.146,"[[0.08627450980392157, 0.011764705882352941, 0...","[[0.0, 185.0, 24.46875, 56.990086, 1.945428, 6...",d_hi_safari_Batman_vs_Superman_14_15_47_480p.p...,benign,benign
3,0,10.2.9.101,00:08:02:1c:47:ae,00:08:02,50038,104.160.190.114,20:e5:2a:b6:93:f1,20:e5:2a,4443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[[0, 0, 0, 65535], [1, 0, 92, 64240], [0, 0, 0...",0.092,"[[0.08627450980392157, 0.011764705882352941, 0...","[[0.0, 1378.0, 93.809525, 301.2553, 3.9512565,...",2021-02-09-malware-hancitor-infection-with-cob...,malware,hancitor
4,0,10.0.2.15,08:00:27:a3:83:43,08:00:27,49969,152.115.75.210,52:54:00:12:35:02,52:54:00,443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[[0, 0, 0, 8192], [1, 0, 42, 65535], [0, 0, 1,...",0.042,"[[0.08627450980392157, 0.011764705882352941, 0...","[[0.0, 9779.0, 745.1111, 2322.5686, 3.251213, ...",2017-04-25-benign-benign.pcap.TCP_10-0-2-15_49...,benign,benign
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20488,0,10.2.12.101,00:08:02:1c:47:ae,00:08:02,49939,138.201.149.51,20:e5:2a:b6:93:f1,20:e5:2a,443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[[0, 0, 0, 65535], [1, 0, 181, 64240], [0, 0, ...",0.181,"[[0.08627450980392157, 0.011764705882352941, 0...","[[0.0, 231.0, 33.434784, 70.500046, 1.9358493,...",2021-02-12-malware-qakbot-Qakbot-infection-wit...,malware,qakbot
20489,0,10.0.0.9,fc:aa:14:7a:f7:ea,fc:aa:14,36444,212.179.180.116,c0:ff:d4:7f:d7:4d,c0:ff:d4,443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[[0, 0, 0, 29200], [1, 0, 14, 28960], [0, 0, 0...",0.014,"[[0.08627450980392157, 0.011764705882352941, 0...","[[0.0, 45079.0, 2821.875, 11074.586, 3.614778,...",L_cyber_ff_09-08__18_05_04.pcap.TCP_10-0-0-9_3...,benign,benign
20490,0,10.6.5.102,00:08:02:1c:47:ae,00:08:02,56281,91.243.81.13,20:e5:2a:b6:93:f1,20:e5:2a,443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[[0, 0, 0, 8192], [1, 0, 271, 64240], [0, 0, 0...",0.271,"[[0.08627450980392157, 0.011764705882352941, 0...","[[0.0, 832.0, 87.28571, 208.50111, 2.544312, 6...",2018-06-05-malware-emotet-malspam-infection-tr...,malware,emotet
20491,0,10.6.5.102,00:08:02:1c:47:ae,00:08:02,52994,64.233.188.108,20:e5:2a:b6:93:f1,20:e5:2a,25,6,...,,,"[0.19607843137254902, 0.19607843137254902, 0.1...","[[0, 0, 0, 8192], [1, 0, 47, 64240], [0, 0, 0,...",0.047,"[[0.19607843137254902, 0.19607843137254902, 0....","[[0.0, 347.0, 42.0625, 82.85235, 2.1231167, 60...",2018-06-05-malware-emotet-malspam-infection-tr...,malware,emotet


In [4]:
df.describe()

Unnamed: 0,expiration_id,src_port,dst_port,protocol,ip_version,vlan_id,bidirectional_first_seen_ms,bidirectional_last_seen_ms,bidirectional_duration_ms,bidirectional_packets,...,dst2src_syn_packets,dst2src_cwr_packets,dst2src_ece_packets,dst2src_urg_packets,dst2src_ack_packets,dst2src_psh_packets,dst2src_rst_packets,dst2src_fin_packets,application_is_guessed,udps.handshake_packets_duration
count,20493.0,20493.0,20493.0,20493.0,20493.0,20493.0,20493.0,20493.0,20493.0,20493.0,...,20493.0,20493.0,20493.0,20493.0,20493.0,20493.0,20493.0,20493.0,20493.0,20493.0
mean,0.0,48690.163958,2100.646465,6.590446,4.016201,0.0,1287795000000.0,1287795000000.0,296702.6,885.56712,...,0.934465,0.0,0.0,0.0,336.591617,50.95901,0.110867,0.773093,0.151564,0.212896
std,0.0,12127.103562,8191.80345,2.479229,0.179277,0.0,552475000000.0,552474800000.0,2596452.0,15691.789955,...,0.257901,0.0,0.0,0.0,8278.013757,779.30214,0.443142,0.448656,0.358606,0.350089
min,0.0,67.0,21.0,6.0,4.0,0.0,94468.0,99482.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001
25%,0.0,49348.0,443.0,6.0,4.0,0.0,1430000000000.0,1430000000000.0,1228.0,19.0,...,1.0,0.0,0.0,0.0,9.0,3.0,0.0,1.0,0.0,0.035
50%,0.0,50249.0,443.0,6.0,4.0,0.0,1460000000000.0,1460000000000.0,7447.0,31.0,...,1.0,0.0,0.0,0.0,16.0,5.0,0.0,1.0,0.0,0.083
75%,0.0,53334.0,443.0,6.0,4.0,0.0,1600000000000.0,1600000000000.0,123417.0,51.0,...,1.0,0.0,0.0,0.0,26.0,11.0,0.0,1.0,0.0,0.217
max,0.0,65534.0,65514.0,17.0,6.0,0.0,1620000000000.0,1620000000000.0,183764600.0,902677.0,...,7.0,0.0,0.0,0.0,900360.0,51711.0,10.0,4.0,1.0,9.287


In [32]:
# preprocessing
'''
In the preprocessing you do all needed steps for the dataframe to make the columns/features useable.
for example, there are some list-like features that are represented as strings, you can transform 
them into proper lists using 'literal_eval' function, for example:

df['udps.n_bytes'] = df['udps.n_bytes'].transform(literal_eval)

In case of matrix-like data, you may want to flatten them all into a single list like that:

df['udps.n_bytes_per_packet'] = df['udps.n_bytes_per_packet'].apply(lambda a: np.array(a).flatten())
'''

def preprocess2(df,process_labels=True):
    for label in df:
        try:
            df[label] = df[label].transform(literal_eval)
            df[label] = df[label].apply(lambda a: np.array(a).flatten())
        except:
            pass
    return df

In [5]:
# preprocessing
'''
In the preprocessing you do all needed steps for the dataframe to make the columns/features useable.
for example, there are some list-like features that are represented as strings, you can transform 
them into proper lists using 'literal_eval' function, for example:

df['udps.n_bytes'] = df['udps.n_bytes'].transform(literal_eval)

In case of matrix-like data, you may want to flatten them all into a single list like that:

df['udps.n_bytes_per_packet'] = df['udps.n_bytes_per_packet'].apply(lambda a: np.array(a).flatten())
'''

def preprocess(df, process_labels=True):
    df['udps.src_ip'] = df['udps.src_ip'].transform(literal_eval)
    df['udps.src_ip'] = df['udps.src_ip'].apply(lambda a: np.array(a).flatten())
    return df

In [33]:
df = preprocess2(df)
df

Unnamed: 0,expiration_id,src_ip,src_mac,src_oui,src_port,dst_ip,dst_mac,dst_oui,dst_port,protocol,...,user_agent,content_type,udps.n_bytes,udps.protocol_header_fields,udps.handshake_packets_duration,udps.n_bytes_per_packet,udps.stnn_image,file_name,label,malware_family
0,0,fe80::68e8:c1b0:4cf3:8cf7,5c:f9:dd:6d:16:82,5c:f9:dd,546,ff02::1:2,33:33:00:01:00:02,33:33:00,547,17,...,,,"[0.00392156862745098, 0.30196078431372547, 0.6...","[0, 88, 0, 0, 0, 88, 997, 0, 0, 88, 2001, 0, 0...",1.000,"[0.00392156862745098, 0.30196078431372547, 0.6...","[0.0, 364018.0, 53595.688, 119621.35, 2.237109...",nonvpn-voip-facebook-16.pcap.UDP_fe80--68e8-c1...,benign,benign
1,0,10.6.5.102,00:08:02:1c:47:ae,00:08:02,55391,106.10.248.83,20:e5:2a:b6:93:f1,20:e5:2a,25,6,...,,,"[0.19607843137254902, 0.19607843137254902, 0.1...","[0, 0, 0, 8192, 1, 0, 75, 64240, 0, 0, 1, 6424...",0.075,"[0.19607843137254902, 0.19607843137254902, 0.1...","[0.0, 229.0, 26.90625, 50.289715, 2.2989116, 6...",2018-06-05-malware-emotet-malspam-infection-tr...,malware,emotet
2,0,10.0.0.11,ac:87:a3:39:05:cc,ac:87:a3,53334,173.194.132.52,c0:ff:d4:7f:d7:4d,c0:ff:d4,443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[0, 0, 0, 65535, 1, 0, 146, 28400, 0, 0, 0, 41...",0.146,"[0.08627450980392157, 0.011764705882352941, 0....","[0.0, 185.0, 24.46875, 56.990086, 1.945428, 66...",d_hi_safari_Batman_vs_Superman_14_15_47_480p.p...,benign,benign
3,0,10.2.9.101,00:08:02:1c:47:ae,00:08:02,50038,104.160.190.114,20:e5:2a:b6:93:f1,20:e5:2a,4443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[0, 0, 0, 65535, 1, 0, 92, 64240, 0, 0, 0, 655...",0.092,"[0.08627450980392157, 0.011764705882352941, 0....","[0.0, 1378.0, 93.809525, 301.2553, 3.9512565, ...",2021-02-09-malware-hancitor-infection-with-cob...,malware,hancitor
4,0,10.0.2.15,08:00:27:a3:83:43,08:00:27,49969,152.115.75.210,52:54:00:12:35:02,52:54:00,443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[0, 0, 0, 8192, 1, 0, 42, 65535, 0, 0, 1, 6424...",0.042,"[0.08627450980392157, 0.011764705882352941, 0....","[0.0, 9779.0, 745.1111, 2322.5686, 3.251213, 5...",2017-04-25-benign-benign.pcap.TCP_10-0-2-15_49...,benign,benign
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20488,0,10.2.12.101,00:08:02:1c:47:ae,00:08:02,49939,138.201.149.51,20:e5:2a:b6:93:f1,20:e5:2a,443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[0, 0, 0, 65535, 1, 0, 181, 64240, 0, 0, 1, 65...",0.181,"[0.08627450980392157, 0.011764705882352941, 0....","[0.0, 231.0, 33.434784, 70.500046, 1.9358493, ...",2021-02-12-malware-qakbot-Qakbot-infection-wit...,malware,qakbot
20489,0,10.0.0.9,fc:aa:14:7a:f7:ea,fc:aa:14,36444,212.179.180.116,c0:ff:d4:7f:d7:4d,c0:ff:d4,443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[0, 0, 0, 29200, 1, 0, 14, 28960, 0, 0, 0, 229...",0.014,"[0.08627450980392157, 0.011764705882352941, 0....","[0.0, 45079.0, 2821.875, 11074.586, 3.614778, ...",L_cyber_ff_09-08__18_05_04.pcap.TCP_10-0-0-9_3...,benign,benign
20490,0,10.6.5.102,00:08:02:1c:47:ae,00:08:02,56281,91.243.81.13,20:e5:2a:b6:93:f1,20:e5:2a,443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[0, 0, 0, 8192, 1, 0, 271, 64240, 0, 0, 0, 642...",0.271,"[0.08627450980392157, 0.011764705882352941, 0....","[0.0, 832.0, 87.28571, 208.50111, 2.544312, 60...",2018-06-05-malware-emotet-malspam-infection-tr...,malware,emotet
20491,0,10.6.5.102,00:08:02:1c:47:ae,00:08:02,52994,64.233.188.108,20:e5:2a:b6:93:f1,20:e5:2a,25,6,...,,,"[0.19607843137254902, 0.19607843137254902, 0.1...","[0, 0, 0, 8192, 1, 0, 47, 64240, 0, 0, 0, 6424...",0.047,"[0.19607843137254902, 0.19607843137254902, 0.1...","[0.0, 347.0, 42.0625, 82.85235, 2.1231167, 60....",2018-06-05-malware-emotet-malspam-infection-tr...,malware,emotet


In [21]:
import statistics

In [30]:
for label in df:
    try:
        sample = df[label]
        var = statistics.variance(sample, xbar = -100)
        if (var > 11000):
            print(label, " /-> ", var)
    except:
        pass
   

src_port  /->  2527662906.2177434
dst_port  /->  71948724.959106
protocol  /->  11368.22408744876
bidirectional_first_seen_ms  /->  1.963725732255819e+24
bidirectional_last_seen_ms  /->  1.9637257322566244e+24
bidirectional_duration_ms  /->  6829659502247.764
bidirectional_packets  /->  247203661.93299824
bidirectional_bytes  /->  213142751153671.78
src2dst_first_seen_ms  /->  1.963725732255819e+24
src2dst_last_seen_ms  /->  1.9637257322566244e+24
src2dst_duration_ms  /->  6828098997344.833
src2dst_packets  /->  49739503.48023619
src2dst_bytes  /->  120769480188967.08
dst2src_first_seen_ms  /->  1.9142515569611577e+24
dst2src_last_seen_ms  /->  1.9142515569619553e+24
dst2src_duration_ms  /->  6604740610004.185
dst2src_packets  /->  104788747.3856139
dst2src_bytes  /->  71493822012506.88
bidirectional_min_ps  /->  11521.37292601991
bidirectional_mean_ps  /->  314587.3155921672
bidirectional_stddev_ps  /->  546292.3235689386
bidirectional_max_ps  /->  10117180.515908647
src2dst_min_ps  /

In [14]:
df[(df['bidirectional_bytes']/df['bidirectional_packets']>2)]
# df[(df['src_bytes']/df['dst_bytes'] > 4) & (df['duration_'] == 0)]


Unnamed: 0,expiration_id,src_ip,src_mac,src_oui,src_port,dst_ip,dst_mac,dst_oui,dst_port,protocol,...,user_agent,content_type,udps.n_bytes,udps.protocol_header_fields,udps.handshake_packets_duration,udps.n_bytes_per_packet,udps.stnn_image,file_name,label,malware_family
0,0,fe80::68e8:c1b0:4cf3:8cf7,5c:f9:dd:6d:16:82,5c:f9:dd,546,ff02::1:2,33:33:00:01:00:02,33:33:00,547,17,...,,,"[0.00392156862745098, 0.30196078431372547, 0.6...","[[0, 88, 0, 0], [0, 88, 997, 0], [0, 88, 2001,...",1.000,"[[0.00392156862745098, 0.30196078431372547, 0....","[[0.0, 364018.0, 53595.688, 119621.35, 2.23710...",nonvpn-voip-facebook-16.pcap.UDP_fe80--68e8-c1...,benign,benign
1,0,10.6.5.102,00:08:02:1c:47:ae,00:08:02,55391,106.10.248.83,20:e5:2a:b6:93:f1,20:e5:2a,25,6,...,,,"[0.19607843137254902, 0.19607843137254902, 0.1...","[[0, 0, 0, 8192], [1, 0, 75, 64240], [0, 0, 1,...",0.075,"[[0.19607843137254902, 0.19607843137254902, 0....","[[0.0, 229.0, 26.90625, 50.289715, 2.2989116, ...",2018-06-05-malware-emotet-malspam-infection-tr...,malware,emotet
2,0,10.0.0.11,ac:87:a3:39:05:cc,ac:87:a3,53334,173.194.132.52,c0:ff:d4:7f:d7:4d,c0:ff:d4,443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[[0, 0, 0, 65535], [1, 0, 146, 28400], [0, 0, ...",0.146,"[[0.08627450980392157, 0.011764705882352941, 0...","[[0.0, 185.0, 24.46875, 56.990086, 1.945428, 6...",d_hi_safari_Batman_vs_Superman_14_15_47_480p.p...,benign,benign
3,0,10.2.9.101,00:08:02:1c:47:ae,00:08:02,50038,104.160.190.114,20:e5:2a:b6:93:f1,20:e5:2a,4443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[[0, 0, 0, 65535], [1, 0, 92, 64240], [0, 0, 0...",0.092,"[[0.08627450980392157, 0.011764705882352941, 0...","[[0.0, 1378.0, 93.809525, 301.2553, 3.9512565,...",2021-02-09-malware-hancitor-infection-with-cob...,malware,hancitor
4,0,10.0.2.15,08:00:27:a3:83:43,08:00:27,49969,152.115.75.210,52:54:00:12:35:02,52:54:00,443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[[0, 0, 0, 8192], [1, 0, 42, 65535], [0, 0, 1,...",0.042,"[[0.08627450980392157, 0.011764705882352941, 0...","[[0.0, 9779.0, 745.1111, 2322.5686, 3.251213, ...",2017-04-25-benign-benign.pcap.TCP_10-0-2-15_49...,benign,benign
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20488,0,10.2.12.101,00:08:02:1c:47:ae,00:08:02,49939,138.201.149.51,20:e5:2a:b6:93:f1,20:e5:2a,443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[[0, 0, 0, 65535], [1, 0, 181, 64240], [0, 0, ...",0.181,"[[0.08627450980392157, 0.011764705882352941, 0...","[[0.0, 231.0, 33.434784, 70.500046, 1.9358493,...",2021-02-12-malware-qakbot-Qakbot-infection-wit...,malware,qakbot
20489,0,10.0.0.9,fc:aa:14:7a:f7:ea,fc:aa:14,36444,212.179.180.116,c0:ff:d4:7f:d7:4d,c0:ff:d4,443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[[0, 0, 0, 29200], [1, 0, 14, 28960], [0, 0, 0...",0.014,"[[0.08627450980392157, 0.011764705882352941, 0...","[[0.0, 45079.0, 2821.875, 11074.586, 3.614778,...",L_cyber_ff_09-08__18_05_04.pcap.TCP_10-0-0-9_3...,benign,benign
20490,0,10.6.5.102,00:08:02:1c:47:ae,00:08:02,56281,91.243.81.13,20:e5:2a:b6:93:f1,20:e5:2a,443,6,...,,,"[0.08627450980392157, 0.011764705882352941, 0....","[[0, 0, 0, 8192], [1, 0, 271, 64240], [0, 0, 0...",0.271,"[[0.08627450980392157, 0.011764705882352941, 0...","[[0.0, 832.0, 87.28571, 208.50111, 2.544312, 6...",2018-06-05-malware-emotet-malspam-infection-tr...,malware,emotet
20491,0,10.6.5.102,00:08:02:1c:47:ae,00:08:02,52994,64.233.188.108,20:e5:2a:b6:93:f1,20:e5:2a,25,6,...,,,"[0.19607843137254902, 0.19607843137254902, 0.1...","[[0, 0, 0, 8192], [1, 0, 47, 64240], [0, 0, 0,...",0.047,"[[0.19607843137254902, 0.19607843137254902, 0....","[[0.0, 347.0, 42.0625, 82.85235, 2.1231167, 60...",2018-06-05-malware-emotet-malspam-infection-tr...,malware,emotet


In [10]:
df[['bidirectional_bytes', 'bidirectional_packets', 'label']]

Unnamed: 0,bidirectional_bytes,bidirectional_packets,label
0,8624,98,benign
1,4772,38,malware
2,11506,34,benign
3,941,21,malware
4,5312,27,benign
...,...,...,...
20488,6691,23,malware
20489,5835,42,benign
20490,3269,21,malware
20491,4127,36,malware


# sklearn.pca


In [76]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20493 entries, 0 to 20492
Data columns (total 91 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   expiration_id                    20493 non-null  int64  
 1   src_ip                           20493 non-null  object 
 2   src_mac                          19593 non-null  object 
 3   src_oui                          19593 non-null  object 
 4   src_port                         20493 non-null  int64  
 5   dst_ip                           20493 non-null  object 
 6   dst_mac                          19593 non-null  object 
 7   dst_oui                          19593 non-null  object 
 8   dst_port                         20493 non-null  int64  
 9   protocol                         20493 non-null  int64  
 10  ip_version                       20493 non-null  int64  
 11  vlan_id                          20493 non-null  int64  
 12  bidirectional_firs

In [74]:
# for i in range(3):
df1 = df.iloc[:,75:]
df1

Unnamed: 0,application_name,application_category_name,application_is_guessed,requested_server_name,client_fingerprint,server_fingerprint,user_agent,content_type,udps.n_bytes,udps.protocol_header_fields,udps.handshake_packets_duration,udps.n_bytes_per_packet,udps.stnn_image,file_name,label,malware_family
0,DHCPV6,Network,0,,,,,,"[0.00392156862745098, 0.30196078431372547, 0.6...","[0, 88, 0, 0, 0, 88, 997, 0, 0, 88, 2001, 0, 0...",1.000,"[0.00392156862745098, 0.30196078431372547, 0.6...","[0.0, 364018.0, 53595.688, 119621.35, 2.237109...",nonvpn-voip-facebook-16.pcap.UDP_fe80--68e8-c1...,benign,benign
1,SMTP,Email,0,,,,,,"[0.19607843137254902, 0.19607843137254902, 0.1...","[0, 0, 0, 8192, 1, 0, 75, 64240, 0, 0, 1, 6424...",0.075,"[0.19607843137254902, 0.19607843137254902, 0.1...","[0.0, 229.0, 26.90625, 50.289715, 2.2989116, 6...",2018-06-05-malware-emotet-malspam-infection-tr...,malware,emotet
2,TLS.YouTube,Media,0,r15---sn-p5qlsned.googlevideo.com,799135475da362592a4be9199d258726,3329c45bf8e2a41960ba1868831c7990,,,"[0.08627450980392157, 0.011764705882352941, 0....","[0, 0, 0, 65535, 1, 0, 146, 28400, 0, 0, 0, 41...",0.146,"[0.08627450980392157, 0.011764705882352941, 0....","[0.0, 185.0, 24.46875, 56.990086, 1.945428, 66...",d_hi_safari_Batman_vs_Superman_14_15_47_480p.p...,benign,benign
3,TLS,Web,1,,51c64c77e60f3980eea90869b68c58a8,ae4edc6faf64d08308082ad26be60767,,,"[0.08627450980392157, 0.011764705882352941, 0....","[0, 0, 0, 65535, 1, 0, 92, 64240, 0, 0, 0, 655...",0.092,"[0.08627450980392157, 0.011764705882352941, 0....","[0.0, 1378.0, 93.809525, 301.2553, 3.9512565, ...",2021-02-09-malware-hancitor-infection-with-cob...,malware,hancitor
4,TLS,Web,0,track.adform.net,0ffee3ba8e615ad22535e7f771690a28,72f4dca7257e1f53d32a5c79b62173c7,,,"[0.08627450980392157, 0.011764705882352941, 0....","[0, 0, 0, 8192, 1, 0, 42, 65535, 0, 0, 1, 6424...",0.042,"[0.08627450980392157, 0.011764705882352941, 0....","[0.0, 9779.0, 745.1111, 2322.5686, 3.251213, 5...",2017-04-25-benign-benign.pcap.TCP_10-0-2-15_49...,benign,benign
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20488,TLS,Web,1,,7dd50e112cd23734a310b90f6f44a7cd,f176ba63b4d68e576b5ba345bec2c7b7,,,"[0.08627450980392157, 0.011764705882352941, 0....","[0, 0, 0, 65535, 1, 0, 181, 64240, 0, 0, 1, 65...",0.181,"[0.08627450980392157, 0.011764705882352941, 0....","[0.0, 231.0, 33.434784, 70.500046, 1.9358493, ...",2021-02-12-malware-qakbot-Qakbot-infection-wit...,malware,qakbot
20489,TLS.Google,Web,0,clients4.google.com,14e8ffbc70e60c915bcc493c25b1b552,834a51546ab3ba455cc79822764c1354,,,"[0.08627450980392157, 0.011764705882352941, 0....","[0, 0, 0, 29200, 1, 0, 14, 28960, 0, 0, 0, 229...",0.014,"[0.08627450980392157, 0.011764705882352941, 0....","[0.0, 45079.0, 2821.875, 11074.586, 3.614778, ...",L_cyber_ff_09-08__18_05_04.pcap.TCP_10-0-0-9_3...,benign,benign
20490,TLS,Web,0,adshiepkhach.top,4d7a28d6f2263ed61de88ca66eb011e3,80b3a14bccc8598a1f3bbe83e71f735f,,,"[0.08627450980392157, 0.011764705882352941, 0....","[0, 0, 0, 8192, 1, 0, 271, 64240, 0, 0, 0, 642...",0.271,"[0.08627450980392157, 0.011764705882352941, 0....","[0.0, 832.0, 87.28571, 208.50111, 2.544312, 60...",2018-06-05-malware-emotet-malspam-infection-tr...,malware,emotet
20491,SMTP,Email,0,,,,,,"[0.19607843137254902, 0.19607843137254902, 0.1...","[0, 0, 0, 8192, 1, 0, 47, 64240, 0, 0, 0, 6424...",0.047,"[0.19607843137254902, 0.19607843137254902, 0.1...","[0.0, 347.0, 42.0625, 82.85235, 2.1231167, 60....",2018-06-05-malware-emotet-malspam-infection-tr...,malware,emotet


# Source port number
This field identifies the sender's port, when used, and should be assumed to be the port to reply to if needed. If not used, it should be zero. If the source host is the client, the port number is likely to be an ephemeral port number. If the source host is the server, the port number is likely to be a well-known port number.

# Destination port number
This field identifies the receiver's port and is required. Similar to source port number, if the client is the destination host then the port number will likely be an ephemeral port number and if the destination host is the server then the port number will likely be a well-known port number.
# Length
This field specifies the length in bytes of the UDP header and UDP data. The minimum length is 8 bytes, the length of the header. The field size sets a theoretical limit of 65,535 bytes (8-byte header + 65,527 bytes of data) for a UDP datagram. However the actual limit for the data length, which is imposed by the underlying IPv4 protocol, is 65,507 bytes (65,535 bytes − 8-byte UDP header − 20-byte IP header).Using IPv6 jumbograms it is possible to have UDP datagrams of size greater than 65,535 bytes. RFC 2675 specifies that the length field is set to zero if the length of the UDP header plus UDP data is greater than 65,535.
# Checksum
The checksum field may be used for error-checking of the header and data. This field is optional in IPv4,and mandatory in IPv6. The field carries all-zeros if unused.