Basically, the program takes the raw data of captured RPL network packets, which has these columns:

No.	|	Time	|	Source	|	Destination	|	Protocol	|	Length	|	Info

Then it splits the dataset into 1 second frames.
For each frame it calculates the columns below:

second | src | dst | packetcount | src_ratio | dst_ratio | src_duration_ratio | dst_duration_ratio | TotalPacketDuration | TotalPacketLenght | src_packet_ratio | dst_packet_ratio | DioCount | DisCount | DaoCount | OtherMsg | label



In [51]:
#importing libraries
import pandas as pd
import numpy as np

In [52]:
# Name of the capture to process and its path inside the "Attack Files" folder.
flnm = "SFA.csv"
filestr = f"Attack Files/{flnm}"

Each CSV file is named after the attack type it contains, using the following codes:

DFA, SFA, SHA, SYA and VNA are the attack types.


In [53]:
# For machine learning, every capture gets a numeric class label chosen from
# its file name; any file name that is not listed here is labelled 0.
lbl = {
    "SHA.csv": 1,
    "DFA.csv": 2,
    "SFA.csv": 3,
    "SYA.csv": 4,
    "VNA.csv": 5,
}.get(flnm, 0)
 

In [54]:
# Path of the CSV file that will receive the computed dataset.
resultfile = f"Results/{flnm}"

In [55]:
# Load the raw capture into Raw_Data; the CSV column "No." becomes the index.
Raw_Data = pd.read_csv(filepath_or_buffer=filestr, index_col="No.")

In [56]:
# Copy the contents of Raw_Data into a standalone numpy array.
np_Raw_Data = Raw_Data.to_numpy(copy=True)

In [57]:
# Reorder the rows by the values of column 0 (the time values).
# Column layout: 0: Time, 1: Source, 2: Destination, 3: Protocol, 4: Packet Length, 5: Info
initial_time_order = np.argsort(np_Raw_Data[:, 0])
np_Raw_Data = np_Raw_Data[initial_time_order]

In [58]:
# packetDurations holds one value per packet: entry n is the gap
# np_Raw_Data[n][0] - np_Raw_Data[n - 1][0] to the previous packet, and entry 0
# is 0 because the first packet has no predecessor.
#
# Two fixes compared with the earlier hand-written loop:
#   * that loop's guard (`counter + 1 < len(np_Raw_Data)`) also forced the LAST
#     entry to 0, so the final gap was lost; every gap is now computed.
#   * the subtraction is done in float64; float32 carries only about 7
#     significant digits, which is too coarse for timestamps of several
#     hundred seconds.
arrival_times = np_Raw_Data[:, 0].astype(np.float64)
packetDurations = []
if arrival_times.size:
    # np.diff yields time[n] - time[n - 1] for n = 1 .. len - 1.
    packetDurations = [0.0] + np.diff(arrival_times).tolist()

In [59]:
# Remove entry 0 of packetDurations (the placeholder computed for the first
# packet). np.delete returns a new numpy array that is one element shorter, so
# entry n now holds the value that was computed for packet n + 1.
packetDurations = np.delete(packetDurations, 0, axis = 0)

In [60]:
# Remove the final row of np_Raw_Data (index -1 along the row axis).
np_Raw_Data = np.delete(np_Raw_Data, -1, axis=0)


In [61]:
# Insert packetDurations into np_Raw_Data as a new column at position 1; the
# columns that were at positions 1..5 move one place to the right.
# np.insert returns a new array, which replaces np_Raw_Data.
# Resulting columns: 0: Time, 1: Packet Durations, 2: Source, 3: Destination,
#                    4: Protocol, 5: Packet Length, 6: Info
np_Raw_Data = np.insert(np_Raw_Data, 1, packetDurations, axis = 1)

In [62]:
# source_unique_array: sorted unique values of the source address column
# (column position 1 of Raw_Data), compared as strings.
source_column = Raw_Data.iloc[:, 1].astype(str)
source_unique_array = np.unique(source_column.to_numpy())

In [63]:
# destination_unique_array: sorted unique values of the destination address
# column (column position 2 of Raw_Data), compared as strings.
destination_column = Raw_Data.iloc[:, 2].astype(str)
destination_unique_array = np.unique(destination_column.to_numpy())

In [64]:
# info_unique_array: sorted unique values of the info column
# (column position 5 of Raw_Data).
info_column = Raw_Data.iloc[:, 5]
info_unique_array = np.unique(info_column.to_numpy())

In [65]:
# protocol_unique_array: sorted unique values of the protocol column
# (column position 3 of Raw_Data).
protocol_column = Raw_Data.iloc[:, 3]
protocol_unique_array = np.unique(protocol_column.to_numpy())

In [66]:
# all_ip_addresses: sorted unique union of the source and destination addresses.
all_ip_addresses = np.union1d(source_unique_array, destination_unique_array)

In [67]:
# ip_dict will map each IPv6 address to its IP number,
# for example fe80::c30c:0:0:1 -> 0. It starts out empty.
ip_dict = dict()

In [68]:
# Number the IP addresses with scikit-learn's LabelEncoder: the encoder is
# fitted on all_ip_addresses and then used to encode that same array.
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
le.fit(all_ip_addresses)
lb_all_ip_addresses = le.transform(all_ip_addresses)

In [69]:
# Store one "address : number" pair per address in ip_dict. The two arrays are
# walked in parallel, pairing element k of all_ip_addresses with element k of
# lb_all_ip_addresses.
ip_dict.update(zip(all_ip_addresses, lb_all_ip_addresses))

In [70]:
# Reorder the rows by the values of column 0 (the time values).
time_order = np.argsort(np_Raw_Data[:, 0])
np_Raw_Data = np_Raw_Data[time_order]

In [71]:
# duration: the time value of the last row, rounded down to a whole second.
last_timestamp = np.float32(np_Raw_Data[-1, 0])
duration = np.floor(last_timestamp)

In [72]:
# State used by the per-second calculation loop.
counter = 0            # number of one-second frames handled so far
currentSecond = 60.0   # lower time bound of the frame being handled

# Accumulators keyed by the "source-destination" string.
packetcount, TotalPacketDuration, TotalPacketLenght = {}, {}, {}
DioCount, DisCount, DaoCount, OtherMsg = {}, {}, {}, {}

# Accumulators keyed by a single source or destination address.
src_count, dst_count = {}, {}
src_duration, dst_duration = {}, {}
src_packet_lenght_sum, dst_packet_lenght_sum = {}, {}

# NOTE(review): frame is not referenced by the calculation loop below.
frame = []

In [73]:

# Empty pandas DataFrame that defines the columns of the result dataset.
result_columns = [
    'second',
    'src',
    'dst',
    'packetcount',
    'src_ratio',
    'dst_ratio',
    'src_duration_ratio',
    'dst_duration_ratio',
    'TotalPacketDuration',
    'TotalPacketLenght',
    'src_packet_ratio',
    'dst_packet_ratio',
    'DioCount',
    'DisCount',
    'DaoCount',
    'OtherMsg',
    'label',
]
row = pd.DataFrame(columns=result_columns)


In [74]:
# Builds the result rows: the capture is cut into one-second frames and, for
# every frame, one row is appended to `row` per "source-destination" pair seen
# in that frame.
#
# Fixes compared with the earlier version of this cell:
#   * OtherMsg is now cleared at the start of every frame like the other
#     accumulators (it used to keep growing across frames).
#   * DIS messages are no longer also counted as "other" messages (the old
#     test compared against the DAO text twice and never against the DIS text).
#   * A frame covers [currentSecond, currentSecond + 1); the upper bound used
#     to be inclusive, so a packet exactly on a second boundary was counted in
#     two frames.
#   * Ratio divisions only absorb a zero denominator instead of using a bare
#     `except:` that hid every kind of error.

# Info text of each separately counted RPL control message -> its counter.
# Any other Info text is counted in OtherMsg.
rpl_message_counters = {
    "RPL Control (DODAG Information Object)": DioCount,
    "RPL Control (DODAG Information Solicitation)": DisCount,
    "RPL Control (Destination Advertisement Object)": DaoCount,
}

# Every accumulator that has to start empty for each frame.
per_frame_accumulators = (
    packetcount, TotalPacketDuration, TotalPacketLenght,
    src_count, dst_count, src_duration, dst_duration,
    src_packet_lenght_sum, dst_packet_lenght_sum,
    DioCount, DisCount, DaoCount, OtherMsg,
)


def _safe_ratio(part, whole):
    """Return part / whole, or NaN when the division by zero is rejected."""
    try:
        return part / whole
    except ZeroDivisionError:
        return float("nan")


# The time column as float64, computed once because the rows do not change
# while the loop runs.
frame_times = np_Raw_Data[:, 0].astype(np.float64)

# NOTE(review): assuming currentSecond still starts at 60.0 and duration is
# floor(last timestamp), as set in the earlier cells, the loop skips the first
# 60 seconds of the capture and its last ~60 frames lie past the end of the
# data (always empty). Confirm that this offset is intended.
while counter < duration:

    # one_second_frame holds the rows whose time lies in
    # [currentSecond, currentSecond + 1).
    in_frame = (frame_times >= currentSecond) & (frame_times < currentSecond + 1.0)
    one_second_frame = np_Raw_Data[in_frame]

    # if there is data in the one_second_frame, make calculations.
    if one_second_frame.size > 1:
        # reset all per-frame state.
        for accumulator in per_frame_accumulators:
            accumulator.clear()
        totalpackets = 0
        frame_packet_length_sum = 0
        total_duration = 0.0
        # "src-dst" key -> (src, dst), so the key never has to be parsed back.
        pair_endpoints = {}

        # looping in each one_second_frame row
        # packet columns: 0 Time, 1 Packet Duration, 2 Source, 3 Destination,
        #                 4 Protocol, 5 Packet Length, 6 Info
        for packet in one_second_frame:
            # Rows without a source address (e.g. IEEE 802.15.4 / Ack
            # messages) are not processed.
            if pd.isnull(packet[2]):
                continue

            src = packet[2]
            dst = packet[3]
            # e.g. fe80::c30c:0:0:3-fe80::c30c:0:0:1
            src_dst = src + "-" + dst
            pair_endpoints[src_dst] = (src, dst)

            packet_duration = packet[1]
            packet_length = packet[5]

            # per "src-dst" pair: packet count, summed duration, summed length
            packetcount[src_dst] = packetcount.get(src_dst, 0) + 1
            TotalPacketDuration[src_dst] = TotalPacketDuration.get(src_dst, 0) + packet_duration
            TotalPacketLenght[src_dst] = TotalPacketLenght.get(src_dst, 0) + packet_length

            # per source / per destination: packet count
            src_count[src] = src_count.get(src, 0) + 1
            dst_count[dst] = dst_count.get(dst, 0) + 1

            # per source / per destination: summed duration
            src_duration[src] = src_duration.get(src, 0) + packet_duration
            dst_duration[dst] = dst_duration.get(dst, 0) + packet_duration

            # per source / per destination: summed packet length
            src_packet_lenght_sum[src] = src_packet_lenght_sum.get(src, 0) + packet_length
            dst_packet_lenght_sum[dst] = dst_packet_lenght_sum.get(dst, 0) + packet_length

            # frame-wide totals, the denominators of the ratios below
            total_duration += packet_duration
            frame_packet_length_sum += packet_length
            totalpackets += 1

            # DIO, DIS, DAO message counts; everything else is "other".
            message_counter = rpl_message_counters.get(packet[6], OtherMsg)
            message_counter[src_dst] = message_counter.get(src_dst, 0) + 1

        # one result row per "src-dst" pair seen in this frame
        for pair_key in packetcount:
            sourcee, destinatt = pair_endpoints[pair_key]

            # share of the frame's packets sent by this source / received by
            # this destination
            src_ratio = _safe_ratio(src_count[sourcee], totalpackets)
            dst_ratio = _safe_ratio(dst_count[destinatt], totalpackets)

            # share of the frame's summed packet durations
            src_duration_ratio = _safe_ratio(src_duration[sourcee], total_duration)
            dst_duration_ratio = _safe_ratio(dst_duration[destinatt], total_duration)

            # share of the frame's summed packet lengths
            src_packet_ratio = _safe_ratio(src_packet_lenght_sum[sourcee], frame_packet_length_sum)
            dst_packet_ratio = _safe_ratio(dst_packet_lenght_sum[destinatt], frame_packet_length_sum)

            # Values in the column order of the `row` dataframe:
            # second, src, dst, packetcount, src_ratio, dst_ratio,
            # src_duration_ratio, dst_duration_ratio, TotalPacketDuration,
            # TotalPacketLenght, src_packet_ratio, dst_packet_ratio,
            # DioCount, DisCount, DaoCount, OtherMsg, label
            record = np.array([
                np.single(currentSecond),
                ip_dict[sourcee],
                ip_dict[destinatt],
                int(packetcount[pair_key]),
                np.single(src_ratio),
                np.single(dst_ratio),
                np.single(src_duration_ratio),
                np.single(dst_duration_ratio),
                TotalPacketDuration[pair_key],
                TotalPacketLenght[pair_key],
                np.single(src_packet_ratio),
                np.single(dst_packet_ratio),
                DioCount.get(pair_key, 0),
                DisCount.get(pair_key, 0),
                DaoCount.get(pair_key, 0),
                OtherMsg.get(pair_key, 0),
                lbl
                ], dtype="object")

            # append the record as a new last row of the dataframe
            row.loc[len(row)] = record

    # move on to the next one-second frame
    currentSecond += 1.0
    counter += 1

    # print data for observing the process.
    print(str(counter) + " of " + str(duration) + " of process is ok!!!")

1 of 592.0 of process is ok!!!
2 of 592.0 of process is ok!!!
3 of 592.0 of process is ok!!!
4 of 592.0 of process is ok!!!
5 of 592.0 of process is ok!!!
6 of 592.0 of process is ok!!!
7 of 592.0 of process is ok!!!
8 of 592.0 of process is ok!!!
9 of 592.0 of process is ok!!!
10 of 592.0 of process is ok!!!
11 of 592.0 of process is ok!!!
12 of 592.0 of process is ok!!!
13 of 592.0 of process is ok!!!
14 of 592.0 of process is ok!!!
15 of 592.0 of process is ok!!!
16 of 592.0 of process is ok!!!
17 of 592.0 of process is ok!!!
18 of 592.0 of process is ok!!!
19 of 592.0 of process is ok!!!
20 of 592.0 of process is ok!!!
21 of 592.0 of process is ok!!!
22 of 592.0 of process is ok!!!
23 of 592.0 of process is ok!!!
24 of 592.0 of process is ok!!!
25 of 592.0 of process is ok!!!
26 of 592.0 of process is ok!!!
27 of 592.0 of process is ok!!!
28 of 592.0 of process is ok!!!
29 of 592.0 of process is ok!!!
30 of 592.0 of process is ok!!!
31 of 592.0 of process is ok!!!
32 of 592.0 of pr

In [75]:
# Write the result dataframe to resultfile as comma-separated text, without
# the dataframe index.
row.to_csv(path_or_buf=resultfile, sep=",", index=False)