# 2017 Dataset

This dataset differs from the 2015 dataset in that the network data is stored as pcap files rather than csv files. The only problem with this dataset is that it contains no attack data. However, it will be useful as a training dataset. Pcap files are much more generalisable than csv files. The problem with the pcap files is that I need to clean the data myself! The pcap files have been converted to flows using Argus - it is these files that will be loaded here.

In [104]:
import os
import pandas as pd
import numpy as np
import pprint
from IPython.display import Markdown, display
from datetime import datetime
from datetime import timezone
import pickle

##local python file holding the paths to the directories I store the log files in
from directories_to_use import argus_text_files_dir, getTestingDir

### Load data into Dataframe
This is largely straightforward, however, the date is stored in the name of the text file rather than the actual records so need to extract that and add that to the records.

In [105]:
def readDataIntoDataframe(argus_text_files_dir):
    """
    Read every Argus text file in a directory into one single dataframe.

    The capture date is only present in each file's name, so it is extracted
    with removeDateFromName and prefixed to the StartTime and LastTime values
    of that file's records.

    Parameters
    ----------
    argus_text_files_dir : str
        Directory holding the Argus text files (with or without a trailing
        path separator).

    Returns
    -------
    pandas.DataFrame
        The records of all files, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the directory contains no files.
    """
    frames = []
    # sorted() makes the row order reproducible; os.listdir returns the
    # entries in arbitrary order.
    for t_file in sorted(os.listdir(argus_text_files_dir)):
        # os.path.join works whether or not the directory ends with a separator.
        file_df = pd.read_csv(os.path.join(argus_text_files_dir, t_file))
        date = removeDateFromName(t_file)
        for time_col in ("StartTime", "LastTime"):
            file_df[time_col] = date + file_df[time_col]
        frames.append(file_df)
    if not frames:
        raise ValueError("No Argus text files found in " + str(argus_text_files_dir))
    # A single concat avoids re-copying the accumulated frame for every file.
    return pd.concat(frames, ignore_index=True)

def removeDateFromName(filename):
    """
    Build a "DD-MM-YYYY " prefix from the date embedded in a file name.

    The date is read from the first eight characters (YYYYMMDD) of the part
    of the name that follows the last underscore. The returned string ends
    with a space so that a time of day can be appended directly.
    """
    stamp = filename.split("_")[-1]
    return "-".join([stamp[6:8], stamp[4:6], stamp[0:4]]) + " "

# Load every file found in argus_text_files_dir into one dataframe and
# print its (rows, columns) shape.
data2017_df = readDataIntoDataframe(argus_text_files_dir)
print(data2017_df.shape)

  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):


(2682823, 118)


### Clean data (i.e. remove columns that are unhelpful)
Just removing columns that only contain Nans or only contain one unique value

In [106]:
def removeNanColumns(dataframe):
    """
    Remove every column of the dataframe that contains only NaNs.

    Prints the shape before and after, and the names of the removed columns.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to clean; it is not modified.

    Returns
    -------
    pandas.DataFrame
        A frame without the all-NaN columns.
    """
    print("Shape before: ", dataframe.shape)
    # isna() works for every dtype, whereas np.isnan raises TypeError on the
    # string values of object columns.
    # The len() guard leaves an empty dataframe unchanged.
    columns_removed = [
        col for col in dataframe.columns
        if len(dataframe) > 0 and dataframe[col].isna().all()
    ]
    dataframe = dataframe.drop(columns=columns_removed)
    print("Shape after: ", dataframe.shape)
    print("Columns with only Nans: " + str(columns_removed))
    return dataframe

def removeSingleColumns(dataframe):
    """
    Remove every column that holds exactly one unique value.

    NaN counts as a value, so a column made only of NaNs is removed as well.
    Prints the shape before and after, and the names of the removed columns.
    Returns the reduced dataframe.
    """
    print("Shape before: ", dataframe.shape)
    columns_removed = [
        name for name in dataframe
        if len(dataframe[name].unique()) == 1
    ]
    dataframe = dataframe.drop(columns_removed, axis=1)
    print("Shape after: ", dataframe.shape)
    print("Columns with only one unique value: " + str(columns_removed))
    return dataframe
def printColumnsAndUniqueVals(dataframe):
    """
    Print every column name (in bold markdown) followed by its unique values.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose columns are listed; it is not modified.
    """
    for col in dataframe.columns:
        printmd("**" + col + "**: " + str(dataframe[col].unique()))
        
def printmd(string):
    """
    Render a string as Markdown in the notebook output (inline HTML is
    rendered as well).
    """
    rendered = Markdown(string)
    display(rendered)

# First drop the columns that hold nothing but NaNs, then the columns that
# hold a single constant value.
data2017_df = removeNanColumns(data2017_df)
data2017_df = removeSingleColumns(data2017_df)
                
            

Shape before:  (2682823, 118)
Shape after:  (2682823, 70)
Columns with only Nans: ['SrcMac', 'DstMac', 'SrcOui', 'DstOui', 'sCo', 'dCo', 'sMpls', 'dMpls', 'sAS', 'dAS', 'iAS', 'NStrok', 'sNStrok', 'dNStrok', 'SIntPkt', 'SIntDist', 'SIntPktAct', 'SIntActDist', 'SIntPktIdl', 'SIntIdlDist', 'DIntPkt', 'DIntDist', 'DIntPktAct', 'DIntActDist', 'DIntPktIdl', 'DIntIdlDist', 'SrcJitter', 'SrcJitAct', 'DstJitter', 'DstJitAct', 'Label', 'srcUdata', 'dstUdata', 'sVlan', 'dVlan', 'sVid', 'dVid', 'sVpri', 'dVpri', 'SRange', 'ERange', 'sPktSz', 'sMaxPktSz', 'dPktSz', 'dMaxPktSz', 'sMinPktSz', 'dMinPktSz', 'dMinPktSz.1']
Shape before:  (2682823, 70)
Shape after:  (2682823, 59)
Columns with only one unique value: ['Trans', 'StdDev', 'AutoId', 'TotAppByte', 'SAppBytes', 'DAppBytes', 'PCRatio', 'Retrans', 'SrcRetra', 'DstRetra', 'pRetran']


In [107]:
"""Print columns and the percentage of nans present in each column"""
# isnull() marks the missing cells; their per-column mean times 100 is the
# percentage of missing values. As the last expression of the cell it is
# rendered as the cell output.
data2017_df.isnull().mean() * 100

StartTime      0.000000
LastTime       0.000000
Flgs           0.000000
Seq            0.000000
Dur            0.000000
RunTime        0.000000
IdleTime       0.000000
Mean           0.000000
Sum            0.000000
Min            0.000000
Max            0.000000
SrcAddr        0.000000
DstAddr        0.000000
Proto          0.000000
Sport         10.424169
Dport         10.424169
sTos          10.826022
dTos          38.928733
sDSb          10.826022
dDSb          38.928733
sTtl          10.826022
dTtl          38.928733
sHops         10.826022
dHops         38.928733
sIpId         10.826022
dIpId         38.928733
Cause          0.000000
TotPkts        0.000000
SrcPkts        0.000000
DstPkts        0.000000
TotBytes       0.000000
SrcBytes       0.000000
DstBytes       0.000000
Load           0.000000
SrcLoad        0.000000
DstLoad        0.000000
Loss           0.000000
SrcLoss        0.000000
DstLoss        0.000000
pLoss          0.000000
SrcGap        43.607573
DstGap        43

## Calculated changes made to the dataset
### Drop Columns
- **Inode** is an IP Address, so very difficult to replace. Also >90% of the values were missing - so will drop it.
- **TcpOpt** is 99% Nan so will drop that column
- **SrcTCPBase** and **DstTCPBase** are 43% missing and they refer to the base sequence number in a TCP transaction. As they are specifically related to TCP, it's not a surprise that there would be values missing since not all the traffic is TCP related. Can't replace with mean as not all traffic is TCP - if I replace with 0, it will skew data. Will have to remove the columns 
- **SrcWin** and **DstWin** are the source and destination TCP window advertisements (not jitter), so they are only populated for TCP flows. They are quite useful, but with 43% of the values being NaN it might be misleading to include them. Will have to remove them.
- **dTos** and **dDSb** can be removed as their only unique values are 0 and nan.
- **sIpId** and **dIpId** are not necessary. They are unique IDs that tie src, dst and port together. As they contain nulls and are difficult to impute, they can be removed, since we have all the individual elements that make up the IDs. (Note: the code below currently drops only **dIpId**; **sIpId** is kept and only its NaN rows are removed.)
- **sVid** and **dVid** can be removed since sVLan and dVLan are the same thing, which is VLAN ID.
- **seq** can cause the classification to overfit as it's just an Argus sequence number

### Drop Rows
- **Sport** and **Dport** cannot be safely replaced/imputed without causing inconsistencies. Will remove rows that lack these fields - this makes up 10% of the dataset.
- **sHops**, **sTtl** and **sDSb** contain only 0.5% NaN values (after removing the rows from above), so we might as well remove the rows containing NaNs for these.

### Replace with 0
- **SrcGap** and **DstGap** is 43% nan but refers to bytes missing from the stream, therefore, when Nan, we can just replace with 0. 
- **sTos** refers to Type of Service (whether traffic should take precedence etc.). Nans can be set to 0 for these fields.

### Imputed
- **dHops** will need to be imputed. Values are either 0 or 1.
- **dTtl** will also need to be imputed.
- **TODO** currently going to remove the columns as they contain 40% Nans. Will attempt to impute them at a later stage and determine if it affects the classification result. 


In [108]:
def dropChosenColumns(dataframe, column_names):
    """
    Drop the named columns (e.g. columns with too many nulls).

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to reduce; it is not modified.
    column_names : iterable of str
        Names of the columns to drop.

    Returns
    -------
    pandas.DataFrame
        A frame without the named columns.

    Raises
    ------
    KeyError
        If a named column is not present in the dataframe.
    """
    # One drop call copies the frame once instead of once per column.
    return dataframe.drop(columns=list(column_names))
        
def replaceNansWithZero(dataframe, column_names):
    """
    Replace the NaNs of the named columns with 0.

    The input dataframe is left untouched; a new frame is returned. This
    avoids assigning into a frame that may itself be a filtered slice of
    another frame.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame holding the columns to fill.
    column_names : iterable of str
        Names of the columns whose NaNs become 0.

    Returns
    -------
    pandas.DataFrame
        A frame in which the named columns have no NaNs left.

    Raises
    ------
    KeyError
        If a named column is not present in the dataframe.
    """
    columns = list(column_names)
    # fillna silently ignores unknown keys, so check them explicitly to keep
    # reporting a misspelt column name.
    missing = [col for col in columns if col not in dataframe.columns]
    if missing:
        raise KeyError(missing)
    if not columns:
        return dataframe.copy()
    return dataframe.fillna({col: 0 for col in columns})

def removeNanRows(dataframe, column_names):
    """
    Remove every row that has a NaN in at least one of the chosen columns.

    Returns the filtered dataframe; rows keep their original index labels.
    """
    return dataframe.dropna(subset=list(column_names))

# Drop the columns chosen in the notes above, including dHops and dTtl, which
# are dropped for now rather than imputed.
data2017_df = dropChosenColumns(data2017_df, ['TcpOpt', 'Inode', 'SrcWin', 'DstWin', 'dTos', 'dDSb', 'SrcTCPBase', 'DstTCPBase', 'dIpId', 'dHops', 'dTtl', 'Seq'])
# Remove every row that has a NaN in at least one of these columns.
# NOTE(review): the notes above list sIpId under "Drop Columns", but here the
# column is kept and only its NaN rows are removed - confirm which is intended.
data2017_df = removeNanRows(data2017_df, ['Sport', 'Dport', 'sIpId', 'sHops', 'sTtl', 'sDSb'])
# NaNs in these columns are replaced with 0.
data2017_df = replaceNansWithZero(data2017_df, ['SrcGap', 'DstGap', 'sTos'])
# Last expression of the cell: shows the resulting (rows, columns) shape.
data2017_df.shape

(2391347, 47)

## Convert all data to numeric form so that it can be passed to ML classifiers
- **StartTime** and **LastTime** need to be converted to timestamps - though they will not be included in the classification
- **SrcAddr** and **DstAddr** need to be converted to unique values. 
- **Sport** and **Dport** need to be cast from string and checked for hex (which would also have to be cast).
- **Flgs**, **Dir**, **Cause**, and **State** need to be converted to integers.


In [109]:
def convertToNum(category, val, unique_vals):
    """
    Map a string value to an integer id that is unique within its category.

    unique_vals is a dict of dicts (category -> {value: id}) that is updated
    in place: a category seen for the first time gets an empty mapping, and a
    value seen for the first time gets the next free id (0, 1, 2, ...).
    Surrounding whitespace of val is ignored.

    Returns: the unique int assigned to the value
    """
    mapping = unique_vals.setdefault(category, {})
    key = val.strip()
    if key not in mapping:
        mapping[key] = len(mapping)
    return mapping[key]
    
def convertPortToNum(val):
    """
    Convert a port number to an int.

    Port numbers are a special case: they are integers that may be stored as
    strings, either as hex numbers (containing "0x") or as standard int
    strings, so strings are checked for hex before casting. Values that are
    already numeric (float, int, numpy scalars) are cast directly.

    Parameters
    ----------
    val : str or number
        The port value as read from the data.

    Returns
    -------
    int

    Raises
    ------
    ValueError
        If the value cannot be interpreted as an integer (e.g. NaN or a
        non-numeric string).
    """
    if isinstance(val, str):
        text = val.strip()
        # Case-insensitive so that an upper-case "0X" prefix is also accepted.
        if "0x" in text.lower():
            return int(text, base=16)
        return int(text)
    # Anything that is not a string is cast as-is; this covers ints and numpy
    # scalars as well as plain floats.
    return int(val)
    
    

def createTimestamp(datetime_string):
    """
    Convert a date-time string to a POSIX timestamp.

    The string must be in the following format and is interpreted as UTC:
    14-06-2017 11:25:58.288831

    Returns: Timestamp (float, seconds since the epoch)
    """
    parsed = datetime.strptime(datetime_string, "%d-%m-%Y %H:%M:%S.%f")
    return parsed.replace(tzinfo=timezone.utc).timestamp()

# Registry of the value -> integer id mappings built by convertToNum, keyed by
# category. The same registry is reused for the 2019 data so ids stay consistent.
unique_vals = {}

# Ports and sIpId are cast to int (hex strings are handled by convertPortToNum).
data2017_df["Sport"] = data2017_df["Sport"].apply(convertPortToNum)
data2017_df["Dport"] = data2017_df["Dport"].apply(convertPortToNum)
data2017_df["sIpId"] = data2017_df["sIpId"].apply(convertPortToNum)

# Date-time strings become UTC timestamps.
data2017_df["StartTime"] = data2017_df["StartTime"].apply(createTimestamp)
data2017_df["LastTime"] = data2017_df["LastTime"].apply(createTimestamp)

# Source and destination addresses share the "ips" category, so one address
# gets the same id in both columns.
data2017_df["SrcAddr"] = data2017_df["SrcAddr"].apply(lambda ip: convertToNum("ips", ip, unique_vals))
data2017_df["DstAddr"] = data2017_df["DstAddr"].apply(lambda ip: convertToNum("ips", ip, unique_vals))

# Cause, State and Flgs are left as strings here; they are one-hot encoded
# once the 2017 and 2019 data have been combined.
data2017_df["Dir"] = data2017_df["Dir"].apply(lambda direction: convertToNum("dir", direction, unique_vals))
print(data2017_df.shape)


(2391347, 47)


## Load in malicious data from 2019 Dataset

In [110]:
# Load the files from the directory returned by getTestingDir() with the same
# reader as the 2017 data, and print the resulting (rows, columns) shape.
data2019_df = readDataIntoDataframe(getTestingDir())
print(data2019_df.shape)

(4503102, 118)


## Reduce Feature set so that both datasets have the same amount of features

In [111]:
def alignToTrainingData(train_df, test_df):
    """
    Restrict test_df to the columns of train_df, in train_df's column order.

    Columns of test_df that train_df does not have are dropped. The remaining
    columns are returned in the order they have in train_df, so that both
    frames line up when they are exported positionally (e.g. with to_numpy).
    Columns that only train_df has are not added.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Frame whose columns define what is kept and in which order.
    test_df : pandas.DataFrame
        Frame to align; it is not modified.

    Returns
    -------
    pandas.DataFrame
        A modified copy of test_df.
    """
    shared_columns = [col for col in train_df.columns if col in test_df.columns]
    return test_df.loc[:, shared_columns]

#data2019_mod_df = data2019_df.copy(deep=True)
# Keep only the 2019 columns that survived the 2017 cleaning, then print the
# resulting (rows, columns) shape.
data2019_df = alignToTrainingData(data2017_df, data2019_df)
print(data2019_df.shape)

(4503102, 47)


### Check for any remaining Nans

In [9]:
#pd.options.display.max_rows = 4000
# Percentage of missing values per column of the 2019 data; as the last
# expression of the cell it is rendered as the cell output.
data2019_df.isnull().mean() * 100

StartTime     0.000000
LastTime      0.000000
Flgs          0.000000
Dur           0.000000
RunTime       0.000000
IdleTime      0.000000
Mean          0.000000
Sum           0.000000
Min           0.000000
Max           0.000000
SrcAddr       0.000000
DstAddr       0.000000
Proto         0.000000
Sport         0.529946
Dport         0.529946
sTos          0.438520
sDSb          0.438520
sTtl          0.438520
sHops         0.438520
sIpId         0.438520
Cause         0.000000
TotPkts       0.000000
SrcPkts       0.000000
DstPkts       0.000000
TotBytes      0.000000
SrcBytes      0.000000
DstBytes      0.000000
Load          0.000000
SrcLoad       0.000000
DstLoad       0.000000
Loss          0.000000
SrcLoss       0.000000
DstLoss       0.000000
pLoss         0.000000
SrcGap        1.956163
DstGap        1.956163
Rate          0.000000
SrcRate       0.000000
DstRate       0.000000
Dir           0.000000
State         0.000000
TcpRtt        0.000000
SynAck        0.000000
AckDat     

### Remove any rows that contain Nans (provided the percentage of Nans is small, i.e. <2%)

In [112]:
# Remove every 2019 row that has a NaN in at least one of these columns.
# NOTE(review): for the 2017 data the NaNs of SrcGap and DstGap were replaced
# with 0 instead of having their rows removed - confirm the difference is intended.
data2019_df = removeNanRows(data2019_df, ['SrcGap', 'DstGap', 'Sport', 'Dport', 'sIpId', 'sHops', 'sTtl', 'sDSb'])

### Convert all non-numeric fields to numbers

In [113]:
# Same conversions as for the 2017 data. The shared unique_vals registry makes
# sure that a value seen in both datasets gets the same integer id.

# Ports and sIpId are cast to int (hex strings are handled by convertPortToNum).
data2019_df["Sport"] = data2019_df["Sport"].apply(convertPortToNum)
data2019_df["Dport"] = data2019_df["Dport"].apply(convertPortToNum)
data2019_df["sIpId"] = data2019_df["sIpId"].apply(convertPortToNum)

# Date-time strings become UTC timestamps.
data2019_df["StartTime"] = data2019_df["StartTime"].apply(createTimestamp)
data2019_df["LastTime"] = data2019_df["LastTime"].apply(createTimestamp)

# Source and destination addresses share the "ips" category.
data2019_df["SrcAddr"] = data2019_df["SrcAddr"].apply(lambda ip: convertToNum("ips", ip, unique_vals))
data2019_df["DstAddr"] = data2019_df["DstAddr"].apply(lambda ip: convertToNum("ips", ip, unique_vals))

# Cause, State and Flgs are left as strings here; they are one-hot encoded
# once the 2017 and 2019 data have been combined.
data2019_df["Dir"] = data2019_df["Dir"].apply(lambda direction: convertToNum("dir", direction, unique_vals))

### Add a new column representing whether the traffic is malicious or benign. 
0 is benign and 1 is malicious <br>
Go through time period of attack and set the value to 1. <br>
**Note** the time period was adjusted due to different time zones

In [114]:
# Every flow starts out labelled as benign (0); the 2019 attack windows are
# switched to 1 in the next cell.
data2019_df["Classification"] = 0
data2017_df["Classification"] = 0

In [115]:
# Flows are labelled by their StartTime. The bounds are day-month-year strings
# (6 December 2019) that createTimestamp interprets as UTC.
first_attack = (
    data2019_df["StartTime"].ge(createTimestamp("06-12-2019 02:20:00.00"))
    & data2019_df["StartTime"].lt(createTimestamp("06-12-2019 03:30:00.00"))
)
data2019_df.loc[first_attack, "Classification"] = 1

# NOTE(review): this window includes its end instant (<=) while the first one
# excludes it (<) - confirm whether the difference is intended.
second_attack = (
    data2019_df["StartTime"].ge(createTimestamp("06-12-2019 04:30:00.00"))
    & data2019_df["StartTime"].le(createTimestamp("06-12-2019 05:45:00.00"))
)
data2019_df.loc[second_attack, "Classification"] = 1

In [117]:
# Stack the 2017 rows on top of the 2019 rows with a fresh 0..n-1 index;
# sort=False keeps the column order instead of sorting the column names.
data_full_df = pd.concat([data2017_df, data2019_df], ignore_index=True, sort=False)
print(data_full_df.shape)

(6806361, 48)


## Need to convert the Flags to a one-hot encoding
Must be done manually: each `Flgs` string packs several single-character flags, so every character needs its own column.
Will need to load the 2017 and 2019 datasets before doing this.

In [118]:
# Every distinct Flgs string seen so far, in order of first appearance.
unique_flags = []
def add_unique(flags_str):
    """
    Record a flag string in the module-level unique_flags list if it is new.

    Returns the string when it was added, and None when it was already known.
    """
    if flags_str in unique_flags:
        return None
    unique_flags.append(flags_str)
    return flags_str
    
# Every distinct single-character flag seen so far, in order of first appearance.
individual_flags = []
def extractIndividualFlags(unique_flags):
    """
    Collect the distinct flag characters of the given flag strings.

    Each character other than the space is appended to the module-level
    individual_flags list the first time it is seen. The list is extended in
    place and nothing is returned.

    Parameters
    ----------
    unique_flags : iterable of str
        Flag strings such as "e g" or "eU".
    """
    for flag_str in unique_flags:
        for char in flag_str.replace(" ", ""):
            # Checking membership per character (not once per token) keeps a
            # character that is repeated inside one token from being added twice.
            if char not in individual_flags:
                individual_flags.append(char)
                
# apply is used only for its side effect here: add_unique records every
# distinct (whitespace-trimmed) Flgs string in unique_flags.
data_full_df["Flgs"].apply(lambda x : add_unique(x.strip()))
print(unique_flags)
# Break the flag strings down into their individual characters.
extractIndividualFlags(unique_flags)
#Ugly but needs to be done manually. This is also a possible unique value.
# "*2" gets its own entry (and later its own "Flg-*2" column) in addition to "*".
individual_flags.append("*2")
print(individual_flags)

['e g', 'e', 'e i', 'e s', 'e r', 'e d', 'eU', 'e &', 'e     A', 'e *', '* *', '* d', '*', '* s', '* r']
['e', 'g', 'i', 's', 'r', 'd', 'U', '&', 'A', '*', '*2']


In [119]:
def add_new_columns(df, individual_flags):
    """
    Add one zero-filled "Flg-<flag>" column per flag to df, in place.

    Returns the same df object that was passed in.
    """
    for flag_name in individual_flags:
        column_name = "Flg-" + flag_name
        df[column_name] = 0
    return df

# add_new_columns modifies data_full_df in place, so its return value is not needed.
add_new_columns(data_full_df, individual_flags)
print(data_full_df.head())

      StartTime      LastTime        Flgs       Dur   RunTime      IdleTime  \
0  1.497412e+09  1.497412e+09   e g        4.995715  4.995715  1.614704e+09   
1  1.497412e+09  1.497412e+09   e          4.991427  4.991427  1.614704e+09   
2  1.497412e+09  1.497412e+09   e g        4.999589  4.999589  1.614704e+09   
3  1.497412e+09  1.497412e+09   e          4.990616  4.990616  1.614704e+09   
4  1.497412e+09  1.497412e+09   e          4.999010  4.999010  1.614704e+09   

       Mean       Sum       Min       Max  ...  Flg-g  Flg-i  Flg-s  Flg-r  \
0  4.995715  4.995715  4.995715  4.995715  ...      0      0      0      0   
1  4.991427  4.991427  4.991427  4.991427  ...      0      0      0      0   
2  4.999589  4.999589  4.999589  4.999589  ...      0      0      0      0   
3  4.990616  4.990616  4.990616  4.990616  ...      0      0      0      0   
4  4.999010  4.999010  4.999010  4.999010  ...      0      0      0      0   

   Flg-d  Flg-U  Flg-&  Flg-A  Flg-*  Flg-*2  
0      0 

In [120]:
def convertFlagToNum(row, flg_str):
    """
    Set the "Flg-<char>" entries of a row to 1 for every flag in flg_str.

    Spaces in flg_str are ignored. Every character other than "*" sets its own
    "Flg-<char>" entry. A "*" is mapped as follows:
    - the string is just "*": sets "Flg-*"
    - the string holds exactly two "*": sets both "Flg-*" and "Flg-*2"
    - otherwise, the first "*" is not the first character: sets "Flg-*2"
    - otherwise (the string starts with "*"): sets "Flg-*"

    The row is modified in place and returned.
    """
    compact = flg_str.replace(" ", "")
    for flag_char in compact:
        if flag_char != '*':
            row["Flg-" + flag_char] = 1
            continue
        if len(compact) == 1:
            row["Flg-*"] = 1
        elif compact.count("*") == 2:
            row["Flg-*"] = 1
            row["Flg-*2"] = 1
        elif compact.index("*") != 0:
            row["Flg-*2"] = 1
        else:
            row["Flg-*"] = 1
    return row
                    
# Vectorised flag encoding. It sets the same "Flg-*" columns as calling
# convertFlagToNum on every row, but works on whole columns instead of building
# one Python Series per row, which matters with millions of rows.
compact_flags = data_full_df["Flgs"].str.replace(" ", "", regex=False)

# Every character other than "*" marks its own column wherever it occurs.
for flag_char in sorted(set("".join(compact_flags.unique())) - {"*"}):
    data_full_df.loc[compact_flags.str.contains(flag_char, regex=False), "Flg-" + flag_char] = 1

# "*" rules: exactly two stars set both columns; otherwise a leading star sets
# "Flg-*" and a star that is not the first character sets "Flg-*2".
star_first = compact_flags.str.startswith("*")
has_star = compact_flags.str.contains("*", regex=False)
two_stars = compact_flags.str.count(r"\*") == 2
data_full_df.loc[two_stars | star_first, "Flg-*"] = 1
data_full_df.loc[two_stars | (has_star & ~star_first), "Flg-*2"] = 1
            

In [121]:
# The raw Flgs strings are no longer needed once the Flg-* columns are filled.
data_full_df = data_full_df.drop(["Flgs"], axis=1)

In [122]:
# One-hot encode Cause: one indicator column per distinct value, named after
# the value itself, replaces the original column.
cause_full = pd.get_dummies(data_full_df["Cause"])
data_full_df = data_full_df.drop(columns=["Cause"]).join(cause_full)

In [123]:
# One-hot encode State: one indicator column per distinct value, named after
# the value itself, replaces the original column.
state_full = pd.get_dummies(data_full_df["State"])
data_full_df = data_full_df.drop(columns=["State"]).join(state_full)

In [132]:
# Reorder the columns so that the Classification label is the last one.
cols = data_full_df.columns
cols_list = list(cols)
# remove + append moves the label from its current position to the end.
cols_list.remove("Classification")
cols_list.append("Classification")
data_full_df = data_full_df[cols_list]

In [134]:
# Write the combined dataset to CSV without the row index.
data_full_df.to_csv("data_full_df.csv", index=False)

In [137]:
# Save the combined dataset as a numpy array; the column names are not part of
# the array (they are stored separately as the 'features' object).
data_full_arr = data_full_df.to_numpy()
np.save("data_full_df.npy", data_full_arr)

### Convert to numpy arrays and save
This will allow us to load only the arrays and unload everything else (should help with memory issues).

In [14]:
# Convert each dataset to a numpy array; the column names are not carried over.
data2019_arr = data2019_df.to_numpy()
data2017_arr = data2017_df.to_numpy()

In [15]:
# Write both arrays to .npy files in the working directory.
np.save('data2019_arr.npy', data2019_arr)
np.save('data2017_arr.npy', data2017_arr)

In [135]:
"""
This code is taken straight from: https://stackoverflow.com/questions/19201290/how-to-save-a-dictionary-to-a-file/32216025
"""

def save_obj(obj, name ):
    """
    Pickle an object to obj/<name>.pkl, creating the directory if needed.

    Parameters
    ----------
    obj : object
        Any picklable object.
    name : str
        File name without the '.pkl' extension.
    """
    path = 'obj/'+ name + '.pkl'
    # open() does not create missing directories, so make sure it exists.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # Plain 'wb' is enough: the file is only written here, never read back.
    with open(path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name ):
    """
    Load and return the object pickled at obj/<name>.pkl.

    Only use this on files you wrote yourself: unpickling untrusted data can
    execute arbitrary code.
    """
    pickle_path = 'obj/' + name + '.pkl'
    with open(pickle_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

In [17]:
# Persist the value -> integer id mappings so the ids can be decoded later.
save_obj(unique_vals, "unique_vals")

In [18]:
# Write the 2017 dataset to CSV without the row index.
data2017_df.to_csv('data2017.csv', index=False)

In [19]:
# Write the 2019 dataset to CSV without the row index.
data2019_df.to_csv('data2019.csv', index=False)

In [136]:
# Store the column names of the combined dataset, in column order, so the
# columns of the saved numpy array can be identified.
features = list(data_full_df.columns)
save_obj(features, 'features')

In [265]:
import json
# Invert the address -> id mapping so that an id can be looked up back to its
# IP address.
ip_dict_reversed = {num: ip for ip, num in unique_vals["ips"].items()}

# Note that JSON object keys are strings, so the integer ids are written as text.
with open('ipdict.json', 'w+') as fp:
    json.dump(ip_dict_reversed, fp)

## Make Plots
Dataframes seem better suited to making plots - so I'll start making plots here. Might move this to a different Notebook eventually. 

In [130]:
# Build the combined frame here so that this cell also works on a fresh
# kernel; data_df is otherwise only created in a later cell.
data_df = pd.concat([data2017_df, data2019_df])
data_df.columns

Index(['StartTime', 'LastTime', 'Flgs', 'Dur', 'RunTime', 'IdleTime', 'Mean',
       'Sum', 'Min', 'Max', 'SrcAddr', 'DstAddr', 'Proto', 'Sport', 'Dport',
       'sTos', 'sDSb', 'sTtl', 'sHops', 'sIpId', 'Cause', 'TotPkts', 'SrcPkts',
       'DstPkts', 'TotBytes', 'SrcBytes', 'DstBytes', 'Load', 'SrcLoad',
       'DstLoad', 'Loss', 'SrcLoss', 'DstLoss', 'pLoss', 'SrcGap', 'DstGap',
       'Rate', 'SrcRate', 'DstRate', 'Dir', 'State', 'TcpRtt', 'SynAck',
       'AckDat', 'Offset', 'sMeanPktSz', 'dMeanPktSz', 'Classification'],
      dtype='object')

In [245]:
# Combine both datasets (each keeps its own row index), then compute the mean
# TotBytes for every (Dport, DstAddr, Classification) combination.
data_df = pd.concat([data2017_df, data2019_df])
grouped_data = data_df.groupby(["Dport", "DstAddr", "Classification"]).agg({'TotBytes':['mean']})


In [246]:
# Give the single aggregated column a flat name, then turn the group keys
# (Dport, DstAddr, Classification) back into ordinary columns.
grouped_data.columns = ["TotBytes"]
grouped_data = grouped_data.reset_index()
#grouped_data.to_csv("BytesPortAddr.csv", index=False)

In [256]:
# CumBytes: for each (Dport, DstAddr) pair, the sum of the per-class mean
# TotBytes values.
grouped_data["CumBytes"] = grouped_data.groupby(['Dport', 'DstAddr'])['TotBytes'].transform('sum')
# Percent: this row's share of that sum.
grouped_data["Percent"] = grouped_data["TotBytes"] / grouped_data["CumBytes"]
# For rows with Classification == 0 store the complement instead, i.e. the
# share that does not belong to class 0.
grouped_data.loc[grouped_data.Classification == 0, "Percent"] = 1 - grouped_data.loc[grouped_data.Classification == 0, "Percent"]
# Keep only the first row of every (Dport, DstAddr) pair.
grouped_data = grouped_data.drop_duplicates(subset=["Dport", "DstAddr"])

In [258]:
# Write the per (Dport, DstAddr) summary to CSV without the row index.
grouped_data.to_csv("BytesPortAddrMal.csv", index=False)

In [257]:
# Inspect the summary row for destination address id 0 and destination port 44818.
grouped_data[(grouped_data["DstAddr"] == 0) & (grouped_data["Dport"] == 44818)]


Unnamed: 0,Dport,DstAddr,Classification,TotBytes,CumBytes,Percent
24578,44818,0,0,151688.358597,338525.773139,0.551915


In [159]:
# List the distinct destination ports recorded for destination address id 21.
grouped_data[grouped_data["DstAddr"] == 21]["Dport"].unique()

array([    0,     4, 49162, 49188, 49204, 49223, 49243, 49244, 49258,
       49304, 49336, 49394, 49439, 49440, 49460, 49481, 49498, 49502,
       49514, 49535, 49544, 49559, 49566, 49619, 49627, 49645, 49680,
       49691, 49791, 49794, 49796, 49808, 49842, 49849, 49855, 49942,
       50005, 50015, 50046, 50061, 50076, 50084, 50088, 50107, 50131,
       50153, 50160, 50213, 50261, 50276, 50294, 50312, 50316, 50353,
       50370, 50386, 50409, 50423, 50432, 50464, 50498, 50571, 50600,
       50640, 50645, 50661, 50676, 50678, 50690, 50709, 50710, 50739,
       50750, 50775, 50826, 50843, 50965, 50996, 51003, 51019, 51022,
       51030, 51040, 51085, 51090, 51116, 51138, 51180, 51183, 51222,
       51283, 51297, 51318, 51343, 51374, 51396, 51405, 51408, 51414,
       51418, 51419, 51434, 51440, 51457, 51461, 51464, 51475, 51531,
       51583, 51598, 51608, 51634, 51638, 51653, 51675, 51693, 51722,
       51724, 51736, 51770, 51779, 51788, 51799, 51831, 51832, 51855,
       51906, 51960,