## Import *.pcap par tshark, labellisaton par Suricata, ML

### Set-Up

In [1]:
# imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import logging
import collections
import seaborn as sns
from pprint import pprint
import os
import json

# import ipywidgets
# import warnings

# import pyshark
# import networkx as nx

# from sklearn.preprocessing import OrdinalEncoder, StandardScaler
# from sklearn.pipeline import Pipeline
# from sklearn.decomposition import PCA
# from sklearn.cluster import KMeans, DBSCAN
# from sklearn.manifold import TSNE
# from sklearn.metrics import pairwise_distances, silhouette_score
# from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
# import umap

# from itertools import product

# PATH change to access library cyberlib
import sys
sys.path.append('/home/benjamin/Folders_Python/Cyber/libs')
import cyberlib as cbl

# to allow PyShark to run in Jupyter notebooks
# import nest_asyncio
# nest_asyncio.apply()

In [2]:
# logging set-up

lg = cbl.GetLogger('/home/benjamin/Folders_Python/Cyber/logs/pcap_labellisation.log')
logger = lg.get_custom_logger()

# start your engine
logger.info("-------- new run --------")

### Import *pcap by tshark, export to *.csv then DataFrame

In [3]:
# which *.pcap

DFNAME = 'smallFlows'

PCAPFILE = '/home/benjamin/Folders_Python/Cyber/data/input_pcaps/' + DFNAME + '.pcap'

In [4]:
# We use the tshark CLI to parse the *.pcap file and output a *.csv file for pandas
# doc here : https://www.wireshark.org/docs/man-pages/tshark.html

# for time exports : https://osqa-ask.wireshark.org/questions/30393/tshark-how-to-output-date-in-iso-format/
# NB : outputs times in UTC to avoid time zone mismatches
# -------> there is column 'Date' added in Wireshark preferences, with output in UTC day and time
# -------> output -t ud requested in tshark : forces otuput in UTC

# tshark :
# -r                :   reads the *.pcap
# -2, -R "tcp"      :   does 2 passes and keeps packets part of TCP conversations only
# -T fields         :   outputs a file with fields
# -E header=y       :   keeps the fields names on first row for pd.read_csv
# -E separator=,    :   for *.csv format
# -e <fields>       :   desired output fields
# -o                :   formats of the data in the fields

cli="tshark -r " + PCAPFILE + """ -2 \
    -R "tcp" \
    -T fields -E header=y -E separator=, \
    -e _ws.col.Date -t ud \
    -e frame.number \
    -e eth.src -e eth.dst \
    -e ip.src_host -e ip.dst_host \
    -e ip.len -e ip.hdr_len -e ip.ttl \
    -e tcp.srcport -e tcp.dstport -e tcp.stream -e tcp.len \
    -e tcp.seq -e tcp.ack -e tcp.hdr_len -e tcp.time_relative \
    -e tcp.time_delta \
    -e tcp.flags \
    -o 'gui.column.format:"No","%m","Date","%t","Source","%s","Destination","%d","Protocol","%p","Length","%L","Info","%i"' \
    > ~/Folders_Python/Cyber/data/input_pcaps/to_csv/test.csv"""

%time exit_code = os.system(cli)

if exit_code == 0:
    logger.info('Executed successfully *.pcap to *.csv translation with tshark')
else:
    logger.error('Error while using tshark to translate from *.pcap to *.csv')
    raise NameError('Error while using tshark to translate from *.pcap to *.csv')

CPU times: user 1.61 ms, sys: 165 µs, total: 1.78 ms
Wall time: 2.52 s


In [5]:
filename = '/home/benjamin/Folders_Python/Cyber/data/input_pcaps/to_csv/test.csv'

with open(file=filename, encoding='utf-8') as f:
    df_raw = pd.read_csv(
        f,
        header=0,               # using first row as columns names. they are exported by tshark -E header=y
        on_bad_lines='warn'     # if a line does not have the right length, skip it but warn me
        )

In [6]:
df_raw['DateTime'] = pd.to_datetime(df_raw['_ws.col.Date'], utc=True)

df_raw.drop(columns=['_ws.col.Date'], inplace=True)

In [7]:
df_raw

Unnamed: 0,frame.number,eth.src,eth.dst,ip.src_host,ip.dst_host,ip.len,ip.hdr_len,ip.ttl,tcp.srcport,tcp.dstport,tcp.stream,tcp.len,tcp.seq,tcp.ack,tcp.hdr_len,tcp.time_relative,tcp.time_delta,tcp.flags,DateTime
0,1,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.138,983,20,128,57011,80,0,943,1,1,20,0.000000,0.000000,0x0018,2011-01-25 18:52:22.484409+00:00
1,2,00:1a:8c:15:f9:80,40:61:86:9a:f1:f5,72.14.213.138,192.168.3.131,426,20,52,80,57011,0,386,1,944,20,0.029841,0.029841,0x0018,2011-01-25 18:52:22.514250+00:00
2,3,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.102,52,20,128,55950,80,1,0,0,0,32,0.000000,0.000000,0x0002,2011-01-25 18:52:22.708292+00:00
3,4,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.138,40,20,128,57011,80,0,0,944,387,20,0.229423,0.199582,0x0010,2011-01-25 18:52:22.713832+00:00
4,5,00:1a:8c:15:f9:80,40:61:86:9a:f1:f5,72.14.213.102,192.168.3.131,52,20,52,80,55950,1,0,0,1,32,0.018766,0.018766,0x0012,2011-01-25 18:52:22.727058+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13703,13704,08:00:27:cc:3f:1b,52:54:00:12:35:02,10.0.2.15,65.55.15.244,40,20,128,2537,5480,407,0,5039,5738,20,71.195375,66.560501,0x0014,2011-01-25 18:57:20.768701+00:00
13704,13705,08:00:27:cc:3f:1b,52:54:00:12:35:02,10.0.2.15,207.46.105.186,40,20,128,2540,5480,409,0,398,93,20,70.606228,5.540471,0x0014,2011-01-25 18:57:20.768769+00:00
13705,13706,08:00:27:cc:3f:1b,52:54:00:12:35:02,10.0.2.15,96.17.8.49,40,20,128,2547,5480,419,0,496,8189,20,64.405045,64.259982,0x0014,2011-01-25 18:57:20.768861+00:00
13706,13707,08:00:27:cc:3f:1b,52:54:00:12:35:02,10.0.2.15,91.103.140.2,40,20,128,2546,5480,417,0,525,270,20,64.884164,64.357688,0x0014,2011-01-25 18:57:20.768911+00:00


In [8]:
# # ordinal encoding with Pandas

# columns_to_encode_as_ordinal = ['frame.number', 'eth.src', 'eth.dst', 'ip.src_host', 'ip.dst_host', 'tcp.flags']

# df_ord = pd.DataFrame()
# for c in columns_to_encode_as_ordinal:
#     codes, _ = pd.factorize(df_raw[c])
#     df_sup = pd.DataFrame(data={ c : list(codes) })
#     df_ord = pd.concat([df_ord, df_sup], axis=1)
    
# df = df_raw.drop(columns=columns_to_encode_as_ordinal)
# df.reset_index(drop=True)

# df = pd.concat([df, df_ord], axis=1)

# # columns_to_drop = ['TIMESTAMP_ts']
# # df.drop(columns=columns_to_drop, inplace=True)

In [9]:
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13708 entries, 0 to 13707
Data columns (total 19 columns):
 #   Column             Non-Null Count  Dtype              
---  ------             --------------  -----              
 0   frame.number       13708 non-null  int64              
 1   eth.src            13708 non-null  object             
 2   eth.dst            13708 non-null  object             
 3   ip.src_host        13708 non-null  object             
 4   ip.dst_host        13708 non-null  object             
 5   ip.len             13708 non-null  int64              
 6   ip.hdr_len         13708 non-null  int64              
 7   ip.ttl             13708 non-null  int64              
 8   tcp.srcport        13708 non-null  int64              
 9   tcp.dstport        13708 non-null  int64              
 10  tcp.stream         13708 non-null  int64              
 11  tcp.len            13708 non-null  int64              
 12  tcp.seq            13708 non-null  int64      

### Labellisation by Suricata

In [10]:
DATA_OUTPUT = '/home/benjamin/Folders_Python/Cyber/data/outputs'

cli="suricata -r " + PCAPFILE + " tcp " + " -l " + DATA_OUTPUT # + " -k none"

%time exit_code = os.system(cli)

if exit_code == 0:
    logger.info('Executed successfully *.pcap to EVE.json translation with suricata')
else:
    logger.error('Error while using suricata to analyse from *.pcap to EVE.json')
    raise NameError('Error while using suricata to analyse from *.pcap to EVE.json')

21/8/2023 -- 18:07:50 - <Notice> - This is Suricata version 6.0.10 RELEASE running in USER mode
21/8/2023 -- 18:07:50 - <Notice> - all 5 packet processing threads, 4 management threads initialized, engine started.
21/8/2023 -- 18:07:50 - <Notice> - Signal Received.  Stopping engine.
21/8/2023 -- 18:07:50 - <Notice> - Pcap-file module read 1 files, 13708 packets, 9135182 bytes
CPU times: user 26 µs, sys: 4 ms, total: 4.03 ms
Wall time: 345 ms


In [11]:
# !suricata -r /home/benjamin/Folders_Python/Cyber/data/input_pcaps/test.pcap -l /home/benjamin/Folders_Python/Cyber/data/outputs -k none

In [12]:
# Pandas provides a useful method – json_normalize – for normalizing nested JSON fields into dataframe. Resulting columns use dot notation to signify nested objects, similar to how Elasticsearch does it

SURICATA_EVE_LOG = "/home/benjamin/Folders_Python/Cyber/data/outputs/eve.json"

with open (SURICATA_EVE_LOG) as packets:
    df_log = pd.json_normalize(
        [json.loads(packet) for packet in packets],
        max_level=1
    )

In [13]:
df_log

Unnamed: 0,timestamp,flow_id,pcap_cnt,event_type,src_ip,src_port,dest_ip,dest_port,proto,tls.subject,...,stats.decoder,stats.flow,stats.defrag,stats.flow_bypassed,stats.tcp,stats.detect,stats.app_layer,stats.http,stats.ftp,stats.file_store
0,2011-01-25T19:52:22.968559+0100,2.051404e+15,20.0,tls,192.168.3.131,52152.0,72.14.213.147,443.0,TCP,"C=US, ST=California, L=Mountain View, O=Google...",...,,,,,,,,,,
1,2011-01-25T19:52:23.466591+0100,7.151276e+14,100.0,http,192.168.3.131,55953.0,65.55.206.209,80.0,TCP,,...,,,,,,,,,,
2,2011-01-25T19:52:23.832607+0100,3.339836e+14,161.0,http,192.168.3.131,55959.0,65.55.5.231,80.0,TCP,,...,,,,,,,,,,
3,2011-01-25T19:52:23.762574+0100,7.354749e+13,156.0,http,192.168.3.131,55956.0,66.235.139.121,80.0,TCP,,...,,,,,,,,,,
4,2011-01-25T19:52:24.330674+0100,7.238914e+14,186.0,http,192.168.3.131,55966.0,63.215.202.48,80.0,TCP,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21645,2011-01-25T19:52:22.484409+0100,9.844585e+14,,flow,192.168.3.131,56161.0,66.235.139.121,80.0,TCP,,...,,,,,,,,,,
21646,2011-01-25T19:52:22.484409+0100,4.217620e+14,,flow,10.0.2.15,2550.0,64.4.35.57,61863.0,TCP,,...,,,,,,,,,,
21647,2011-01-25T19:52:22.484409+0100,2.251540e+15,,flow,192.168.3.131,58609.0,208.82.236.130,80.0,TCP,,...,,,,,,,,,,
21648,2011-01-25T19:52:22.484409+0100,8.442085e+14,,flow,192.168.3.131,56438.0,65.54.95.75,80.0,TCP,,...,,,,,,,,,,


In [14]:
df_anomaly = df_log[df_log['event_type']=='anomaly']  # we keep only anomalies

In [15]:
df_anomaly.head(10)

Unnamed: 0,timestamp,flow_id,pcap_cnt,event_type,src_ip,src_port,dest_ip,dest_port,proto,tls.subject,...,stats.decoder,stats.flow,stats.defrag,stats.flow_bypassed,stats.tcp,stats.detect,stats.app_layer,stats.http,stats.ftp,stats.file_store
121,2011-01-25T19:52:49.989547+0100,1987239000000000.0,1029.0,anomaly,192.168.3.131,56021.0,66.235.139.121,80.0,TCP,,...,,,,,,,,,,
135,2011-01-25T19:52:51.106661+0100,240630300000000.0,1242.0,anomaly,192.168.3.131,56048.0,66.220.149.32,80.0,TCP,,...,,,,,,,,,,
143,2011-01-25T19:52:51.228619+0100,1987239000000000.0,1272.0,anomaly,192.168.3.131,56021.0,66.235.139.121,80.0,TCP,,...,,,,,,,,,,
144,2011-01-25T19:52:51.228619+0100,1987239000000000.0,1272.0,anomaly,192.168.3.131,56021.0,66.235.139.121,80.0,TCP,,...,,,,,,,,,,
223,2011-01-25T19:53:16.796686+0100,240630300000000.0,2789.0,anomaly,192.168.3.131,56048.0,66.220.149.32,80.0,TCP,,...,,,,,,,,,,
236,2011-01-25T19:52:51.089635+0100,680866600000000.0,1240.0,anomaly,192.168.3.131,56053.0,66.235.133.62,80.0,TCP,,...,,,,,,,,,,
240,2011-01-25T19:53:16.701303+0100,1392425000000000.0,2786.0,anomaly,192.168.3.131,56134.0,66.235.133.62,80.0,TCP,,...,,,,,,,,,,
257,2011-01-25T19:52:51.436632+0100,680866600000000.0,1367.0,anomaly,192.168.3.131,56053.0,66.235.133.62,80.0,TCP,,...,,,,,,,,,,
263,2011-01-25T19:53:27.976501+0100,240630300000000.0,3362.0,anomaly,192.168.3.131,56048.0,66.220.149.32,80.0,TCP,,...,,,,,,,,,,
284,2011-01-25T19:53:27.762489+0100,1392425000000000.0,3348.0,anomaly,192.168.3.131,56134.0,66.235.133.62,80.0,TCP,,...,,,,,,,,,,


In [16]:
df_anomaly['anomaly.event'].unique()

array(['UNABLE_TO_MATCH_RESPONSE_TO_REQUEST', 'INVALID_RECORD_LENGTH',
       'INVALID_RECORD_TYPE', 'stream.reassembly_seq_gap',
       'stream.fin_but_no_session', 'stream.fin_out_of_window',
       'stream.pkt_invalid_ack', 'stream.fin_invalid_ack',
       'stream.est_packet_out_of_window', 'stream.est_invalid_ack',
       'stream.rst_but_no_session', 'stream.pkt_retransmission',
       'stream.pkt_broken_ack'], dtype=object)

In [17]:
df_anomaly['DateTime'] = pd.to_datetime(df_anomaly['timestamp'],utc=True)

df_anomaly.drop(columns=['timestamp'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_anomaly['DateTime'] = pd.to_datetime(df_anomaly['timestamp'],utc=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_anomaly.drop(columns=['timestamp'], inplace=True)


In [18]:
df_anomaly

Unnamed: 0,flow_id,pcap_cnt,event_type,src_ip,src_port,dest_ip,dest_port,proto,tls.subject,tls.issuerdn,...,stats.flow,stats.defrag,stats.flow_bypassed,stats.tcp,stats.detect,stats.app_layer,stats.http,stats.ftp,stats.file_store,DateTime
121,1.987239e+15,1029.0,anomaly,192.168.3.131,56021.0,66.235.139.121,80.0,TCP,,,...,,,,,,,,,,2011-01-25 18:52:49.989547+00:00
135,2.406303e+14,1242.0,anomaly,192.168.3.131,56048.0,66.220.149.32,80.0,TCP,,,...,,,,,,,,,,2011-01-25 18:52:51.106661+00:00
143,1.987239e+15,1272.0,anomaly,192.168.3.131,56021.0,66.235.139.121,80.0,TCP,,,...,,,,,,,,,,2011-01-25 18:52:51.228619+00:00
144,1.987239e+15,1272.0,anomaly,192.168.3.131,56021.0,66.235.139.121,80.0,TCP,,,...,,,,,,,,,,2011-01-25 18:52:51.228619+00:00
223,2.406303e+14,2789.0,anomaly,192.168.3.131,56048.0,66.220.149.32,80.0,TCP,,,...,,,,,,,,,,2011-01-25 18:53:16.796686+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21112,9.065564e+14,12772.0,anomaly,192.168.3.131,57191.0,207.46.216.54,5480.0,TCP,,,...,,,,,,,,,,2011-01-25 18:56:49.801957+00:00
21113,9.065564e+14,12773.0,anomaly,207.46.216.54,5480.0,192.168.3.131,57191.0,TCP,,,...,,,,,,,,,,2011-01-25 18:56:49.816039+00:00
21115,6.807356e+14,13694.0,anomaly,192.168.3.131,57229.0,204.14.234.85,80.0,TCP,,,...,,,,,,,,,,2011-01-25 18:57:19.584353+00:00
21116,6.807356e+14,13696.0,anomaly,204.14.234.85,80.0,192.168.3.131,57229.0,TCP,,,...,,,,,,,,,,2011-01-25 18:57:19.621772+00:00


In [19]:
# idée : 
# 1. parcourir les DateTime du sous-ensemble des anomalies détectées par Suricata
# 2. regarder s'il y a un paquet avec ce timestamp exact dans l'extraction tshark
# 3. si oui : flagger y=1 le paquet dans la df tshark (et rajouter les champs d'explication)
# 4. si non : logger une anomalie orpheline

In [29]:
df_raw['class'] = 0
ctr_anomalies = 0
df_raw2 = df_raw.set_index(keys='DateTime', drop=False)

In [37]:
df_raw2.head(10)

Unnamed: 0_level_0,frame.number,eth.src,eth.dst,ip.src_host,ip.dst_host,ip.len,ip.hdr_len,ip.ttl,tcp.srcport,tcp.dstport,tcp.stream,tcp.len,tcp.seq,tcp.ack,tcp.hdr_len,tcp.time_relative,tcp.time_delta,tcp.flags,DateTime,class
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2011-01-25 18:52:22.484409+00:00,1,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.138,983,20,128,57011,80,0,943,1,1,20,0.0,0.0,0x0018,2011-01-25 18:52:22.484409+00:00,0
2011-01-25 18:52:22.514250+00:00,2,00:1a:8c:15:f9:80,40:61:86:9a:f1:f5,72.14.213.138,192.168.3.131,426,20,52,80,57011,0,386,1,944,20,0.029841,0.029841,0x0018,2011-01-25 18:52:22.514250+00:00,0
2011-01-25 18:52:22.708292+00:00,3,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.102,52,20,128,55950,80,1,0,0,0,32,0.0,0.0,0x0002,2011-01-25 18:52:22.708292+00:00,0
2011-01-25 18:52:22.713832+00:00,4,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.138,40,20,128,57011,80,0,0,944,387,20,0.229423,0.199582,0x0010,2011-01-25 18:52:22.713832+00:00,0
2011-01-25 18:52:22.727058+00:00,5,00:1a:8c:15:f9:80,40:61:86:9a:f1:f5,72.14.213.102,192.168.3.131,52,20,52,80,55950,1,0,0,1,32,0.018766,0.018766,0x0012,2011-01-25 18:52:22.727058+00:00,0
2011-01-25 18:52:22.727105+00:00,6,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.102,40,20,128,55950,80,1,0,1,1,20,0.018813,4.7e-05,0x0010,2011-01-25 18:52:22.727105+00:00,0
2011-01-25 18:52:22.727459+00:00,7,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.102,984,20,128,55950,80,1,944,1,1,20,0.019167,0.000354,0x0018,2011-01-25 18:52:22.727459+00:00,0
2011-01-25 18:52:22.747390+00:00,8,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.102,40,20,128,55950,80,1,0,945,1,20,0.039098,0.019931,0x0011,2011-01-25 18:52:22.747390+00:00,0
2011-01-25 18:52:22.748483+00:00,9,00:1a:8c:15:f9:80,40:61:86:9a:f1:f5,72.14.213.102,192.168.3.131,40,20,52,80,55950,1,0,1,945,20,0.040191,0.001093,0x0010,2011-01-25 18:52:22.748483+00:00,0
2011-01-25 18:52:22.761080+00:00,10,00:1a:8c:15:f9:80,40:61:86:9a:f1:f5,72.14.213.102,192.168.3.131,527,20,52,80,55950,1,487,1,945,20,0.052788,0.012597,0x0018,2011-01-25 18:52:22.761080+00:00,0


In [34]:
for anomaly_datetime in df_anomaly['DateTime']:
    try:
        print(anomaly_datetime)
        # print(ctr_anomalies)
        print(df_raw.loc[anomaly_datetime])
        ctr_anomalies += 1
    except KeyError as e:
        # logger.error('anomaly packet from Suricata not found')
        print('not found')
        continue

2011-01-25 18:52:49.989547+00:00
not found
2011-01-25 18:52:51.106661+00:00
not found
2011-01-25 18:52:51.228619+00:00
not found
2011-01-25 18:52:51.228619+00:00
not found
2011-01-25 18:53:16.796686+00:00
not found
2011-01-25 18:52:51.089635+00:00
not found
2011-01-25 18:53:16.701303+00:00
not found
2011-01-25 18:52:51.436632+00:00
not found
2011-01-25 18:53:27.976501+00:00
not found
2011-01-25 18:53:27.762489+00:00
not found
2011-01-25 18:53:46.585886+00:00
not found
2011-01-25 18:53:58.213105+00:00
not found
2011-01-25 18:54:12.388720+00:00
not found
2011-01-25 18:54:08.108897+00:00
not found
2011-01-25 18:54:13.140906+00:00
not found
2011-01-25 18:54:14.193016+00:00
not found
2011-01-25 18:53:16.279190+00:00
not found
2011-01-25 18:53:16.601206+00:00
not found
2011-01-25 18:53:16.601206+00:00
not found
2011-01-25 18:54:23.641375+00:00
not found
2011-01-25 18:54:24.312611+00:00
not found
2011-01-25 18:54:12.543820+00:00
not found
2011-01-25 18:54:35.113376+00:00
not found
2011-01-25 

Unnamed: 0,frame.number,eth.src,eth.dst,ip.src_host,ip.dst_host,ip.len,ip.hdr_len,ip.ttl,tcp.srcport,tcp.dstport,tcp.stream,tcp.len,tcp.seq,tcp.ack,tcp.hdr_len,tcp.time_relative,tcp.time_delta,tcp.flags,DateTime,class
0,1,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.138,983,20,128,57011,80,0,943,1,1,20,0.000000,0.000000,0x0018,2011-01-25 18:52:22.484409+00:00,0
1,2,00:1a:8c:15:f9:80,40:61:86:9a:f1:f5,72.14.213.138,192.168.3.131,426,20,52,80,57011,0,386,1,944,20,0.029841,0.029841,0x0018,2011-01-25 18:52:22.514250+00:00,0
2,3,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.102,52,20,128,55950,80,1,0,0,0,32,0.000000,0.000000,0x0002,2011-01-25 18:52:22.708292+00:00,0
3,4,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.138,40,20,128,57011,80,0,0,944,387,20,0.229423,0.199582,0x0010,2011-01-25 18:52:22.713832+00:00,0
4,5,00:1a:8c:15:f9:80,40:61:86:9a:f1:f5,72.14.213.102,192.168.3.131,52,20,52,80,55950,1,0,0,1,32,0.018766,0.018766,0x0012,2011-01-25 18:52:22.727058+00:00,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13703,13704,08:00:27:cc:3f:1b,52:54:00:12:35:02,10.0.2.15,65.55.15.244,40,20,128,2537,5480,407,0,5039,5738,20,71.195375,66.560501,0x0014,2011-01-25 18:57:20.768701+00:00,0
13704,13705,08:00:27:cc:3f:1b,52:54:00:12:35:02,10.0.2.15,207.46.105.186,40,20,128,2540,5480,409,0,398,93,20,70.606228,5.540471,0x0014,2011-01-25 18:57:20.768769+00:00,0
13705,13706,08:00:27:cc:3f:1b,52:54:00:12:35:02,10.0.2.15,96.17.8.49,40,20,128,2547,5480,419,0,496,8189,20,64.405045,64.259982,0x0014,2011-01-25 18:57:20.768861+00:00,0
13706,13707,08:00:27:cc:3f:1b,52:54:00:12:35:02,10.0.2.15,91.103.140.2,40,20,128,2546,5480,417,0,525,270,20,64.884164,64.357688,0x0014,2011-01-25 18:57:20.768911+00:00,0


In [39]:
d = df_raw2['DateTime']
d.info()

<class 'pandas.core.series.Series'>
DatetimeIndex: 13708 entries, 2011-01-25 18:52:22.484409+00:00 to 2011-01-25 18:57:20.768972+00:00
Series name: DateTime
Non-Null Count  Dtype              
--------------  -----              
13708 non-null  datetime64[ns, UTC]
dtypes: datetime64[ns, UTC](1)
memory usage: 214.2 KB


In [40]:
df_raw2.index

DatetimeIndex(['2011-01-25 18:52:22.484409+00:00',
               '2011-01-25 18:52:22.514250+00:00',
               '2011-01-25 18:52:22.708292+00:00',
               '2011-01-25 18:52:22.713832+00:00',
               '2011-01-25 18:52:22.727058+00:00',
               '2011-01-25 18:52:22.727105+00:00',
               '2011-01-25 18:52:22.727459+00:00',
               '2011-01-25 18:52:22.747390+00:00',
               '2011-01-25 18:52:22.748483+00:00',
               '2011-01-25 18:52:22.761080+00:00',
               ...
               '2011-01-25 18:57:19.821523+00:00',
               '2011-01-25 18:57:19.821772+00:00',
               '2011-01-25 18:57:19.821805+00:00',
               '2011-01-25 18:57:20.768556+00:00',
               '2011-01-25 18:57:20.768654+00:00',
               '2011-01-25 18:57:20.768701+00:00',
               '2011-01-25 18:57:20.768769+00:00',
               '2011-01-25 18:57:20.768861+00:00',
               '2011-01-25 18:57:20.768911+00:00',
            

In [48]:
df_anomaly['DateTime']

121     2011-01-25 18:52:49.989547+00:00
135     2011-01-25 18:52:51.106661+00:00
143     2011-01-25 18:52:51.228619+00:00
144     2011-01-25 18:52:51.228619+00:00
223     2011-01-25 18:53:16.796686+00:00
                      ...               
21112   2011-01-25 18:56:49.801957+00:00
21113   2011-01-25 18:56:49.816039+00:00
21115   2011-01-25 18:57:19.584353+00:00
21116   2011-01-25 18:57:19.621772+00:00
21130   2011-01-25 18:52:22.484409+00:00
Name: DateTime, Length: 1400, dtype: datetime64[ns, UTC]