## Import *.pcap par tshark, labellisaton par Suricata, ML

### Set-Up

In [30]:
# imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import logging
import collections
import seaborn as sns
from pprint import pprint
import os
import json

# import ipywidgets
# import warnings

# import pyshark
# import networkx as nx

# from sklearn.preprocessing import OrdinalEncoder, StandardScaler
# from sklearn.pipeline import Pipeline
# from sklearn.decomposition import PCA
# from sklearn.cluster import KMeans, DBSCAN
# from sklearn.manifold import TSNE
# from sklearn.metrics import pairwise_distances, silhouette_score
# from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
# import umap

# from itertools import product

# PATH change to access library cyberlib
import sys
sys.path.append('/home/benjamin/Folders_Python/Cyber/libs')
import cyberlib as cbl

# to allow PyShark to run in Jupyter notebooks
# import nest_asyncio
# nest_asyncio.apply()

In [31]:
# logging set-up

lg = cbl.GetLogger('/home/benjamin/Folders_Python/Cyber/logs/pcap_labellisation.log')
logger = lg.get_custom_logger()

# start your engine
logger.info("-------- new run --------")

### Import *pcap by tshark, export to *.csv then DataFrame

In [32]:
# which *.pcap

DFNAME = 'smallFlows'

PCAPFILE = '/home/benjamin/Folders_Python/Cyber/data/input_pcaps/' + DFNAME + '.pcap'

In [33]:
# We use the tshark CLI to parse the *.pcap file and output a *.csv file for pandas
# doc here : https://www.wireshark.org/docs/man-pages/tshark.html

# for time exports : https://osqa-ask.wireshark.org/questions/30393/tshark-how-to-output-date-in-iso-format/
# NB : outputs times in UTC to avoid time zone mismatches
# -------> there is column 'Date' added in Wireshark preferences, with output in UTC day and time
# -------> output -t ud requested in tshark : forces otuput in UTC

# tshark :
# -r                :   reads the *.pcap
# -2, -R "tcp"      :   does 2 passes and keeps packets part of TCP conversations only
# -T fields         :   outputs a file with fields
# -E header=y       :   keeps the fields names on first row for pd.read_csv
# -E separator=,    :   for *.csv format
# -e <fields>       :   desired output fields
# -o                :   formats of the data in the fields

cli="tshark -r " + PCAPFILE + """ -2 \
    -R "tcp" \
    -T fields -E header=y -E separator=, \
    -e _ws.col.Date -t ud \
    -e frame.number \
    -e eth.src -e eth.dst \
    -e ip.src_host -e ip.dst_host \
    -e ip.len -e ip.hdr_len -e ip.ttl \
    -e tcp.srcport -e tcp.dstport -e tcp.stream -e tcp.len \
    -e tcp.seq -e tcp.ack -e tcp.hdr_len -e tcp.time_relative \
    -e tcp.time_delta \
    -e tcp.flags \
    -o 'gui.column.format:"No","%m","Date","%t","Source","%s","Destination","%d","Protocol","%p","Length","%L","Info","%i"' \
    > ~/Folders_Python/Cyber/data/input_pcaps/to_csv/test.csv"""

%time exit_code = os.system(cli)

if exit_code == 0:
    logger.info('Executed successfully *.pcap to *.csv translation with tshark')
else:
    logger.error('Error while using tshark to translate from *.pcap to *.csv')
    raise NameError('Error while using tshark to translate from *.pcap to *.csv')

CPU times: user 1.49 ms, sys: 131 µs, total: 1.62 ms
Wall time: 2.38 s


In [34]:
filename = '/home/benjamin/Folders_Python/Cyber/data/input_pcaps/to_csv/test.csv'

with open(file=filename, encoding='utf-8') as f:
    df_raw = pd.read_csv(
        f,
        header=0,               # using first row as columns names. they are exported by tshark -E header=y
        on_bad_lines='warn'     # if a line does not have the right length, skip it but warn me
        )

In [35]:
df_raw['DateTime'] = pd.to_datetime(df_raw['_ws.col.Date'], utc=True)

df_raw.drop(columns=['_ws.col.Date'], inplace=True)

In [36]:
df_raw

Unnamed: 0,frame.number,eth.src,eth.dst,ip.src_host,ip.dst_host,ip.len,ip.hdr_len,ip.ttl,tcp.srcport,tcp.dstport,tcp.stream,tcp.len,tcp.seq,tcp.ack,tcp.hdr_len,tcp.time_relative,tcp.time_delta,tcp.flags,DateTime
0,1,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.138,983,20,128,57011,80,0,943,1,1,20,0.000000,0.000000,0x0018,2011-01-25 18:52:22.484409+00:00
1,2,00:1a:8c:15:f9:80,40:61:86:9a:f1:f5,72.14.213.138,192.168.3.131,426,20,52,80,57011,0,386,1,944,20,0.029841,0.029841,0x0018,2011-01-25 18:52:22.514250+00:00
2,3,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.102,52,20,128,55950,80,1,0,0,0,32,0.000000,0.000000,0x0002,2011-01-25 18:52:22.708292+00:00
3,4,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.138,40,20,128,57011,80,0,0,944,387,20,0.229423,0.199582,0x0010,2011-01-25 18:52:22.713832+00:00
4,5,00:1a:8c:15:f9:80,40:61:86:9a:f1:f5,72.14.213.102,192.168.3.131,52,20,52,80,55950,1,0,0,1,32,0.018766,0.018766,0x0012,2011-01-25 18:52:22.727058+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13703,13704,08:00:27:cc:3f:1b,52:54:00:12:35:02,10.0.2.15,65.55.15.244,40,20,128,2537,5480,407,0,5039,5738,20,71.195375,66.560501,0x0014,2011-01-25 18:57:20.768701+00:00
13704,13705,08:00:27:cc:3f:1b,52:54:00:12:35:02,10.0.2.15,207.46.105.186,40,20,128,2540,5480,409,0,398,93,20,70.606228,5.540471,0x0014,2011-01-25 18:57:20.768769+00:00
13705,13706,08:00:27:cc:3f:1b,52:54:00:12:35:02,10.0.2.15,96.17.8.49,40,20,128,2547,5480,419,0,496,8189,20,64.405045,64.259982,0x0014,2011-01-25 18:57:20.768861+00:00
13706,13707,08:00:27:cc:3f:1b,52:54:00:12:35:02,10.0.2.15,91.103.140.2,40,20,128,2546,5480,417,0,525,270,20,64.884164,64.357688,0x0014,2011-01-25 18:57:20.768911+00:00


In [37]:
# # ordinal encoding with Pandas

# columns_to_encode_as_ordinal = ['frame.number', 'eth.src', 'eth.dst', 'ip.src_host', 'ip.dst_host', 'tcp.flags']

# df_ord = pd.DataFrame()
# for c in columns_to_encode_as_ordinal:
#     codes, _ = pd.factorize(df_raw[c])
#     df_sup = pd.DataFrame(data={ c : list(codes) })
#     df_ord = pd.concat([df_ord, df_sup], axis=1)
    
# df = df_raw.drop(columns=columns_to_encode_as_ordinal)
# df.reset_index(drop=True)

# df = pd.concat([df, df_ord], axis=1)

# # columns_to_drop = ['TIMESTAMP_ts']
# # df.drop(columns=columns_to_drop, inplace=True)

In [38]:
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13708 entries, 0 to 13707
Data columns (total 19 columns):
 #   Column             Non-Null Count  Dtype              
---  ------             --------------  -----              
 0   frame.number       13708 non-null  int64              
 1   eth.src            13708 non-null  object             
 2   eth.dst            13708 non-null  object             
 3   ip.src_host        13708 non-null  object             
 4   ip.dst_host        13708 non-null  object             
 5   ip.len             13708 non-null  int64              
 6   ip.hdr_len         13708 non-null  int64              
 7   ip.ttl             13708 non-null  int64              
 8   tcp.srcport        13708 non-null  int64              
 9   tcp.dstport        13708 non-null  int64              
 10  tcp.stream         13708 non-null  int64              
 11  tcp.len            13708 non-null  int64              
 12  tcp.seq            13708 non-null  int64      

### Labellisation by Suricata

In [39]:
DATA_OUTPUT = '/home/benjamin/Folders_Python/Cyber/data/outputs'

cli="suricata -r " + PCAPFILE + " tcp " + " -l " + DATA_OUTPUT # + " -k none"

%time exit_code = os.system(cli)

if exit_code == 0:
    logger.info('Executed successfully *.pcap to EVE.json translation with suricata')
else:
    logger.error('Error while using suricata to analyse from *.pcap to EVE.json')
    raise NameError('Error while using suricata to analyse from *.pcap to EVE.json')

21/8/2023 -- 19:30:22 - <Notice> - This is Suricata version 6.0.10 RELEASE running in USER mode
21/8/2023 -- 19:30:22 - <Notice> - all 5 packet processing threads, 4 management threads initialized, engine started.
21/8/2023 -- 19:30:22 - <Notice> - Signal Received.  Stopping engine.
21/8/2023 -- 19:30:22 - <Notice> - Pcap-file module read 1 files, 13708 packets, 9135182 bytes
CPU times: user 6.71 ms, sys: 0 ns, total: 6.71 ms
Wall time: 319 ms


In [40]:
# !suricata -r /home/benjamin/Folders_Python/Cyber/data/input_pcaps/test.pcap -l /home/benjamin/Folders_Python/Cyber/data/outputs -k none

In [41]:
# Pandas provides a useful method – json_normalize – for normalizing nested JSON fields into dataframe. Resulting columns use dot notation to signify nested objects, similar to how Elasticsearch does it

SURICATA_EVE_LOG = "/home/benjamin/Folders_Python/Cyber/data/outputs/eve.json"

with open (SURICATA_EVE_LOG) as packets:
    df_log = pd.json_normalize(
        [json.loads(packet) for packet in packets],
        max_level=1
    )

In [42]:
df_log

Unnamed: 0,timestamp,flow_id,pcap_cnt,event_type,src_ip,src_port,dest_ip,dest_port,proto,tx_id,...,stats.decoder,stats.flow,stats.defrag,stats.flow_bypassed,stats.tcp,stats.detect,stats.app_layer,stats.http,stats.ftp,stats.file_store
0,2011-01-25T19:52:23.466591+0100,2.071268e+15,100.0,http,192.168.3.131,55953.0,65.55.206.209,80.0,TCP,0.0,...,,,,,,,,,,
1,2011-01-25T19:52:23.832607+0100,6.398239e+14,161.0,http,192.168.3.131,55959.0,65.55.5.231,80.0,TCP,0.0,...,,,,,,,,,,
2,2011-01-25T19:52:23.881606+0100,6.280213e+14,170.0,http,192.168.3.131,55960.0,206.108.207.139,80.0,TCP,0.0,...,,,,,,,,,,
3,2011-01-25T19:52:23.998656+0100,9.530794e+13,174.0,http,192.168.3.131,55962.0,65.55.5.232,80.0,TCP,0.0,...,,,,,,,,,,
4,2011-01-25T19:52:24.330674+0100,2.063820e+15,186.0,http,192.168.3.131,55966.0,63.215.202.48,80.0,TCP,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1868,2011-01-25T19:52:22.484409+0100,9.830949e+14,,flow,192.168.3.131,56090.0,65.54.95.75,80.0,TCP,,...,,,,,,,,,,
1869,2011-01-25T19:52:22.484409+0100,1.264875e+15,,flow,192.168.3.131,56406.0,65.54.95.68,80.0,TCP,,...,,,,,,,,,,
1870,2011-01-25T19:52:22.484409+0100,1.968835e+15,,flow,192.168.3.131,56457.0,65.54.95.68,80.0,TCP,,...,,,,,,,,,,
1871,2011-01-25T19:52:22.484409+0100,1.969576e+15,,flow,192.168.3.131,56069.0,65.54.95.75,80.0,TCP,,...,,,,,,,,,,


In [43]:
df_anomaly = df_log[df_log['event_type']=='anomaly']  # we keep only anomalies

In [44]:
df_anomaly.head(10)

Unnamed: 0,timestamp,flow_id,pcap_cnt,event_type,src_ip,src_port,dest_ip,dest_port,proto,tx_id,...,stats.decoder,stats.flow,stats.defrag,stats.flow_bypassed,stats.tcp,stats.detect,stats.app_layer,stats.http,stats.ftp,stats.file_store
8,2011-01-25T19:52:41.406229+0100,628021300000000.0,384.0,anomaly,192.168.3.131,55960.0,206.108.207.139,80.0,TCP,,...,,,,,,,,,,
9,2011-01-25T19:52:41.406421+0100,1419335000000000.0,385.0,anomaly,192.168.3.131,57038.0,74.217.50.10,80.0,TCP,,...,,,,,,,,,,
10,2011-01-25T19:52:41.414684+0100,628021300000000.0,386.0,anomaly,206.108.207.139,80.0,192.168.3.131,55960.0,TCP,,...,,,,,,,,,,
11,2011-01-25T19:52:41.414684+0100,628021300000000.0,386.0,anomaly,206.108.207.139,80.0,192.168.3.131,55960.0,TCP,,...,,,,,,,,,,
13,2011-01-25T19:52:41.414725+0100,628021300000000.0,387.0,anomaly,192.168.3.131,55960.0,206.108.207.139,80.0,TCP,,...,,,,,,,,,,
14,2011-01-25T19:52:41.488171+0100,1419335000000000.0,388.0,anomaly,74.217.50.10,80.0,192.168.3.131,57038.0,TCP,,...,,,,,,,,,,
18,2011-01-25T19:52:23.210582+0100,434539500000000.0,49.0,anomaly,72.14.213.147,443.0,192.168.3.131,52152.0,TCP,,...,,,,,,,,,,
33,2011-01-25T19:52:29.760963+0100,971831300000000.0,327.0,anomaly,72.14.213.105,443.0,192.168.3.131,57721.0,TCP,,...,,,,,,,,,,
34,2011-01-25T19:52:29.761451+0100,971831300000000.0,329.0,anomaly,192.168.3.131,57721.0,72.14.213.105,443.0,TCP,,...,,,,,,,,,,
39,2011-01-25T19:52:42.287953+0100,221414600000000.0,455.0,anomaly,72.14.213.102,443.0,192.168.3.131,52202.0,TCP,,...,,,,,,,,,,


In [45]:
df_anomaly['anomaly.event'].unique()

array(['stream.fin_out_of_window', 'stream.fin_but_no_session',
       'stream.pkt_invalid_ack', 'stream.fin_invalid_ack',
       'stream.est_packet_out_of_window', 'stream.reassembly_seq_gap',
       'UNABLE_TO_MATCH_RESPONSE_TO_REQUEST', 'stream.est_invalid_ack',
       'INVALID_RECORD_LENGTH', 'stream.rst_but_no_session',
       'INVALID_RECORD_TYPE', 'stream.pkt_retransmission',
       'stream.pkt_broken_ack'], dtype=object)

In [46]:
df_anomaly['DateTime'] = pd.to_datetime(df_anomaly['timestamp'],utc=True)

df_anomaly.drop(columns=['timestamp'], inplace=True)

df_anomaly.set_index(keys='DateTime', drop=False, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_anomaly['DateTime'] = pd.to_datetime(df_anomaly['timestamp'],utc=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_anomaly.drop(columns=['timestamp'], inplace=True)


In [47]:
df_anomaly

Unnamed: 0_level_0,flow_id,pcap_cnt,event_type,src_ip,src_port,dest_ip,dest_port,proto,tx_id,http.hostname,...,stats.flow,stats.defrag,stats.flow_bypassed,stats.tcp,stats.detect,stats.app_layer,stats.http,stats.ftp,stats.file_store,DateTime
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-01-25 18:52:41.406229+00:00,6.280213e+14,384.0,anomaly,192.168.3.131,55960.0,206.108.207.139,80.0,TCP,,,...,,,,,,,,,,2011-01-25 18:52:41.406229+00:00
2011-01-25 18:52:41.406421+00:00,1.419335e+15,385.0,anomaly,192.168.3.131,57038.0,74.217.50.10,80.0,TCP,,,...,,,,,,,,,,2011-01-25 18:52:41.406421+00:00
2011-01-25 18:52:41.414684+00:00,6.280213e+14,386.0,anomaly,206.108.207.139,80.0,192.168.3.131,55960.0,TCP,,,...,,,,,,,,,,2011-01-25 18:52:41.414684+00:00
2011-01-25 18:52:41.414684+00:00,6.280213e+14,386.0,anomaly,206.108.207.139,80.0,192.168.3.131,55960.0,TCP,,,...,,,,,,,,,,2011-01-25 18:52:41.414684+00:00
2011-01-25 18:52:41.414725+00:00,6.280213e+14,387.0,anomaly,192.168.3.131,55960.0,206.108.207.139,80.0,TCP,,,...,,,,,,,,,,2011-01-25 18:52:41.414725+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011-01-25 18:56:34.619292+00:00,1.350519e+15,12206.0,anomaly,65.55.15.244,80.0,192.168.3.131,57180.0,TCP,,,...,,,,,,,,,,2011-01-25 18:56:34.619292+00:00
2011-01-25 18:56:34.681663+00:00,6.563316e+14,12210.0,anomaly,65.55.206.9,80.0,192.168.3.131,57176.0,TCP,,,...,,,,,,,,,,2011-01-25 18:56:34.681663+00:00
2011-01-25 18:56:49.801957+00:00,8.755210e+14,12772.0,anomaly,192.168.3.131,57191.0,207.46.216.54,5480.0,TCP,,,...,,,,,,,,,,2011-01-25 18:56:49.801957+00:00
2011-01-25 18:56:49.816039+00:00,8.755210e+14,12773.0,anomaly,207.46.216.54,5480.0,192.168.3.131,57191.0,TCP,,,...,,,,,,,,,,2011-01-25 18:56:49.816039+00:00


In [48]:
# idée : 
# 1. parcourir les DateTime du sous-ensemble des anomalies détectées par Suricata
# 2. regarder s'il y a un paquet avec ce timestamp exact dans l'extraction tshark
# 3. si oui : flagger y=1 le paquet dans la df tshark (et rajouter les champs d'explication)
# 4. si non : logger une anomalie orpheline

In [49]:
df_raw2 = df_raw.copy()

ctr_anomalies = 0
df_raw2 = df_raw.set_index(keys='DateTime', drop=False)

In [50]:
df_raw2['y'] = 0

In [51]:
df_raw2['y'].sum()

0

In [52]:
for anomaly_datetime in df_anomaly.index:
    df_raw2.loc[anomaly_datetime, 'y'] = 1
    ctr_anomalies += 1

In [53]:
print(f'compté {ctr_anomalies} anomalies')

compté 250 anomalies


In [54]:
df_raw2

Unnamed: 0_level_0,frame.number,eth.src,eth.dst,ip.src_host,ip.dst_host,ip.len,ip.hdr_len,ip.ttl,tcp.srcport,tcp.dstport,tcp.stream,tcp.len,tcp.seq,tcp.ack,tcp.hdr_len,tcp.time_relative,tcp.time_delta,tcp.flags,DateTime,y
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2011-01-25 18:52:22.484409+00:00,1,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.138,983,20,128,57011,80,0,943,1,1,20,0.000000,0.000000,0x0018,2011-01-25 18:52:22.484409+00:00,1
2011-01-25 18:52:22.514250+00:00,2,00:1a:8c:15:f9:80,40:61:86:9a:f1:f5,72.14.213.138,192.168.3.131,426,20,52,80,57011,0,386,1,944,20,0.029841,0.029841,0x0018,2011-01-25 18:52:22.514250+00:00,0
2011-01-25 18:52:22.708292+00:00,3,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.102,52,20,128,55950,80,1,0,0,0,32,0.000000,0.000000,0x0002,2011-01-25 18:52:22.708292+00:00,0
2011-01-25 18:52:22.713832+00:00,4,40:61:86:9a:f1:f5,00:1a:8c:15:f9:80,192.168.3.131,72.14.213.138,40,20,128,57011,80,0,0,944,387,20,0.229423,0.199582,0x0010,2011-01-25 18:52:22.713832+00:00,0
2011-01-25 18:52:22.727058+00:00,5,00:1a:8c:15:f9:80,40:61:86:9a:f1:f5,72.14.213.102,192.168.3.131,52,20,52,80,55950,1,0,0,1,32,0.018766,0.018766,0x0012,2011-01-25 18:52:22.727058+00:00,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011-01-25 18:57:20.768701+00:00,13704,08:00:27:cc:3f:1b,52:54:00:12:35:02,10.0.2.15,65.55.15.244,40,20,128,2537,5480,407,0,5039,5738,20,71.195375,66.560501,0x0014,2011-01-25 18:57:20.768701+00:00,0
2011-01-25 18:57:20.768769+00:00,13705,08:00:27:cc:3f:1b,52:54:00:12:35:02,10.0.2.15,207.46.105.186,40,20,128,2540,5480,409,0,398,93,20,70.606228,5.540471,0x0014,2011-01-25 18:57:20.768769+00:00,0
2011-01-25 18:57:20.768861+00:00,13706,08:00:27:cc:3f:1b,52:54:00:12:35:02,10.0.2.15,96.17.8.49,40,20,128,2547,5480,419,0,496,8189,20,64.405045,64.259982,0x0014,2011-01-25 18:57:20.768861+00:00,0
2011-01-25 18:57:20.768911+00:00,13707,08:00:27:cc:3f:1b,52:54:00:12:35:02,10.0.2.15,91.103.140.2,40,20,128,2546,5480,417,0,525,270,20,64.884164,64.357688,0x0014,2011-01-25 18:57:20.768911+00:00,0


In [55]:
df_raw2['y'].sum()  # IL Y A DES DOUBLONS ! PLUSIEURS ANOMALIES PAR PAQUET

237

In [56]:
df_raw2.describe(include='all')

  df_raw2.describe(include='all')


Unnamed: 0,frame.number,eth.src,eth.dst,ip.src_host,ip.dst_host,ip.len,ip.hdr_len,ip.ttl,tcp.srcport,tcp.dstport,tcp.stream,tcp.len,tcp.seq,tcp.ack,tcp.hdr_len,tcp.time_relative,tcp.time_delta,tcp.flags,DateTime,y
count,13708.0,13708,13708,13708,13708,13708.0,13708.0,13708.0,13708.0,13708.0,13708.0,13708.0,13708.0,13708.0,13708.0,13708.0,13708.0,13708,13708,13708.0
unique,,7,7,106,110,,,,,,,,,,,,,8,13570,
top,,00:1a:8c:15:f9:80,40:61:86:9a:f1:f5,192.168.3.131,192.168.3.131,,,,,,,,,,,,,0x0010,2011-01-25 18:57:15.280863+00:00,
freq,,6166,6172,4257,6172,,,,,,,,,,,,,8622,4,
first,,,,,,,,,,,,,,,,,,,2011-01-25 18:52:22.484409+00:00,
last,,,,,,,,,,,,,,,,,,,2011-01-25 18:57:20.768972+00:00,
mean,6854.5,,,,,651.767435,20.0,111.141304,19262.756566,27147.845492,243.247593,611.089291,27912590.0,28167480.0,20.678144,14.842911,0.722714,,,0.017289
std,3957.30308,,,,,667.587111,0.0,60.890005,25557.152425,26963.813374,143.907153,668.202073,344955500.0,346337100.0,2.722977,37.033509,6.070659,,,0.130351
min,1.0,,,,,40.0,20.0,46.0,80.0,80.0,0.0,0.0,0.0,0.0,20.0,0.0,0.0,,,0.0
25%,3427.75,,,,,40.0,20.0,54.0,80.0,443.0,106.0,0.0,434.0,434.0,20.0,0.072679,2.6e-05,,,0.0


In [57]:
df_anomaly.index.unique()

DatetimeIndex(['2011-01-25 18:52:41.406229+00:00',
               '2011-01-25 18:52:41.406421+00:00',
               '2011-01-25 18:52:41.414684+00:00',
               '2011-01-25 18:52:41.414725+00:00',
               '2011-01-25 18:52:41.488171+00:00',
               '2011-01-25 18:52:23.210582+00:00',
               '2011-01-25 18:52:29.760963+00:00',
               '2011-01-25 18:52:29.761451+00:00',
               '2011-01-25 18:52:42.287953+00:00',
               '2011-01-25 18:52:49.989547+00:00',
               ...
               '2011-01-25 18:56:19.839352+00:00',
               '2011-01-25 18:56:19.885985+00:00',
               '2011-01-25 18:56:19.887076+00:00',
               '2011-01-25 18:56:34.602474+00:00',
               '2011-01-25 18:56:34.602617+00:00',
               '2011-01-25 18:56:34.619292+00:00',
               '2011-01-25 18:56:34.681663+00:00',
               '2011-01-25 18:56:49.801957+00:00',
               '2011-01-25 18:56:49.816039+00:00',
            