In [1]:
import pandas as pd
# Per-minute connection counts. The plotting cell that follows reads the
# 'index', 'builtconnectioncount', 'datetime' and 'incidentnum' columns.
connection_counts_url = "https://690v.blob.core.windows.net/690v/byminutebuiltconnectioncount.csv"
data = pd.read_csv(connection_counts_url)

In [2]:
import pandas as pd
from bokeh.plotting import figure
from bokeh.io import show,output_notebook
from bokeh.models import ColumnDataSource,HoverTool,OpenURL, TapTool

# Send Bokeh output to the notebook before anything is rendered.
output_notebook()

# Line chart of `builtconnectioncount` against the row index of `data`.
# X tick marks and tick labels are hidden; the timestamp is shown through the
# hover tooltip instead.
p = figure(
    width=800,
    height=350,
    x_axis_label='Date & Time',
    y_axis_label='Connections built',
)
p.title.text = "Number of New Connections Over Time"
p.title.align = 'center'
p.xaxis.major_tick_line_color = None
p.xaxis.minor_tick_line_color = None
p.xaxis.major_label_text_font_size = '0pt'

# Columns are renamed here so the glyph, the tooltips and the tap URL can all
# refer to them with short field names.
source = ColumnDataSource(data={
    'xaxis': data['index'],
    'yaxis': data['builtconnectioncount'],
    'label': data['datetime'],
    'incident': data['incidentnum'],
})
p.line(source=source, x='xaxis', y='yaxis', color='navy', alpha=0.5)

hover_fields = [
    ('date & time', "@label"),
    ('new connections', "@yaxis"),
]
p.add_tools(HoverTool(tooltips=hover_fields))

# Tapping opens the incident page; "@incident" in the URL is filled in from the
# `incident` column of `source`.
url = "https://690v.blob.core.windows.net/690v/inc@incident.html"
p.add_tools(TapTool())
taptool = p.select(type=TapTool)
taptool.callback = OpenURL(url=url)

show(p)

The Number of New Connections Over Time plot shows how many new connections are created each minute. There is a clear peak starting at 11:39 on 4/13 that lasted for about 10 minutes.
You can click on the plot to show corresponding source and target IP information.

In [3]:
# Connection counts for the first incident. The plotting cell that follows
# reads the 'source', 'target' and 'newconnections' columns.
incident1_connections_url = "https://690v.blob.core.windows.net/690v/incident1sourcetargetconnectionscount.csv"
data = pd.read_csv(incident1_connections_url)

In [4]:
from bokeh.plotting import figure, output_notebook, show,output_file
from bokeh.models import ColumnDataSource,HoverTool,FactorRange
import pandas as pd
# Source -> target connection plot for the first incident.
#
# Each row of `data` becomes one line segment running from x=1 ("source") to
# x=2 ("target"); the y position of each end is the 1-based slot of that IP
# address in `uniqueiplist`.
#
# Hover does not work with varying line widths, so connections are told apart
# by line colour instead of width.
HEAVY_CONNECTION_THRESHOLD = 1000  # rows with at least this many connections are drawn in red

# Every distinct address seen as a source or a target, in order of first
# appearance, so the y-axis layout is identical on every run (iterating a set
# of strings is not order-stable between kernel restarts).
uniqueiplist = list(pd.unique(pd.concat([data.source, data.target], ignore_index=True)))

# Forward (ip -> slot) and reverse (slot -> ip) lookups. The forward dict
# replaces a linear `list.index` search per row; the reverse dict supplies the
# y tick labels.
ip_position = {ip: slot for slot, ip in enumerate(uniqueiplist, start=1)}
overridedict = {slot: ip for ip, slot in ip_position.items()}

# One [1, 2] x-pair and one [source slot, target slot] y-pair per row. The
# sizes are derived from `data` so all ColumnDataSource columns stay the same
# length whatever the CSV contains.
xs = [[1, 2] for _ in range(len(data))]
ys = [[ip_position[src], ip_position[dst]] for src, dst in zip(data.source, data.target)]
colors = data.newconnections.map(
    lambda count: 'navy' if count < HEAVY_CONNECTION_THRESHOLD else 'red'
)

source = ColumnDataSource(data=dict(
    xaxis=xs,
    yaxis=ys,
    label=list(data.newconnections),
    color=colors,
    start=list(data.source),
    end=list(data.target),
))

newplot = figure(width=800, height=400, x_range=[0, 3])
newplot.title.text = "First Incident (peak 11:39~11:49)"
newplot.title.align = 'center'
newplot.xaxis.axis_label = "Source and Target"
newplot.yaxis.axis_label = "IP Address"
newplot.multi_line(source=source, xs='xaxis', ys='yaxis', color='color', line_width=2,
                   alpha=0.6, line_cap='round', hover_line_alpha=1.0)
newplot.add_tools(HoverTool(show_arrow=False, line_policy='next',
                            tooltips=[('connections', "@label"), ('source&target', "@start-@end")]))

# Replace the numeric ticks with readable labels: the two x positions become
# "source"/"target" and each y slot shows its IP address.
newplot.xaxis.major_label_overrides = {1: "source", 2: "target"}
newplot.yaxis.major_label_overrides = overridedict
newplot.xaxis.ticker = [1, 2]
newplot.yaxis.ticker = list(range(1, len(uniqueiplist) + 1))  # one tick per address

output_notebook()
show(newplot)

We can observe from the above plot that the majority of the sources contributing to the connection peak are outside the organization, and they all target the external web server at almost the same time. This indicates that a potential DoS attack happened.

In [23]:
import numpy as np
data = pd.read_csv('https://690v.blob.core.windows.net/690v/IDSbyminutecount.csv')

# Rule names to isolate, in the order datapart1..datapart8 are built.
ids_rules = (
    '(portscan) TCP Portscan',
    '(portscan) TCP Portsweep',
    '(portscan) TCP Decoy Portscan',
    '(portscan) TCP Distributed Portscan',
    '(spp_frag3) Fragmentation overlap',
    '(snort_decoder): Tcp Window Scale Option found with length > 14',
    'COMMUNITY SIP TCP/IP message flooding directed to SIP proxy',
    '(http_inspect) BARE BYTE UNICODE ENCODING',
)


def _zero_other_rules(frame, rule):
    """Return a copy of `frame` whose "count" is 0 on every row not matching `rule`.

    All rows and all other columns are kept unchanged; `frame` is not modified.
    """
    isolated = frame.copy()
    matches_rule = isolated["rule"] == rule
    isolated["count"] = np.where(matches_rule, isolated["count"], 0)
    return isolated


# One full-length frame per rule, unpacked into the names the plotting cell uses.
(datapart1, datapart2, datapart3, datapart4,
 datapart5, datapart6, datapart7, datapart8) = [
    _zero_other_rules(data, rule) for rule in ids_rules
]

In [24]:
# One figure with a separate line per IDS rule. Each datapartN holds every row
# of the input with "count" zeroed for the other rules, so all lines share the
# same x values.
p = figure(
    width=800,
    height=250,
    x_axis_label='Date & Time',
    y_axis_label='Number of Incidents',
)
p.title.text = "Intrusions Detected Over Time"
p.title.align = 'center'
# Hide x tick marks and tick labels; the timestamp is shown in the hover tooltip.
p.xaxis.major_tick_line_color = None
p.xaxis.minor_tick_line_color = None
p.xaxis.major_label_text_font_size = '0pt'


def _rule_source(part):
    """Wrap one per-rule frame in a ColumnDataSource with the shared field names."""
    return ColumnDataSource(data={
        'xaxis': part['index'],
        'yaxis': part['count'],
        'label': part['datetime'],
        'rule': part['rule'],
    })


(source1, source2, source3, source4,
 source5, source6, source7, source8) = [
    _rule_source(part)
    for part in (datapart1, datapart2, datapart3, datapart4,
                 datapart5, datapart6, datapart7, datapart8)
]

# (source, line colour, alpha, legend text) for each rule, in drawing order.
line_specs = [
    (source1, 'navy', 0.5, '(portscan) TCP Portscan'),
    (source2, 'red', 0.5, '(portscan) TCP Portsweep'),
    (source3, 'green', 1, '(portscan) TCP Decoy Portscan'),
    (source4, 'yellow', 1, '(portscan) TCP Distributed Portscan'),
    (source5, 'black', 1, '(spp_frag3) Fragmentation overlap'),
    (source6, 'orange', 1, '(snort_decoder): Tcp Window Scale Option found with length > 14'),
    (source7, 'blue', 1, 'COMMUNITY SIP TCP/IP message flooding directed to SIP proxy'),
    (source8, 'purple', 1, '(http_inspect) BARE BYTE UNICODE ENCODING'),
]
for rule_source, line_color, line_alpha, legend_text in line_specs:
    p.line(source=rule_source, x='xaxis', y='yaxis', color=line_color,
           alpha=line_alpha, line_width=2, legend=legend_text)

# Transparent legend background so the lines stay visible underneath it.
p.legend.background_fill_alpha = 0

hover_fields = [
    ('incident type', "@rule"),
    ('date & time', "@label"),
    ('incident count', "@yaxis"),
]
p.add_tools(HoverTool(tooltips=hover_fields))

output_notebook()
show(p)

We can observe from the plot that the most notable intrusion detected is "Tcp Window Scale Option found with length > 14". The rest of the errors are negligible compared with the TCP window scale error, so we treat this error separately in the plot below.

In [5]:
import numpy as np
# Source -> target plot for the "type 6" intrusion rule.
#
# Each row of `data` becomes one line segment running from x=1 ("source") to
# x=2 ("target"); the y position of each end is the 1-based slot of that IP
# address in `uniqueiplist`.
data = pd.read_csv('https://690v.blob.core.windows.net/690v/IDS_type6bysourcetargetcount.csv')

HEAVY_CONNECTION_THRESHOLD = 1000  # rows with at least this many connections are drawn in red

# Every distinct address seen as a source or a target, in order of first
# appearance, so the y-axis layout is identical on every run (iterating a set
# of strings is not order-stable between kernel restarts).
uniqueiplist = list(pd.unique(pd.concat([data.source, data.target], ignore_index=True)))

# Forward (ip -> slot) and reverse (slot -> ip) lookups. The forward dict
# replaces a linear `list.index` search per row.
ip_position = {ip: slot for slot, ip in enumerate(uniqueiplist, start=1)}
overridedict = {slot: ip for ip, slot in ip_position.items()}

# One [1, 2] x-pair and one [source slot, target slot] y-pair per row. The
# sizes are derived from `data` so all ColumnDataSource columns stay the same
# length whatever the CSV contains.
xs = [[1, 2] for _ in range(len(data))]
ys = [[ip_position[src], ip_position[dst]] for src, dst in zip(data.source, data.target)]
colors = data.connections.map(
    lambda count: 'navy' if count < HEAVY_CONNECTION_THRESHOLD else 'red'
)

source = ColumnDataSource(data=dict(
    xaxis=xs,
    yaxis=ys,
    label=list(data.connections),
    color=colors,
    start=list(data.source),
    end=list(data.target),
))

newplot = figure(width=800, height=400, x_range=[0, 3])
newplot.title.text = "Type 6 Intrusion (Orange in Intrusions Detected Graph) Sources "
newplot.title.align = 'center'
newplot.xaxis.axis_label = "Source and Target"
newplot.yaxis.axis_label = "IP Address"
# Y tick labels are rendered at 0pt, i.e. hidden; addresses are read from the
# hover tooltip instead.
newplot.yaxis.major_label_text_font_size = '0pt'
newplot.multi_line(source=source, xs='xaxis', ys='yaxis', color='color', line_width=1,
                   alpha=0.6, line_cap='round', hover_line_alpha=1.0)
newplot.add_tools(HoverTool(show_arrow=False, line_policy='next',
                            tooltips=[('connections', "@label"), ('source&target', "@start-@end")]))
newplot.xaxis.major_label_overrides = {1: "source", 2: "target"}
newplot.yaxis.major_label_overrides = overridedict
newplot.xaxis.ticker = [1, 2]

output_notebook()
show(newplot)

The sources are highly concentrated in a few IP addresses, while the targets include almost all machines in the network.
It looks like someone is doing a scanning of the whole network.

In [8]:
from bokeh.io import show, output_notebook
from bokeh.plotting import figure
from bokeh.models import GraphRenderer, StaticLayoutProvider, Circle,HoverTool
from bokeh.palettes import Spectral8
import pandas as pd

# Static-layout network graph: one circle per row of `node`, one edge per row
# of `edge`, with node positions taken from the `x`/`y` columns of `node`.
node = pd.read_csv('https://690v.blob.core.windows.net/690v/network_node.csv')
edge = pd.read_csv('https://690v.blob.core.windows.net/690v/network_edge.csv')

# Take the node count and node ids from the data instead of hard-coding them,
# so the layout keys always match the `index` column the renderer uses.
N = len(node)
node_indices = node['index'].tolist()

plot = figure(width=800, height=255, x_range=(0, 243), y_range=(0, 5))
plot.title.text = "Connections Made in Network"
plot.title.align = 'center'

graph = GraphRenderer()

# Register each node column with add() BEFORE attaching the glyph that refers
# to `fill_color`. The recorded run, which assigned `.data` wholesale after
# setting the glyph, reported E-1001 BAD_COLUMN_NAME for `fill_color`.
# NOTE(review): presumably the Bokeh release in use only tracked column names
# added via the constructor or add() -- confirm against the installed version.
node_source = graph.node_renderer.data_source
node_source.add(node['index'], 'index')  # first, so it replaces the default `index` column
node_source.add(node['fill_color'], 'fill_color')
node_source.add(node['node'], 'label')
node_source.add(node['type'], 'nodetype')
graph.node_renderer.glyph = Circle(size=7, fill_color="fill_color")

graph.edge_renderer.data_source.data = dict(
    start=edge['start'],
    end=edge['end'])

x = list(node.x)
y = list(node.y)

# Map each node id to its fixed (x, y) position.
graph_layout = dict(zip(node_indices, zip(x, y)))
graph.layout_provider = StaticLayoutProvider(graph_layout=graph_layout)

plot.renderers.append(graph)
plot.add_tools(HoverTool(tooltips=[('IP', "@label"), ('Machine Category', "@nodetype")]))

output_notebook()
show(plot)

E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: fill_color [renderer: GlyphRenderer(id='544a3dcc-56aa-4b8b-a684-d6fd876d449f', ...)]


We can see from this network plot that the shipping/routing database and DNS server are accessed by outsiders, so there is potential information leakage to outsiders.
We also noticed that the source IP address of one network connection does not belong to any of the company's network groups, which indicates that this IP could be a fake one used by hackers.