#  Notebook

In [1]:
import numpy as np
import random
import time
import glob
import os
import sys
import unittest
import collections
from collections import Counter




import dash
from dash.dependencies import Output, Input
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objs as go
import plotly.express as px
from plotly.subplots import make_subplots

In [None]:
#%%timeit

#every list comprehension can be rewritten in for loop, but every for loop can’t be rewritten in the form of list comprehension.
def connected_hostnames(logpath, init_datetime, end_datetime, Hostname):
    connected_hosts=[]
    
    input_log=open(logpath)
    for line in input_log:
        #Check if within the interval
        if (init_datetime<= int(line.split()[0]) <= end_datetime):
            #check if the host initialized the connection and append the receiver if true.
            if(line.split()[1]==Hostname):
                connected_hosts.append(line.split()[2])
            #check if the host received the connection and append the initializer if true.
            elif (line.split()[2]==Hostname):
                connected_hosts.append(line.split()[1])
        #Exit early: finish the process if the interval is exceeded         
        elif ((int(line.split()[0]) > end_datetime )):
                break
    input_log.close()
    return collections.Counter(connected_hosts)
            
'''
def connected_hostnames_one_liner(filepath, init_datetime, end_datetime, Hostname):
    s=[line.split()[2] if (init_datetime <= int(line.split()[0]) <= end_datetime and line.split()[1]==Hostname) else line.split()[1] if (init_datetime<= int(line.split()[0]) <= end_datetime and line.split()[2]==Hostname) else None for line in reversed(list(open(filepath)))]
    return list(filter(None, s))
'''


In [None]:
%%timeit
connected_hostnames("data/input-file.txt",1565647205599,1565679364288, 'Jadon')

In [None]:
%%timeit
connected_hostnames_one_liner("data/input-file.txt",1565647205599,1565679364288, 'Jadon')

In [2]:
def connected_to(logpath, init_datetime, end_datetime, Hostname):
    hostnames=[]
    input_log=open(logpath)
    for line in reversed(list(input_log)):
        #print(''.join(['parsed line: ',line]))

        if (int(line.split()[0]) >= init_datetime and  int(line.split()[0])<= end_datetime and line.split()[2]==Hostname):
            #print(''.join(['----> considered line: ',line]))
            hostnames.append(line.split()[1])
            
        if(int(line.split()[0]) < init_datetime ):
            break
            
    #print('------------------ \n\n')
    input_log.close()
    return collections.Counter(hostnames)

In [3]:
def received_from(logpath, init_datetime, end_datetime, Hostname):
    hostnames=[]
    input_log=open(logpath)
    for line in reversed(list(input_log)):
        #print(''.join(['parsed line: ',line]))


        if (int(line.split()[0]) >= init_datetime and  int(line.split()[0])<= end_datetime and line.split()[1]==Hostname):
            #print(''.join(['----> considered line: ',line]))
            hostnames.append(line.split()[2])
            
        if(int(line.split()[0]) < init_datetime ):
            break
    #print('------------------ \n\n') 
    input_log.close()
    return collections.Counter(hostnames)

In [4]:
def generated_conn(logpath, init_datetime, end_datetime):
    hostnames=[]
    input_log=open(logpath)
    for line in reversed(list(input_log)):
        #print(''.join(['parsed line: ',line]))
        if (int(line.split()[0]) >= init_datetime and  int(line.split()[0])<= end_datetime ):
            #print(''.join(['----> considered line: ',line]))
            hostnames.append(line.split()[1])
        
        if(int(line.split()[0]) < init_datetime ):
            break
    
    #print('------------------ \n\n')
    input_log.close()
    return collections.Counter(hostnames)

In [None]:
'''
strings in Python are immutable, and the “+” operation involves creating a new string and copying the old content 
at each step. A more efficient approach would be to use the array module to modify the individual characters and 
then use the join() function to re-create your final string.
'''

def process_log_files(Hostname, past_time, log_ofo_time):

    
    #can achieve the same effect slightly faster by using while 1. This is a single jump operation, as it is a numerical comparison.
    while 1: 
        connected_hosts, received_hosts, active_hosts=Counter(),Counter(),Counter() 


        
        init_datetime=int((time.time()-past_time)*1000)
        end_datetime=int(time.time()*1000)
        past= time.time() - 5 # 5 seconds
        
        
        past_files=sorted( [ filename for filename in glob.glob("output/*.txt") if os.path.getmtime(filename)>=init_datetime/1000-log_ofo_time ] , key=os.path.getmtime)[::-1]
        
 
        for filename in past_files:
            connected_hosts+=connected_to(filename,init_datetime,end_datetime,Hostname)
            received_hosts+=received_from(filename,init_datetime,end_datetime,Hostname)
            active_hosts+=most_generated_conn(filename,init_datetime,end_datetime)
        
        '''
        ## Data transformation for display : 
            #converting 2d list into 1d , and consider multiple occurences by applying collection 
        connected_hosts=collections.Counter(sum(connected_hosts,[]))
        received_hosts=collections.Counter(sum(received_hosts,[]))
            #convert to collection to include other hosts if they have similar occurences as the first one.
        active_hosts= collections.Counter(sum(active_hosts,[]))
        '''
        active_hosts=[h for h in active_hosts.most_common() if h[1]==active_hosts.most_common(1)[0][1]]
            
        
        print(" ".join(['Hosts that connected to ', Hostname ,'in the last', str(past_time),'s are: ',str(connected_hosts),'\n']))
        print(" ".join(['Hosts that received connection from', Hostname ,'in the last', str(past_time),'s are: ',str(received_hosts),'\n']))
        print(" ".join(['the hostname that generated most connections in the last', str(past_time),'s is: ',  str(active_hosts),'\n']))
        
        print('--------------------------------\n\n')
        
        print(''.join(['It is :  ', time.strftime('%X %x'),'.  the next output is in ', str(past_time), ' s. \n']))
        time.sleep(past_time)
        
    
        

In [None]:
process_log_files('Hannibal', 5000 , 0 )

In [None]:
class NamesTestCase(unittest.TestCase):
        
# Test connected_hostnames() on short and long files
    def test_connected_hostnames_sf(self):
        result = connected_hostnames("data/input_test_case_1.txt",1607880434801,1607880438820, 'Steeve')
        self.assertEqual(result, {'Hanny': 1, 'Hannibal': 2})
        
    def test_connected_hostnames_lf(self):
        result = connected_hostnames("data/input-file.txt",1565647204351,1565733598341, 'Dristen')
        self.assertEqual(result, {'Aadison': 1, 'Wilkens': 1, 'Kahlina': 1, 'Alei': 1, 'Zhanasia': 1, 'Jamor': 1, 'Joy': 1})


# Test connected_to() on short and long files
    def test_connect_to_sf(self):
        result = connected_to("data/input_test_case_1.txt",1607880434801,1607880438820, 'Steeve')
        self.assertEqual(result, {'Hannibal': 1})

    def test_connect_to_lf(self):
        result = connected_to("data/input-file.txt",1565647204351,1565733598341, 'Jadon')
        self.assertEqual(result, {'Ahmya': 1, 'Kayleann': 1, 'Shainah': 1, 'Aniyah': 1, 'Eveleigh': 1, 'Caris': 1, 'Rahniya': 1, 'Remiel': 1})


# Test received_from() on short and long files   
    def test_received_from_sf(self):
        result = received_from("data/input_test_case_1.txt",1607880434801,1607880438820, 'Steeve')
        self.assertEqual(result, {'Hannibal': 1, 'Hanny': 1})

    def test_received_from_lf(self):
        result = received_from("data/input-file.txt",1565647204351,1565733598341, 'Dristen')
        self.assertEqual(result, {'Joy': 1, 'Jamor': 1, 'Zhanasia': 1, 'Alei': 1, 'Kahlina': 1, 'Wilkens': 1, 'Aadison': 1})

# Test generated_conn
    def test_generated_conn(self):
        result = generated_conn("data/input_test_case_1.txt",1607880434801,1607880438820)
        self.assertEqual(result, {'Hannibal': 3, 'Steeve': 2, 'Hanny': 1})

if __name__ == '__main__':
    unittest.main(argv=['first-arg-is-ignored'], exit=False)


In [None]:
connected_hostnames("data/input_test_case_1.txt",1607880434801,1607880438820, 'Steeve')

In [None]:
connected_hostnames("data/input-file.txt",1565647204351,1565733598341, 'Dristen')

In [None]:
connected_to("../data/input_test_case_1.txt",1607880434801,1607880438820, 'Steeve')

In [None]:
s=connected_to("../data/input-file.txt",1565647204351,1565733598341, 'Jadon')

In [None]:
s.values()

In [None]:
received_from("data/input_test_case_1.txt",1607880434801,1607880438820, 'Steeve')

In [None]:
received_from("data/input-file.txt",1565647204351,1565733598341, 'Dristen')

In [None]:
generated_conn("data/input_test_case_1.txt",1607880434801,1607880438820)

In [None]:
most_generated_conn("data/input-file.txt",1565647204351,1565733598341).most_common(1)

In [None]:

import logging
import threading
import time

def thread_function(name):
    logging.info("Thread %s: starting", name)
    time.sleep(2)
    logging.info("Thread %s: finishing", name)

if __name__ == "__main__":
    format = "%(asctime)s: %(message)s"
    logging.basicConfig(format=format, level=logging.INFO,
                        datefmt="%H:%M:%S")

    threads = list()
    for index in range(3):
        logging.info("Main    : create and start thread %d.", index)
        x = threading.Thread(target=thread_function, args=(index,))
        threads.append(x)
        x.start()

    for index, thread in enumerate(threads):
        logging.info("Main    : before joining thread %d.", index)
        thread.join()
        logging.info("Main    : thread %d done", index)

In [None]:
def generated_conn_dash(logpath, init_datetime, end_datetime):
    hostnames=[]
    input_log=open(logpath)
    for line in reversed(list(input_log)):
        #print(''.join(['parsed line: ',line]))
        if (int(line.split()[0]) >= init_datetime and  int(line.split()[0])<= end_datetime ):
            #print(''.join(['----> considered line: ',line]))
            hostnames.append(line.split()[1])
        
        if(int(line.split()[0]) < init_datetime ):
            break
    
    #print('------------------ \n\n')
    input_log.close()
    return hostnames

In [2]:

class Dashboard:
    def __init__:
        
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

colors = {
    'background': '#111111',
    'text': '#7FDBFF'
}



app.layout = html.Div(style={'backgroundColor': colors['background']}, children=[
    html.H1(
        children='Network log analytics',
        style={
            'textAlign': 'center',
            'color': colors['text']
        }
    ),
    

    
    html.Div(
        [
            html.Div(
                [
                    html.H2("""Select a host:""",
                            style={'margin-right': '1em', 'color': colors['text']})
                ],
            ),

        dcc.Dropdown(
            id='hosts_dropdown',
            options=[
            {'label': 'Hannibal', 'value': 'Hannibal'},
            {'label': 'Hanny', 'value': 'Hanny'},
            {'label': 'Steeve', 'value': 'Steeve'}
            ],
            placeholder="Default value 'Hannibal'",
            value="Hannibal",
            style=dict(width='40%',display='inline-block')
                    )
        ],
            style={'display': 'flex', 'align-items': 'center'}
            ),

    dcc.Graph(id='live-graphs_host'),
    dcc.Interval(id='graph-update', interval=0.5*10000)
])


#global connected_hosts, received_hosts, active_hosts



@app.callback(
    Output("live-graphs_host", "figure"),
     Input(component_id='hosts_dropdown', component_property='value'),
     Input('graph-update', 'n_intervals'))
def update_output(value, interval):
        

    log_ofo_time =0
    init_datetime=int((time.time()-4)*1000)
    end_datetime=int(time.time()*1000)
    past_files=sorted( [ filename for filename in glob.glob("../output/*.txt") if os.path.getmtime(filename)>=init_datetime/1000-log_ofo_time ] , key=os.path.getmtime)[::-1]
    

    
    for filename in past_files:
        set_connected_hosts(connected_to(filename,init_datetime,end_datetime,value))
        set_received_hosts(received_from(filename,init_datetime,end_datetime,value))
        set_active_hosts(generated_conn(filename, init_datetime, end_datetime))

        
    fig = make_subplots( rows=2, cols=2,
    specs=[[{"type": "domain"}, {"type": "domain"}],
           [{"colspan": 2}, None]],
    subplot_titles=("Generated connections","received connections", "total number of connections of all hosts")
                       )

    fig.add_trace(go.Pie(labels=list(get_connected_hosts.keys()), values=list(get_connected_hosts.values()), textinfo='label+value', name='connected to', hole=.65),
              row=1, col=1)
    fig.add_trace(go.Pie(labels=list(get_received_hosts.keys()), values=list(get_received_hosts.values()), textinfo='label+value', name='received from',  hole=.65),
              row=1, col=2)
    fig.add_trace(go.Bar(x=list(get_active_hosts.keys()), y=list(get_active_hosts.values()), name="All connections", marker=dict(color='orange', coloraxis="coloraxis") ),
              row=2, col=1)
    
    #fig.update_layout(
    #title_text="Host that connected and received connection from the selected host")


    return fig
    

SyntaxError: invalid syntax (<ipython-input-2-3d6b79c71c23>, line 2)

In [None]:
app.run_server(host='0.0.0.0', port=8080,debug=True, use_reloader=False)

In [6]:
class Dashboard:
    def __init__(self):
        external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
        

        
        
        self.app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
        
        app.callback(Output("live-graphs_host", "figure"),
                     Input(component_id='hosts_dropdown', component_property='value'),
                      Input('graph-update', 'n_intervals')) (update_output)

        colors = {
            'background': '#111111',
            'text': '#7FDBFF'
                }


        self.app.layout = html.Div(style={'backgroundColor': colors['background']}, 
            children=[
                html.H1(
                children='Network log analytics',
                    style={
                'textAlign': 'center',
                'color': colors['text']
                        }
                    ),
    

            html.Div(
            [
                html.Div(
                    [
                    html.H2("""Select a host:""",
                            style={'margin-right': '1em', 'color': colors['text']})
                    ],
            ),

            dcc.Dropdown(
                id='hosts_dropdown',
                options=[
                    {'label': 'Hannibal', 'value': 'Hannibal'},
                    {'label': 'Hanny', 'value': 'Hanny'},
                    {'label': 'Steeve', 'value': 'Steeve'}
                        ],
                placeholder="Default value 'Hannibal'",
                value="Hannibal",
                style=dict(width='40%',display='inline-block')
                    )
            ],
                style={'display': 'flex', 'align-items': 'center'}
            ),

            dcc.Graph(id='live-graphs_host'),
            dcc.Interval(id='graph-update', interval=0.5*10000)
            ])

        
    def set_connected_hosts(self,x):
        self.connected_hosts+=x

    def get_connected_hosts(self):
        return self.connected_hosts

    def set_received_hosts(self,x):
        self.received_hosts+=x

    def get_received_hosts(self):
        return self.received_hosts

    def set_active_hosts(self,x):
        self.active_hosts+=x

    def get_active_hosts(self):
        return self.active_hosts
    
    def get_app(self):
        return self.app




In [7]:

dsh = Dashboard()

app= dsh.get_app()

@app.callback(
    Output("live-graphs_host", "figure"),
     Input(component_id='hosts_dropdown', component_property='value'),
     Input('graph-update', 'n_intervals'))
def update_output(value, interval):
        
    log_ofo_time =0
    init_datetime=int((time.time()-4)*1000)
    end_datetime=int(time.time()*1000)
    past_files=sorted( [ filename for filename in glob.glob("../output/*.txt") if os.path.getmtime(filename)>=init_datetime/1000-log_ofo_time ] , key=os.path.getmtime)[::-1]
    

    
    for filename in past_files:
        set_connected_hosts(dsh,connected_to(filename,init_datetime,end_datetime,value))
        set_received_hosts(dsh,received_from(filename,init_datetime,end_datetime,value))
        set_active_hosts(dsh,generated_conn(filename, init_datetime, end_datetime))

        
    fig = make_subplots( rows=2, cols=2,
    specs=[[{"type": "domain"}, {"type": "domain"}],
           [{"colspan": 2}, None]],
    subplot_titles=("Generated connections","received connections", "total number of connections of all hosts")
                       )

    fig.add_trace(go.Pie(labels=list(dsh.get_connected_hosts().keys()), values=list(dsh.get_connected_hosts().values()), textinfo='label+value', name='connected to', hole=.65),
              row=1, col=1)
    fig.add_trace(go.Pie(labels=list(dsh.get_received_hosts().keys()), values=list(dsh.get_received_hosts().values()), textinfo='label+value', name='received from',  hole=.65),
              row=1, col=2)
    fig.add_trace(go.Bar(x=list(dsh.get_active_hosts().keys()), y=list(dsh.get_active_hosts().values()), name="All connections", marker=dict(color='orange', coloraxis="coloraxis") ),
              row=2, col=1)
    
    #fig.update_layout(
    #title_text="Host that connected and received connection from the selected host")


    return fig

NameError: name 'app' is not defined

In [20]:
app.run_server(host='0.0.0.0', port=8080,debug=True, use_reloader=False)

NameError: name 'app' is not defined

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:


app.layout = html.Div(
    [
        html.Label(['File processign App']),
        dcc.Dropdown(
            id='hosts_dropdown',
            options=[
            {'label': 'Hannibal', 'value': 'Hannibal'},
            {'label': 'Hanny', 'value': 'Hanny'},
            {'label': 'Steeve', 'value': 'Steeve'}
            ],
        value="Hannibal",
        ),
        dcc.Graph(id='live-graphs_host'),
        dcc.Interval(id='graph-update', interval=0.5*10000)


    ]
)

In [None]:
'''

#when ‘n_intervals’ of the ‘graph-update’ changes, I will use the update_graph_scatter(input_data) function, to update the ‘figure’ of the ‘live_graph’.

@app.callback(Output('live-graph', 'figure'),
              [Input('graph-update', 'n_intervals')])
def update_graph_scatter(input_data):
    
    global hostnames
    log_ofo_time =0
    init_datetime=int((time.time()-4)*1000)
    end_datetime=int(time.time()*1000)
    past_files=sorted( [ filename for filename in glob.glob("../output/*.txt") if os.path.getmtime(filename)>=init_datetime/1000-log_ofo_time ] , key=os.path.getmtime)[::-1]
    


    
    for filename in past_files:
        hostnames=np.concatenate((hostnames,np.array(generated_conn_dash(filename, init_datetime, end_datetime))))
        #connected_hostss+=connected_to(filename,init_datetime,end_datetime,'Hannibal')
    
    
    unique, val=np.unique(hostnames, return_counts=True)
    figure = px.histogram(hostnames, range_y=[0, max(val)+10])
    return figure

'''

In [None]:

hosnamesss= Counter() 
init_datetime=int((time.time()-1000)*1000)
end_datetime=int(time.time()*1000)
past_files=sorted( [ filename for filename in glob.glob("../output/*.txt") if os.path.getmtime(filename)>=init_datetime/1000 ] , key=os.path.getmtime)[::-1]
    


    
for filename in past_files:
    #s=np.array(generated_conn_dash(filename, init_datetime, end_datetime))
    hosnamesss+=generated_conn(filename, init_datetime, end_datetime)
        #connected_hostss+=connected_to(filename,init_datetime,end_datetime,'Hannibal')

In [None]:
hosnamesss

In [None]:

list(hosnamesss.values())

In [None]:

go.B


In [None]:

random_x

In [None]:
connectedhosts=Counter()
log_ofo_time =0
init_datetime=int((time.time()-500)*1000)
end_datetime=int(time.time()*1000)
past_files=sorted( [ filename for filename in glob.glob("../output/*.txt") if os.path.getmtime(filename)>=init_datetime/1000-log_ofo_time ] , key=os.path.getmtime)[::-1]
    
for filename in past_files:
    #hostnames=np.concatenate((hostnames,np.array(generated_conn_dash(filename, init_datetime, end_datetime))))
    #connected_hostss+=connected_to(filename,init_datetime,end_datetime,'Hannibal')
    connectedhosts+=connected_to(filename,init_datetime,end_datetime,'Hannibal')
    

In [None]:
dict(connectedhosts).items()

In [None]:

s=pd.DataFrame(dict(connectedhosts).items(), columns=['Name', 'Value'])

In [None]:
s

In [None]:
s.pivot_table(s, values='Value', columns = 'Name')

In [None]:
px.pie(data_frame=s,values='Value',names='Name')

In [None]:
connectedhosts+=Counter({'Hanny': 12, 'Steeve': 3, 'Hannibal': 4})

In [None]:
np.array(list(connectedhosts.values()))

In [None]:
px.pie(list(connectedhosts.values()),labels=list(connectedhosts.keys()))

In [None]:
px.histogram(x=list(connected_hosts.keys()),y=list(connected_hosts.values()), range_y=[0, max(connected_hosts.values())+40])

In [None]:
len(np.unique(hosnamess))

In [None]:
input1='anass'

In [None]:
print(u'Input 1 {}'.format(input1))

In [None]:
unique, val=np.unique(hosnamess, return_counts=True)

In [None]:
val

In [None]:
hosnamess

In [None]:
np.histogram(list(hosnamess),bins=range(0, 60, 5))

In [None]:
for line in reversed(list(open('../output/log_1.txt'))):
        #print(''.join(['parsed line: ',line]))
        #if (int(line.split()[0]) >= int(1607948683153) and  int(line.split()[0])<= int(1607948883412) ):
            #print(''.join(['----> considered line: ',line]))
    #hostnames.append(line.split()[1])
    print(line)

In [None]:
    X.append(X[-1]+1)
    Y.append(Y[-1]+Y[-1]*random.uniform(-0.1,0.1))

    data = plotly.graph_objs.Scatter(
            x=list(X),
            y=list(Y),
            name='Scatter',
            mode= 'lines+markers'
            )

    return {'data': [data],'layout' : go.Layout(xaxis=dict(range=[min(X),max(X)]),
                                                yaxis=dict(range=[min(Y),max(Y)]),)}

In [None]:
import numpy as np

data = np.random.normal(3, 2, size=500)


In [None]:
hosts=['Hannibal', 'Hanny', 'Steeve']

x,y=random.sample(hosts, 2)

In [None]:
y


In [None]:
    '''
    return {
        'data': [go.Pie(labels=list(connected_hosts.keys()),
                        values=list(connected_hosts.values()),
                        #marker=dict(colors=colors),
                        textinfo='label+value',
  
                        hole=.7,
                        


                        )],}


        '''


                        