In [None]:
import json
import re
import warnings

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from elasticsearch import Elasticsearch
from pandas.io.json import json_normalize
# Suppress every warning for the remainder of the session.
warnings.filterwarnings(action='ignore')

# Elasticsearch client used by all queries below (host, port and timeout are
# hard-coded here).
es = Elasticsearch(hosts=['127.0.0.1'], port=9200, timeout=480)


In [None]:
# Security-Auditing events with event_id 4624 and LogonType 3 from the last
# 30 days, excluding source address ::1 and the ANONYMOUS LOGON account.
# At most 3000 hits are requested ('size').
logon_must = [
    {"match" : {'source_name': "Microsoft-Windows-Security-Auditing" }},
    {"match": {'event_id': 4624}},
    {"match": {'event_data.LogonType': 3}},
]
logon_must_not = [
    {"match" : {'event_data.IpAddress': "::1" }},
    {"match": {'event_data.TargetUserName': "ANONYMOUS LOGON"}},
]
logon_time_filter = [
    {"range" : {"@timestamp" : {"gt": "now-30d", "lt" : "now"}}},
]

successful_logon_query = es.search(
    index='winlogbeat-*',
    body={
        'size': 3000,
        'query': {
            'bool': {
                'must': logon_must,
                'must_not': logon_must_not,
                "filter": logon_time_filter,
            }
        },
    },
)

# Sysmon events with event_id 3 from the last 30 days, excluding events whose
# user is NT AUTHORITY\SYSTEM. At most 3000 hits are requested ('size').
sysmon_network_query = es.search(index='winlogbeat-*', body={
    'size': 3000,
    'query': {
        'bool': {
            'must': [
                {"match": {'source_name': 'Microsoft-Windows-Sysmon'}},
                {"match": {'event_id': 3}},
            ],
            'must_not': [
                # The backslash is escaped explicitly: "\S" is an invalid
                # escape sequence that newer Python versions warn about.
                # The string sent to Elasticsearch is unchanged.
                {"match": {'event_data.User': "NT AUTHORITY\\SYSTEM"}},
            ],
            "filter": [
                {"range": {"@timestamp": {"gt": "now-30d", "lt": "now"}}},
            ],
        }
    },
})

In [None]:
# Flatten the `_source` document of every logon hit into one row per event.
# A concrete list is passed instead of `map(...)`: on Python 3 `map` is a
# single-pass iterator, while json_normalize is documented to take a dict or a
# list of dicts.
sl_normalized = json_normalize(
    [hit["_source"] for hit in successful_logon_query['hits']['hits']]
)

In [None]:
# Flatten the `_source` document of every Sysmon hit into one row per event.
# A concrete list is passed instead of `map(...)`: on Python 3 `map` is a
# single-pass iterator, while json_normalize is documented to take a dict or a
# list of dicts.
sn_normalized = json_normalize(
    [hit["_source"] for hit in sysmon_network_query['hits']['hits']]
)

In [None]:
# Reduce the logon events to (sourceip, target, user, @timestamp).
# Raw strings keep the regex backslashes literal (no invalid-escape warnings).
ipv4_pattern = r'^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$'

# Keep rows whose source address is a dotted-quad IPv4 address; a missing
# address counts as "no match" instead of producing a NaN in the mask.
has_ipv4_source = sl_normalized["event_data.IpAddress"].str.contains(ipv4_pattern, na=False)

# Drop account names containing '$'; rows with a missing name are dropped too,
# since no user label could be built for them.
target_user = sl_normalized['event_data.TargetUserName']
is_user_account = target_user.notnull() & ~target_user.str.contains(r'.*\$', na=False)

# .copy() makes sl_df an independent frame, so adding the column below does
# not write into a slice of sl_normalized.
sl_df = sl_normalized[has_ipv4_source & is_user_account].copy()
sl_df['user'] = sl_df['event_data.TargetDomainName'] + '\\' + sl_df['event_data.TargetUserName']
sl_df = sl_df.rename(columns={"event_data.IpAddress": "sourceip", "computer_name": "target"})

sl_df_clean = sl_df[['sourceip', 'target', 'user', '@timestamp']].reset_index(drop=True)
sl_df_clean.head(10)

In [None]:
# Reduce the Sysmon events to (source, targetip, user, @timestamp).
# Raw string keeps the regex backslashes literal (no invalid-escape warnings).
ipv4_pattern = r'^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$'

# Keep rows whose source address is a dotted-quad IPv4 address; a missing
# address counts as "no match" instead of producing a NaN in the mask.
has_ipv4_source = sn_normalized["event_data.SourceIp"].str.contains(ipv4_pattern, na=False)

# sn_df keeps all remaining columns (a later cell reads 'computer_name' from it).
sn_df = sn_normalized[has_ipv4_source].rename(columns={
    "event_data.SourceHostname": "source",
    "event_data.SourceIp": "sourceip",
    "event_data.DestinationIp": "targetip",
    "event_data.User": "user",
})

# The result of reset_index is assigned; the original call discarded it.
sn_df_clean = sn_df[['source', 'targetip', 'user', '@timestamp']].reset_index(drop=True)
sn_df_clean.head(10)

In [None]:
# Lookup table of the distinct (sourceip, computer_name) pairs seen in the
# Sysmon events; used below to translate IP addresses into computer names.
ip_name_pairs = sn_df[['sourceip', 'computer_name']].drop_duplicates()
sysmon_ip_computer_reference = ip_name_pairs.reset_index(drop=True)
sysmon_ip_computer_reference

In [None]:
# Attach the computer name known for each logon's source IP (left join, so
# logons from IPs absent from the lookup table keep a null computer_name).
sl_df_merged = sl_df_clean.merge(
    sysmon_ip_computer_reference,
    how="left",
    left_on="sourceip",
    right_on="sourceip",
)
sl_df_merged.head(10)

In [None]:
# Attach the computer name known for each connection's destination IP (left
# join: destinations absent from the lookup table keep a null computer_name).
sn_df_merged = sn_df_clean.merge(
    sysmon_ip_computer_reference,
    how="left",
    left_on="targetip",
    right_on="sourceip",
)
sn_df_merged.head(10)

In [None]:
# Keep only logons whose source IP resolved to a computer name and expose that
# name as 'source'.
has_source_name = sl_df_merged['computer_name'].notnull()
sl_df_resolved = (
    sl_df_merged.loc[has_source_name, ['computer_name', 'target', 'user', '@timestamp']]
    .rename(columns={'computer_name': "source"})
    # The original called reset_index without keeping its result.
    .reset_index(drop=True)
)
sl_df_resolved.head(10)

In [None]:
# Where the destination IP did not resolve to a computer name, fall back to
# the IP itself. The column is assigned back explicitly: an in-place fillna on
# `sn_df_merged.computer_name` is not guaranteed to update the parent frame.
sn_df_merged['computer_name'] = sn_df_merged['computer_name'].fillna(sn_df_merged['targetip'])

# rename() returns a new frame, so no slice of sn_df_merged is mutated.
sn_df_resolved = (
    sn_df_merged[['source', 'computer_name', 'user', '@timestamp']]
    .rename(columns={'computer_name': 'target'})
)
sn_df_resolved.head(10)

In [None]:
# Every distinct (source, target, user) link seen in either event type.
# pd.concat replaces DataFrame.append, which is deprecated and removed in
# pandas 2.0; concat is available in all pandas versions.
sl_sn_appended = (
    pd.concat([sl_df_resolved, sn_df_resolved])[['source', 'target', 'user']]
    .drop_duplicates()
    .reset_index(drop=True)
)
sl_sn_appended

In [None]:
# Pair every logon with the connections whose source is that logon's target.
# Overlapping column names receive the pandas default _x (logon) and
# _y (connection) suffixes.
sl_sn_merged = sl_df_resolved.merge(
    sn_df_resolved,
    how="left",
    left_on="target",
    right_on="source",
)
sl_sn_merged.head(10)

In [None]:
# Keep logon/connection pairs where the same user opened the connection
# within 20 seconds after the logon.
sl_sn_merged['@timestamp_x'] = pd.to_datetime(sl_sn_merged['@timestamp_x'])
sl_sn_merged['@timestamp_y'] = pd.to_datetime(sl_sn_merged['@timestamp_y'])

# Vectorised comparison: a row-wise apply() returns a DataFrame rather than a
# boolean Series when the frame is empty, which breaks the mask.
elapsed = sl_sn_merged['@timestamp_y'] - sl_sn_merged['@timestamp_x']

# The original only tested `elapsed < 20sec`, so connections made any time
# BEFORE the logon (negative elapsed) also passed. The window is now bounded
# below at zero. Rows without a matching connection have NaT here and compare
# False, as before.
within_window = (elapsed >= pd.Timedelta(0)) & (elapsed < pd.Timedelta("20sec"))
same_user = sl_sn_merged['user_y'] == sl_sn_merged['user_x']

sl_sn_merged = sl_sn_merged[within_window & same_user]

In [None]:
# Logon half (_x columns) of the correlated pairs, as distinct
# (source, target, user) links.
sl_df_split = (
    sl_sn_merged[['source_x', 'target_x', 'user_x']]
    .rename(columns={'source_x':'source', 'target_x':'target', 'user_x':'user'})
    .drop_duplicates()
    .reset_index(drop=True)
)
sl_df_split

In [None]:
# Connection half (_y columns) of the correlated pairs, as distinct
# (source, target, user) links.
sn_df_split = (
    sl_sn_merged[['source_y', 'target_y', 'user_y']]
    .rename(columns={'source_y':'source', 'target_y':'target', 'user_y':'user'})
    .drop_duplicates()
    .reset_index(drop=True)
)
sn_df_split

In [None]:
# All links that took part in a correlated logon -> connection pair, tagged
# with label '1'.
# pd.concat replaces DataFrame.append (removed in pandas 2.0).
# A triple present in both halves is kept once: this frame is the right-hand
# side of a left merge on (source, target, user) in a later cell, where a
# repeated key would duplicate the matching link.
sl_sn_split_appended = (
    pd.concat([sl_df_split, sn_df_split])
    .drop_duplicates()
    .reset_index(drop=True)
    .assign(label='1')
)
sl_sn_split_appended

In [None]:
# Left-join the label onto the full link list; links that were not part of a
# correlated pair get a null label.
link_key = ["source", "target", "user"]
src_dst_merged = sl_sn_appended.merge(
    sl_sn_split_appended,
    how="left",
    left_on=link_key,
    right_on=link_key,
)
src_dst_merged

In [None]:
# Replace every null in the frame with the string '0' (this is what gives
# unlabelled links the label '0'; nulls in other columns are replaced too).
src_dst = src_dst_merged.fillna(value='0')
src_dst

In [None]:
# Index of the distinct node names (sources first, then targets), in order of
# first appearance; a name's position in this index is used as its node id.
# pd.concat replaces Series.append, which is removed in pandas 2.0.
endpoint_names = pd.concat([src_dst['source'], src_dst['target']], ignore_index=True)
computers = pd.Index(endpoint_names.unique())
computers

In [None]:
# One dict per link with the keys source, target, user and label.
# to_dict('records') is used instead of a row-wise apply(): on an empty frame
# apply(axis=1) returns a DataFrame, and list() of that yields the column
# names rather than an empty list of records.
temp_links_list = src_dst[['source', 'target', 'user', 'label']].to_dict('records')
temp_links_list

In [None]:
# Re-express each link with source/target replaced by the node's position in
# `computers`.
links = [
    {
        "user": entry['user'],
        "label": entry['label'],
        "source": computers.get_loc(entry['source']),
        "target": computers.get_loc(entry['target']),
    }
    for entry in temp_links_list
]

In [None]:
# Display the index-based link records.
links

In [None]:
# One {"id": name} record per entry of `computers`, in index order, so a
# node's list position equals the index used in `links`.
nodes = [{"id": name} for name in computers]

In [None]:
# Display the node records.
nodes

In [None]:
# Bundle nodes and links into a single mapping with the keys "nodes" and
# "links".
json_network = dict(nodes=nodes, links=links)
json_network.keys()


In [None]:
# Serialise the graph with sorted keys and a one-space indent, then print it.
# (`json` is imported in the notebook's import cell rather than here.)
json_dump = json.dumps(json_network, indent=1, sort_keys=True)
print(json_dump)

In [None]:
# Write the serialised graph to graph.json in the working directory.
# The context manager closes the file even if the write raises.
filename = 'graph.json'
with open(filename, 'w') as json_out:
    json_out.write(json_dump)

In [None]:
# Build a graph with one edge per src_dst row (source -> target) carrying the
# 'user' column as an edge attribute.
# NetworkX 2.x provides this as from_pandas_edgelist; from_pandas_dataframe
# is the older name and is no longer available in later releases. Prefer the
# new name and fall back to the old one only when the new one is missing.
_edges_to_graph = getattr(nx, 'from_pandas_edgelist', None) or nx.from_pandas_dataframe
indices_network = _edges_to_graph(src_dst, 'source', 'target', 'user')

In [None]:
# Degree of every node as a plain {node: degree} dict.
# nx.degree returns a dict on NetworkX 1.x but a view of (node, degree) pairs
# on 2.x; dict() normalises both so later .keys()/.values() calls work.
i = dict(nx.degree(indices_network))
i

In [None]:
# Draw the graph with node size proportional to degree (60 per unit).
# dict() accepts both a 1.x degree dict and a 2.x degree view.
degree_by_node = dict(i)
nx.draw(
    indices_network,
    # The original passed graph_layout='shell', which is not a drawing keyword
    # (older releases ignore it, newer ones reject unknown keywords). The
    # shell layout it asked for is supplied through `pos` instead.
    pos=nx.shell_layout(indices_network),
    with_labels=True,
    nodelist=list(degree_by_node),
    node_size=[degree * 60 for degree in degree_by_node.values()],
)

In [None]:
# Render the current matplotlib figure.
plt.show()