<h1>Sysmon Threat Hunting</h1>

<h2>Importing Libraries</h2>

In [1]:
import evtx
import json
import pandas as pd

<h2>Reading and Parsing evtx File</h2>

In [3]:
# File name of the Sysmon .evtx log to analyse; as a relative path it is
# resolved against the notebook's working directory.
evtx_file = "Microsoft-Windows-Sysmon_4Operational.evtx"

In [4]:
# Build a parser for the log file, then collect every record it yields
# (as JSON) into a list so the following cells can loop over them.
parser = evtx.PyEvtxParser(evtx_file)
parse_json = [record for record in parser.records_json()]

Using json.loads converts each record's data into a dictionary object. If you are more comfortable with string processing, you can use RegEx instead, because the content of pj['data'] is a string.

In [5]:
# Each record keeps its payload as a JSON string under 'data'; strip the
# surrounding whitespace and decode it into a dictionary.
events = [json.loads(pj['data'].strip()) for pj in parse_json]

<h2>Check out the dictionary keys and values for later use</h2>

In [6]:
# Display the first decoded event to see which keys are available to the cells below.
events[0]

{'Event': {'#attributes': {'xmlns': 'http://schemas.microsoft.com/win/2004/08/events/event'},
  'EventData': {'CommandLine': 'C:\\Windows\\system32\\wbem\\unsecapp.exe -Embedding',
   'Company': 'Microsoft Corporation',
   'CurrentDirectory': 'C:\\Windows\\system32\\',
   'Description': 'Sink to receive asynchronous callbacks for WMI client application',
   'FileVersion': '10.0.10240.16384 (th1.150709-1700)',
   'Hashes': 'MD5=D955EA7DA223A2CABE44D80C01D7E8B3,SHA256=AB9A7F997DA81BD7202C49BF0F40AB06B89FCAFDA9F91F89A0B186291890E099,IMPHASH=5B6ED51658B303434CC9DD19394BFD80',
   'Image': 'C:\\Windows\\System32\\wbem\\unsecapp.exe',
   'IntegrityLevel': 'System',
   'LogonGuid': 'D9EF9F5E-3C82-62A3-E703-000000000000',
   'LogonId': '0x3e7',
   'OriginalFileName': 'unsecapp.dll',
   'ParentCommandLine': 'C:\\Windows\\system32\\svchost.exe -k DcomLaunch',
   'ParentImage': 'C:\\Windows\\System32\\svchost.exe',
   'ParentProcessGuid': 'D9EF9F5E-3C83-62A3-0C00-000000000500',
   'ParentProcessId

<h2>Choosing fields and populating dataframe</h2>

The Hashes key may hold several values, each computed with a different algorithm. If you want to use another hash type, edit the header and the hash filter under the #Hash comment.

In [7]:
# Column names of the process DataFrame, in the same order as the values
# collected for each row below.
header = ['timestamp', 'computer_name',
          'process_path', 'parent_path',
          'command_line', 'parent_command_line',
          'user', 'md5',
          'sha256', 'company', 'description']

events_list = []
for evt in events:
    try:
        system = evt['Event']['System']
        data = evt['Event']['EventData']
        #Hash
        # 'Hashes' is a comma-separated list such as "MD5=...,SHA256=...".
        # Parsing it once into an {algorithm: digest} mapping guarantees
        # exactly one value per hash column, so a row can never end up
        # shorter or longer than `header`.
        hashes = dict(
            item.strip().split('=', 1)
            for item in data['Hashes'].split(',')
            if '=' in item
        )
        new_evt = [
            data['UtcTime'],
            system['Computer'],
            data['Image'],
            data['ParentImage'],
            data['CommandLine'],
            data['ParentCommandLine'],
            data['User'],
            hashes.get('MD5', ''),      # empty string when the algorithm is absent
            hashes.get('SHA256', ''),
            data['Company'],
            data['Description'],
        ]
    except KeyError:
        # Deliberate filter: events that lack any of the fields above are
        # left out of this table (presumably everything that is not a
        # process-creation event -- TODO confirm against the Sysmon schema).
        continue
    events_list.append(new_evt)

df = pd.DataFrame(events_list, columns=header)
# UtcTime strings look like "2022-06-10 12:43:55.309".
df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%d %H:%M:%S.%f')
df.head(5)

Unnamed: 0,timestamp,computer_name,process_path,parent_path,command_line,parent_command_line,user,md5,sha256,company,description
0,2022-06-10 12:43:55.309,DESKTOP-I3TFLV3,C:\Windows\System32\wbem\unsecapp.exe,C:\Windows\System32\svchost.exe,C:\Windows\system32\wbem\unsecapp.exe -Embedding,C:\Windows\system32\svchost.exe -k DcomLaunch,NT AUTHORITY\SYSTEM,D955EA7DA223A2CABE44D80C01D7E8B3,AB9A7F997DA81BD7202C49BF0F40AB06B89FCAFDA9F91F...,Microsoft Corporation,Sink to receive asynchronous callbacks for WMI...
1,2022-06-10 12:43:53.955,DESKTOP-I3TFLV3,C:\Program Files\Windows Defender\MsMpEng.exe,C:\Windows\System32\services.exe,"""C:\Program Files\Windows Defender\MsMpEng.exe""",C:\Windows\system32\services.exe,NT AUTHORITY\SYSTEM,C5A8CE5EC5BEC57370E86B209B493022,9644BCA5827C1296342A06497AD27B54D0329C05526F69...,Microsoft Corporation,Antimalware Service Executable
2,2022-06-10 12:43:53.233,DESKTOP-I3TFLV3,C:\Windows\Sysmon64.exe,C:\Windows\System32\services.exe,C:\Windows\Sysmon64.exe,C:\Windows\system32\services.exe,NT AUTHORITY\SYSTEM,DBB70DF036B6811F1328BB06BF8671FE,373061D73B6743651050749DBA958090A954939109FC51...,Sysinternals - www.sysinternals.com,System activity monitor
3,2022-06-10 12:43:50.799,DESKTOP-I3TFLV3,C:\Windows\System32\spoolsv.exe,C:\Windows\System32\services.exe,C:\Windows\System32\spoolsv.exe,C:\Windows\system32\services.exe,NT AUTHORITY\SYSTEM,58C17D92AD61EC7A98B05F4FAD0D205A,B881134A1BD9194145A9D18BDB34D57E2C167F06C2A936...,Microsoft Corporation,Spooler SubSystem App
4,2022-06-10 12:43:49.553,DESKTOP-I3TFLV3,C:\Windows\System32\svchost.exe,C:\Windows\System32\services.exe,C:\Windows\system32\svchost.exe -k LocalService,C:\Windows\system32\services.exe,NT AUTHORITY\LOCAL SERVICE,A1AEAFC58DF7803B8AA2B09EA93C722F,8A88E067E89D1DCFCAFD842C0CB7DE5DC7E6754447F206...,Microsoft Corporation,Host Process for Windows Services


<h2>Top software vendors active in the environment</h2>

In [8]:
# Number of rows per (company, description) pair, most frequent first.
vendor_sizes = df.groupby(['company', 'description']).size()
top_procs = (
    vendor_sizes
    .sort_values(ascending=False)
    .reset_index(name='counts')
)
top_procs.head(10)

Unnamed: 0,company,description,counts
0,Mozilla Corporation,Firefox,50
1,Microsoft Corporation,Microsoft Malware Protection Command Line Utility,29
2,Microsoft Corporation,Windows Command Processor,27
3,Microsoft Corporation,Windows Activation Client,20
4,Microsoft Corporation,Interactive services detection,18
5,Microsoft Corporation,Windows host process (Rundll32),17
6,Microsoft Corporation,Task Scheduler Configuration Tool,17
7,Microsoft Corporation,Net Command,16
8,Microsoft Corporation,Windows Modules Installer Worker,16
9,Microsoft Corporation,Windows Modules Installer,14


<h2>Top process path and hash</h2>

In [9]:
# Number of rows per (process path, MD5) pair, most frequent first.
path_hash_sizes = df.groupby(['process_path', 'md5']).size()
top_procs = (
    path_hash_sizes
    .sort_values(ascending=False)
    .reset_index(name='counts')
)
top_procs.head(10)

Unnamed: 0,process_path,md5,counts
0,C:\Program Files (x86)\Mozilla Firefox\firefox...,9B6789127BE4361B486DF0D0BC297385,35
1,C:\Program Files\Windows Defender\MpCmdRun.exe,CFAC44E2F6522F4D40553BD50E977A1F,29
2,C:\Windows\System32\cmd.exe,A6177D080759CF4A03EF837A38F62401,27
3,C:\Windows\System32\slui.exe,244729BECF53AFAE4D548275AE000A55,20
4,C:\Windows\System32\UI0Detect.exe,C844E39B900FFA46CA8DD2BBA670A077,18
5,C:\Windows\System32\rundll32.exe,5DED2A3F11AE916C8F2724947E736261,17
6,C:\Program Files (x86)\Mozilla Firefox\firefox...,795E1FB584594101A0887EB9099EEE13,15
7,C:\Windows\servicing\TrustedInstaller.exe,62D6A900C5DFF2ECF131384E5A5C85AB,14
8,C:\Windows\System32\svchost.exe,A1AEAFC58DF7803B8AA2B09EA93C722F,12
9,C:\Windows\SysWOW64\schtasks.exe,3F024CC25B2264E0D90895F456956C51,12


<h2>Checking for procdump process in the environment</h2>

In [10]:
# Windows file names are case-insensitive, so match "procdump" regardless of
# case (e.g. "ProcDump64.exe"). regex=False treats the pattern as a literal
# substring; na=False makes rows without a command line count as non-matches.
df[df['command_line'].str.contains("procdump", case=False, regex=False, na=False)]

Unnamed: 0,timestamp,computer_name,process_path,parent_path,command_line,parent_command_line,user,md5,sha256,company,description
434,2022-06-14 02:43:23.979,DESKTOP-I3TFLV3,C:\Procdump\procdump64.exe,C:\Windows\System32\cmd.exe,procdump64.exe -accepteula -ma lsass.exe lsas...,"""C:\Windows\system32\cmd.exe""",DESKTOP-I3TFLV3\Win10Sysmon,8CC9C90598900CECB00192DA74163250,1A107C3ECE1880CBBDC0A6C0817624B0DD033B02EBAF7F...,Sysinternals - www.sysinternals.com,Sysinternals process dump utility
449,2022-06-14 02:41:09.015,DESKTOP-I3TFLV3,C:\Procdump\procdump64.exe,C:\Windows\System32\cmd.exe,procdump64.exe -accepteula -ma lsass.exe lsas...,"""C:\Windows\system32\cmd.exe""",DESKTOP-I3TFLV3\Win10Sysmon,8CC9C90598900CECB00192DA74163250,1A107C3ECE1880CBBDC0A6C0817624B0DD033B02EBAF7F...,Sysinternals - www.sysinternals.com,Sysinternals process dump utility


<h2>Creating another dataframe to check file creation event</h2>

Notice the if clause that keeps only events with a specific EventID (11 here). You can use a different EventID to look for a different event type.

In [11]:
# Column names for the file-creation table, in the order the row values are built.
header = ['timestamp', 'computer_name', 'process_path', 'file_name', 'user']

file_creation_event = []
for evt in events:
    # Only events whose EventID is 11 are of interest in this cell.
    if evt['Event']['System']['EventID'] != 11:
        continue
    try:
        data = evt['Event']['EventData']
        row = [
            data['UtcTime'],
            evt['Event']['System']['Computer'],
            data['Image'],
            data['TargetFilename'],
            data['User'],
        ]
    except KeyError:
        # An event missing any of the fields above is skipped.
        continue
    file_creation_event.append(row)

df = pd.DataFrame(file_creation_event, columns=header)
df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%d %H:%M:%S.%f')
df.tail(10)

Unnamed: 0,timestamp,computer_name,process_path,file_name,user
1938,2022-06-14 01:29:22.379,DESKTOP-I3TFLV3,C:\Windows\Explorer.EXE,C:\Users\Win10Sysmon\Downloads\Procdump\procdu...,DESKTOP-I3TFLV3\Win10Sysmon
1939,2022-06-14 01:29:21.199,DESKTOP-I3TFLV3,C:\Windows\Explorer.EXE,C:\Users\Win10Sysmon\Downloads\Procdump\procdu...,DESKTOP-I3TFLV3\Win10Sysmon
1940,2022-06-14 01:29:21.094,DESKTOP-I3TFLV3,C:\Windows\Explorer.EXE,C:\Users\Win10Sysmon\Downloads\Procdump,DESKTOP-I3TFLV3\Win10Sysmon
1941,2022-06-14 01:29:00.555,DESKTOP-I3TFLV3,C:\Program Files (x86)\Mozilla Firefox\firefox...,C:\Users\Win10Sysmon\Downloads\Procdump.zip:Zo...,DESKTOP-I3TFLV3\Win10Sysmon
1942,2022-06-14 01:28:59.838,DESKTOP-I3TFLV3,C:\Program Files (x86)\Mozilla Firefox\firefox...,C:\Users\Win10Sysmon\Downloads\Procdump.zip,DESKTOP-I3TFLV3\Win10Sysmon
1943,2022-06-14 01:28:59.824,DESKTOP-I3TFLV3,C:\Program Files (x86)\Mozilla Firefox\firefox...,C:\Users\Win10Sysmon\Downloads\_5RKuiDp.zip.part,DESKTOP-I3TFLV3\Win10Sysmon
1944,2022-06-14 01:28:59.823,DESKTOP-I3TFLV3,C:\Program Files (x86)\Mozilla Firefox\firefox...,C:\Users\Win10Sysmon\Downloads\_5RKuiDp.zip.part,DESKTOP-I3TFLV3\Win10Sysmon
1945,2022-06-14 02:43:24.012,DESKTOP-I3TFLV3,C:\Procdump\procdump64.exe,C:\Procdump\lsass_dump.dmp,DESKTOP-I3TFLV3\Win10Sysmon
1946,2022-06-14 02:41:09.060,DESKTOP-I3TFLV3,C:\Procdump\procdump64.exe,C:\Procdump\lsass_dump.dmp,DESKTOP-I3TFLV3\Win10Sysmon
1947,2022-06-14 03:46:49.355,DESKTOP-I3TFLV3,C:\Windows\Explorer.EXE,C:\Users\Win10Sysmon\Downloads\New Text Docume...,DESKTOP-I3TFLV3\Win10Sysmon
