# A Basic Exploratory Analysis - APT 29 Detection Hackathon!!

### 1. Importing Python Libraries

In [1]:
from pyspark.sql import SparkSession

### 2. Initializing SparkSession

In [2]:
# Reuse the active Spark session if one exists; otherwise start a new one.
spark = (
    SparkSession.builder
    .getOrCreate()
)
# Make Spark SQL resolve column names case-sensitively.
# NOTE(review): presumably needed because the dataset contains fields whose
# names differ only by case -- confirm against the loaded schema.
spark.conf.set("spark.sql.caseSensitive", "true")

### 3. Importing Dataset

In [3]:
!unzip datasets/day1/apt29_evals_day1_manual.zip

Archive:  apt29_evals_day1_manual.zip
  inflating: apt29_evals_day1_manual_2020-05-01225525.json  


In [4]:
# Load the extracted JSON export into a Spark DataFrame.
# No schema is passed, so Spark infers it from the data.
df = spark.read.json(
    path='apt29_evals_day1_manual_2020-05-01225525.json',
)

### 4. Creating a Temporary SQL View

In [5]:
# Register the DataFrame as the SQL view "apt29Table" used by every query below.
# createOrReplaceTempView keeps the cell idempotent: createTempView raises an
# "already exists" error as soon as the cell is executed a second time in the
# same Spark session.
df.createOrReplaceTempView("apt29Table")

### 5) What processes have been created based on **Sysmon 1: Process Creation**?

In [6]:
# Count Sysmon process-creation events (EventID 1) per executable image.
# Ascending order lists the least frequently launched images first.
# SQL string literals are single-quoted (standard SQL); double-quoted text is
# parsed as an identifier when ANSI double-quoted identifiers are enabled.
process = spark.sql(
    """
    SELECT Image, count(*) AS count
    FROM apt29Table
    WHERE Channel = 'Microsoft-Windows-Sysmon/Operational'
        AND EventID = 1
    GROUP BY Image
    ORDER BY count ASC
    """
)
# Print up to 40 rows, cutting values longer than 70 characters.
process.show(40, truncate=70)

+----------------------------------------------------------------------+-----+
|                                                                 Image|count|
+----------------------------------------------------------------------+-----+
|        C:\WindowsAzure\Packages\GuestAgent\WindowsAzureGuestAgent.exe|    1|
|                                       C:\Windows\System32\rdpclip.exe|    1|
|                                        C:\Windows\System32\hostui.exe|    1|
|                                      C:\Windows\System32\userinit.exe|    1|
|                                        C:\Windows\System32\ctfmon.exe|    1|
|C:\Windows\SystemApps\Microsoft.Windows.Cortana_cw5n1h2txyewy\Searc...|    1|
|                               C:\WindowsAzure\Packages\WaAppAgent.exe|    1|
|                                       C:\Windows\System32\TSTheme.exe|    1|
|                                 C:\Windows\System32\SearchIndexer.exe|    1|
|C:\Windows\SystemApps\Microsoft.Windows.StartMenuEx

### 6) What is the process related to "ProgramData\victim"?

In [7]:
# Fetch the process-creation records (Sysmon EventID 1) whose image path
# contains "3aka3", together with the fields describing the parent process.
processVictim = spark.sql(
    """
    SELECT UtcTime, ProcessGuid, ProcessId, Image, Description, CommandLine, CurrentDirectory,
           ParentProcessGuid, ParentProcessId, ParentImage, ParentCommandLine
    FROM apt29Table
    WHERE Channel = 'Microsoft-Windows-Sysmon/Operational'
        AND EventID = 1
        AND Image LIKE '%3aka3%'
    """
)
# vertical=True prints one field per line; truncate=False keeps full values.
processVictim.show(truncate=False, vertical=True)

-RECORD 0--------------------------------------------------------
 UtcTime           | 2020-05-02 02:55:56.157                     
 ProcessGuid       | {47ab858c-e13c-5eac-a903-000000000400}      
 ProcessId         | 8524                                        
 Image             | C:\ProgramData\victim\â€®cod.3aka3.scr      
 Description       | -                                           
 CommandLine       | "C:\ProgramData\victim\â€®cod.3aka3.scr" /S 
 CurrentDirectory  | C:\ProgramData\victim\                      
 ParentProcessGuid | {47ab858c-dac4-5eac-f202-000000000400}      
 ParentProcessId   | 4440                                        
 ParentImage       | C:\Windows\explorer.exe                     
 ParentCommandLine | C:\windows\Explorer.EXE                     



### 7) Does ProcessId = 8524 have any child processes?

In [8]:
# List process-creation events (Sysmon EventID 1) whose parent process id is
# 8524, i.e. the children spawned by that process.
process8524 = spark.sql(
    """
    SELECT UtcTime, ParentProcessId, ProcessId, Image, CommandLine
    FROM apt29Table
    WHERE Channel = 'Microsoft-Windows-Sysmon/Operational'
        AND EventID = 1
        AND ParentProcessId = 8524
    """
)
# Tabular layout; values longer than 40 characters are cut.
process8524.show(truncate=40, vertical=False)

+-----------------------+---------------+---------+-------------------------------+----------------------------------------+
|                UtcTime|ParentProcessId|ProcessId|                          Image|                             CommandLine|
+-----------------------+---------------+---------+-------------------------------+----------------------------------------+
|2020-05-02 02:56:04.494|           8524|     5156|C:\Windows\System32\conhost.exe|\\?\C:\windows\system32\conhost.exe -...|
|2020-05-02 02:56:04.510|           8524|     2772|    C:\Windows\System32\cmd.exe|           "C:\windows\system32\cmd.exe"|
|2020-05-02 02:57:12.374|           8524|     3152|C:\Windows\System32\conhost.exe|\\?\C:\windows\system32\conhost.exe -...|
|2020-05-02 02:57:12.376|           8524|     3480|    C:\Windows\System32\cmd.exe|           "C:\windows\system32\cmd.exe"|
+-----------------------+---------------+---------+-------------------------------+----------------------------------------+


### 8) What are the events related to ProcessId = 8524?

In [9]:
# Tally Sysmon events per EventID for records whose ProcessId field is 8524,
# most frequent event type first.
# NOTE(review): this name differs only by case from `process8524` (the child
# process query in the previous section) -- easy to mix up; consider a more
# descriptive name once no other cell depends on it.
Process8524 = spark.sql(
    """
    SELECT EventID, count(*) AS count
    FROM apt29Table
    WHERE Channel = 'Microsoft-Windows-Sysmon/Operational'
        AND ProcessId = 8524
    GROUP BY EventID
    ORDER BY count DESC
    """
)
Process8524.show(truncate=False)

+-------+-----+
|EventID|count|
+-------+-----+
|7      |65   |
|12     |9    |
|17     |6    |
|18     |6    |
|10     |4    |
|22     |2    |
|1      |1    |
|11     |1    |
|5      |1    |
|3      |1    |
+-------+-----+



### 9) What events from "Sysmon 3: Network Connection" are related to ProcessId = 8524?

In [10]:
# Network-connection events (Sysmon EventID 3) recorded for ProcessId 8524:
# who connected, from which address/port, to which address/port.
networkConnection8524 = spark.sql(
    """
    SELECT UtcTime, ProcessGuid, ProcessId, Image, SourceIp, SourcePort, DestinationIp, DestinationPort
    FROM apt29Table
    WHERE Channel = 'Microsoft-Windows-Sysmon/Operational'
        AND EventID = 3
        AND ProcessId = 8524
    """
)
# One field per line, values shown in full.
networkConnection8524.show(truncate=False, vertical=True)

-RECORD 0-------------------------------------------------
 UtcTime         | 2020-05-02 02:55:59.631                
 ProcessGuid     | {47ab858c-e13c-5eac-a903-000000000400} 
 ProcessId       | 8524                                   
 Image           | C:\ProgramData\victim\â€®cod.3aka3.scr 
 SourceIp        | 10.0.1.4                               
 SourcePort      | 59835                                  
 DestinationIp   | 192.168.0.5                            
 DestinationPort | 1234                                   



### 10) How can we put everything TOGETHER?

In [11]:
# Correlate process creation with network activity in a single query:
#   a -- process-creation events (Sysmon EventID 1) whose parent image matches
#        "%explorer%"
#   o -- network-connection events (Sysmon EventID 3)
# joined on ProcessGuid, so each row is a connection made by such a process.
#
# The subquery only returns the two columns the outer query needs, and the
# outer filter columns are qualified with `o.` so they stay unambiguous in
# this self-join. String literals use single quotes (standard SQL).
#
# NOTE(review): this rebinds `networkConnection8524` from the previous section
# even though the query is not restricted to ProcessId 8524; the name is kept
# only so existing references continue to work.
networkConnection8524 = spark.sql(
    """
    SELECT o.`@timestamp`, o.ProcessId, a.ParentImage, o.Image, o.SourceIp, o.DestinationIp, o.DestinationPort
    FROM apt29Table o
    INNER JOIN (
        SELECT ProcessGuid, ParentImage
        FROM apt29Table
        WHERE Channel = 'Microsoft-Windows-Sysmon/Operational'
            AND EventID = 1
            AND ParentImage LIKE '%explorer%'
    ) a
    ON o.ProcessGuid = a.ProcessGuid
    WHERE o.Channel = 'Microsoft-Windows-Sysmon/Operational'
        AND o.EventID = 3
    """
)
networkConnection8524.show(truncate=False, vertical=True)

-RECORD 0-------------------------------------------------
 @timestamp      | 2020-05-02T02:56:02.783Z               
 ProcessId       | 8524                                   
 ParentImage     | C:\Windows\explorer.exe                
 Image           | C:\ProgramData\victim\â€®cod.3aka3.scr 
 SourceIp        | 10.0.1.4                               
 DestinationIp   | 192.168.0.5                            
 DestinationPort | 1234                                   

