In [None]:
import re
import json
import pandas as pd
import math

### detect techniques in eventlogs

In [None]:
# final regexs eventlogs
#
# Every pattern is a whole-line regex with the shape
#     ^.*\b<header>,\b.*\b<keyword>\b.*$
# where <header> is a comma-separated fragment such as
# "11,Info,Microsoft-Windows-Sysmon" and <keyword> is a token expected later
# on the same line.  Each distinct pattern is built once below and then
# referenced by name, so a pattern shared by several techniques is visibly
# the same pattern.


def _event_line_regex(header, keyword):
    """Return the whole-line regex matching `header` followed later by `keyword`."""
    return r"^.*\b" + header + r",\b.*\b" + keyword + r"\b.*$"


_WMI_5857 = _event_line_regex("5857,LogAlways,Microsoft-Windows-WMI-Activity", "0x4000000000000000")
_WMI_5858 = _event_line_regex("5858,Error,Microsoft-Windows-WMI-Activity", "0x4000000000000000")
_DEFENDER_2010 = _event_line_regex("2010,Info,Microsoft-Windows-Windows Defender", "Classic")
_SYSMON_11 = _event_line_regex("11,Info,Microsoft-Windows-Sysmon", "Classic")
_SYSMON_13 = _event_line_regex("13,Info,Microsoft-Windows-Sysmon", "Classic")
_SCM_7000 = _event_line_regex("7000,Error,Service Control Manager", "Audit success, classic")
_SCM_7009 = _event_line_regex("7009,Error,Service Control Manager", "Audit success, classic")
_SCM_7045 = _event_line_regex("7045,Info,Service Control Manager", "Audit success, classic")
_PS_400 = _event_line_regex("400,Info,PowerShell", "0x80000000000000")
_PS_403 = _event_line_regex("403,Info,PowerShell", "0x80000000000000")
_PS_600 = _event_line_regex("600,Info,PowerShell", "0x80000000000000")
_PS_40961 = _event_line_regex("40961,Info,Microsoft-Windows-PowerShell", "0x0")
_PS_40962 = _event_line_regex("40962,Info,Microsoft-Windows-PowerShell", "0x0")
_PS_53504 = _event_line_regex("53504,Info,Microsoft-Windows-PowerShell", "0x0")
_KNOWNFOLDERS_1002 = _event_line_regex("1002,Warning,Microsoft-Windows-KnownFolders", "Classic")
_SECURITY_4722 = _event_line_regex("4722,LogAlways,Microsoft-Windows-Security-Auditing", "Audit success")
_SECURITY_4738 = _event_line_regex("4738,LogAlways,Microsoft-Windows-Security-Auditing", "Audit success")
_SECURITY_4798 = _event_line_regex("4798,LogAlways,Microsoft-Windows-Security-Auditing", "Audit success")

# The six PowerShell patterns that several techniques list in this exact order.
_POWERSHELL_SIX = [_PS_400, _PS_600, _PS_40962, _PS_53504, _PS_403, _PS_40961]

# Technique id -> ordered list of patterns.  Key order and pattern order are
# both significant: they determine the order of entries in the JSON report.
# Each value is a fresh list object (no list is shared between keys).
tech_regexs = {
    'T1003.002': [_WMI_5857, _DEFENDER_2010],
    'T1053.005': [_SYSMON_11],
    'T1569.002': [
        _SYSMON_13,
        _SYSMON_11,
        _SCM_7009,
        _SCM_7000,
        _PS_400,
        _PS_600,
        _PS_40962,
        _PS_53504,
        _KNOWNFOLDERS_1002,
        _PS_403,
        _PS_40961,
        _SCM_7045,
    ],
    'T1082': [_WMI_5857, _WMI_5858],
    'T1036': [_SYSMON_11, _WMI_5857] + _POWERSHELL_SIX,
    'T1090.001': [_SYSMON_11] + _POWERSHELL_SIX,
    'T1078.001': [_SECURITY_4798, _SECURITY_4722, _SECURITY_4738],
    'T1112-1': [_SYSMON_13],
    'T1562': [_SYSMON_13],
    'T1112-4': [_SYSMON_11] + _POWERSHELL_SIX,
}

# Load the raw event-log export and write a copy ordered by TimeCreated, so
# the line windows scanned below are windows over rows in TimeCreated order.
data = pd.read_csv("../csv_artifacts/TEST3/EventLogs/EventLogs_output_TEST3.csv")
# Deliberately not named `sorted`: that would shadow the builtin for every
# cell executed afterwards in the same kernel.
sorted_events = data.sort_values(by=['TimeCreated'])
# to_csv is left at its defaults, so the DataFrame index is written as the
# first column; every pattern starts with `^.*`, so the extra leading column
# does not affect matching.
sorted_events.to_csv('../csv_artifacts/TEST3/EventLogs/EventLogs_output_TEST3_sorted.csv')

# params for sliding window
num_rows_to_read = 70  # window size, in lines
num_rows_to_move = 60  # distance between window starts (70 - 60 = 10 lines of overlap)
# Divisor, not a count: a technique is reported for a window when at least
# ceil(len(patterns) / minimum_regexs) of its patterns matched there.
# With 1, every pattern of the technique must match.
minimum_regexs = 1
result_dict = dict()

# The file is only needed for the read, so it is closed before scanning.
# NOTE(review): rows are taken as raw text lines, so a quoted CSV field that
# contains a line break would be split across several entries -- confirm the
# export never produces such fields.
with open("../csv_artifacts/TEST3/EventLogs/EventLogs_output_TEST3_sorted.csv", 'r', newline='') as file:
    lines = file.read().splitlines()[1:]  # drop the header row; safe on an empty file
print(len(lines))

# Compile each pattern once instead of resolving it again for every line.
compiled_regexs = {
    technique: [re.compile(pattern) for pattern in patterns]
    for technique, patterns in tech_regexs.items()
}

start = 0
while True:
    # Clamp the window up front so a file shorter than one window is scanned
    # exactly once (an unclamped first window triggers a second, redundant
    # pass over the tail, which can record the same hits twice).
    end = min(start + num_rows_to_read, len(lines))
    window = lines[start:end]

    for technique, patterns in compiled_regexs.items():
        hits = []
        for pattern in patterns:
            # Only the first matching line per pattern counts for a window.
            for line in window:
                match = pattern.search(line)
                if match:
                    hits.append(match.group(0))
                    break
        # Report the technique when enough of its patterns were seen.
        if len(hits) >= math.ceil(len(patterns) / minimum_regexs):
            result_dict.setdefault(technique, []).extend(hits)

    if end >= len(lines):
        break
    start += num_rows_to_move

with open('TEST3_sum_sliding_window.json', 'w') as convert_file:
    json.dump(result_dict, convert_file)


### detect techniques in MFT

In [None]:
# final regexs mft
#
# Every pattern is a whole-line regex with the shape
#     ^.*\b<six True/False values>,<attr>\|<attr>...,\b.*$
# i.e. six consecutive comma-separated booleans followed by a field holding
# one or more attribute names joined by a literal "|".


def _mft_line_regex(flags, *attributes):
    """Return the whole-line regex for six booleans followed by an attribute field.

    `flags` is a six-character string of "T"/"F" (e.g. "FFFTFF"), expanded to
    "False,False,False,True,False,False".  `attributes` are the attribute
    names, joined with an escaped pipe so the "|" is matched literally.
    """
    booleans = ",".join("True" if flag == "T" else "False" for flag in flags)
    return r"^.*\b" + booleans + "," + r"\|".join(attributes) + r",\b.*$"


# Six patterns that several techniques list in this exact order.
_COMMON_SIX = [
    _mft_line_regex("FFFFFF", "Archive"),
    _mft_line_regex("TFFFFF", "None"),
    _mft_line_regex("FFFFTT", "Archive"),
    _mft_line_regex("TFFTFF", "None"),
    _mft_line_regex("FFFFFF", "Archive", "RecallOnOpen"),
    _mft_line_regex("FFFTFF", "Archive", "RecallOnOpen"),
]

# Technique id -> ordered list of patterns.  Key order and pattern order are
# both significant: they determine the order of entries in the JSON report.
# Each value is a fresh list object (no list is shared between keys).
tech_regexs = {
    'T1003.002': [
        _mft_line_regex("FFFFFF", "Archive", "RecallOnOpen"),
        _mft_line_regex("FFFFFF", "Archive", "NotContentIndexed"),
    ],
    'T1053.005': [
        _mft_line_regex("FFFFFF", "Archive"),
        _mft_line_regex("FFFFFF", "Archive", "RecallOnOpen"),
    ],
    'T1569.002': [
        _mft_line_regex("FTFTTF", "Archive"),
        _mft_line_regex("FFFTFF", "ReadOnly", "Archive"),
        _mft_line_regex("FFTTTF", "Archive"),
        _mft_line_regex("FFFFFF", "ReadOnly", "Archive"),
    ],
    'T1082': _COMMON_SIX + [_mft_line_regex("FFFTFF", "Archive")],
    'T1036': [
        _mft_line_regex("FFFFFF", "Archive", "Temporary", "NotContentIndexed"),
        _mft_line_regex("FFFFFF", "ReadOnly", "Archive", "RecallOnOpen"),
        _mft_line_regex("FFFTFF", "ReadOnly", "Archive", "RecallOnOpen"),
        _mft_line_regex("FFFFFF", "NotContentIndexed"),
        _mft_line_regex("TFFTFF", "Compressed", "NotContentIndexed"),
        _mft_line_regex("FFFFFF", "Archive", "Temporary", "Compressed", "NotContentIndexed"),
        _mft_line_regex("FFFFFF", "Archive", "ReparsePoint"),
        _mft_line_regex("FFFTTF", "Archive", "RecallOnOpen"),
        _mft_line_regex(
            "FTFFFF",
            "System", "Archive", "SparseFile", "ReparsePoint", "NotContentIndexed", "RecallOnOpen",
        ),
        _mft_line_regex("FFFFTT", "Archive", "NotContentIndexed", "RecallOnOpen"),
        _mft_line_regex(
            "FFTFFF",
            "System", "Archive", "SparseFile", "ReparsePoint", "NotContentIndexed", "RecallOnOpen",
        ),
        _mft_line_regex("TFFTFF", "System", "NotContentIndexed"),
    ],
    'T1090.001': [
        _mft_line_regex("FFFFFF", "Archive"),
        _mft_line_regex("TFFFFF", "None"),
        _mft_line_regex("FFFFTT", "Archive"),
        _mft_line_regex("FFFFFF", "Archive", "RecallOnOpen"),
        _mft_line_regex("FTFFFF", "Archive", "SparseFile", "ReparsePoint", "RecallOnOpen"),
        _mft_line_regex("FFTFFF", "Archive", "SparseFile", "ReparsePoint", "RecallOnOpen"),
    ],
    'T1078.001': list(_COMMON_SIX),
    'T1112-1': list(_COMMON_SIX),
    'T1562': _COMMON_SIX + [_mft_line_regex("FFFTFT", "Archive")],
    'T1112-4': _COMMON_SIX + [_mft_line_regex("FFFTFF", "Archive")],
}

# The MFT export is scanned in its on-disk row order (no sort step here).
# The earlier `pd.read_csv` of this file was dropped: its result was never
# used in this cell, so it only parsed the whole export for nothing.

# params for sliding window
num_rows_to_read = 70  # window size, in lines
num_rows_to_move = 60  # distance between window starts (70 - 60 = 10 lines of overlap)
# Divisor, not a count: a technique is reported for a window when at least
# ceil(len(patterns) / minimum_regexs) of its patterns matched there.
# With 1, every pattern of the technique must match.
minimum_regexs = 1
result_dict = dict()

# The file is only needed for the read, so it is closed before scanning.
# NOTE(review): rows are taken as raw text lines, so a quoted CSV field that
# contains a line break would be split across several entries -- confirm the
# export never produces such fields.
with open("../csv_artifacts/TEST4/MFT/MFT_output_TEST4.csv", 'r', newline='') as file:
    lines = file.read().splitlines()[1:]  # drop the header row; safe on an empty file
print(len(lines))

# Compile each pattern once instead of resolving it again for every line.
compiled_regexs = {
    technique: [re.compile(pattern) for pattern in patterns]
    for technique, patterns in tech_regexs.items()
}

start = 0
while True:
    # Clamp the window up front so a file shorter than one window is scanned
    # exactly once (an unclamped first window triggers a second, redundant
    # pass over the tail, which can record the same hits twice).
    end = min(start + num_rows_to_read, len(lines))
    window = lines[start:end]

    for technique, patterns in compiled_regexs.items():
        hits = []
        for pattern in patterns:
            # Only the first matching line per pattern counts for a window.
            for line in window:
                match = pattern.search(line)
                if match:
                    hits.append(match.group(0))
                    break
        # Report the technique when enough of its patterns were seen.
        if len(hits) >= math.ceil(len(patterns) / minimum_regexs):
            result_dict.setdefault(technique, []).extend(hits)

    if end >= len(lines):
        break
    start += num_rows_to_move

with open('TEST4_sum_sliding_window_mft.json', 'w') as convert_file:
    json.dump(result_dict, convert_file)

### detect techniques in Journal

In [None]:
# final regexs journal
#
# Technique id -> ordered list of whole-line regexes.  Each pattern has the
# shape
#     ^.*\b<name>\|<name>...,<name>\|<name>...,\b.*$
# i.e. two adjacent comma-separated fields, each holding one or more names
# joined by a literal "|".
# NOTE(review): the two fields look like the journal's update-reason and
# file-attribute columns -- confirm against the CSV header.
#
# Every "|" must be written as "\|": an unescaped pipe is regex alternation
# and splits the pattern into two much looser alternatives.
# Key order and pattern order are both significant: they determine the order
# of entries in the JSON report.
tech_regexs = {
    'T1003.002': [
        r"^.*\bDataOverwrite\|DataExtend\|FileCreate\|Close,Archive,\b.*$",
        r"^.*\bDataOverwrite\|DataExtend\|FileCreate,Archive,\b.*$",
    ],
    'T1053.005': [
        r"^.*\bSecurityChange,Archive,\b.*$",
        r"^.*\bSecurityChange\|Close,Archive,\b.*$",
    ],
    'T1569.002': [
        r"^.*\bDataExtend\|DataTruncation,Archive\|NotContentIndexed,\b.*$",
        r"^.*\bDataExtend\|DataTruncation\|Close,Archive\|NotContentIndexed,\b.*$",
        r"^.*\bDataTruncation,Archive\|NotContentIndexed,\b.*$",
        r"^.*\bFileCreate\|Close,Archive,\b.*$",
    ],
    'T1082': [
        r"^.*\bDataExtend,Archive\|Compressed,\b.*$",
        r"^.*\bDataOverwrite,Archive\|SparseFile\|NotContentIndexed,\b.*$",
        r"^.*\bDataExtend,Normal,\b.*$",
        r"^.*\bDataExtend\|FileCreate\|IndexableChange\|BasicInfoChange\|Close,NotContentIndexed,\b.*$",
        r"^.*\bDataExtend\|Close,Normal,\b.*$",
        r"^.*\bDataOverwrite\|DataExtend\|FileCreate\|SecurityChange,Archive\|NotContentIndexed,\b.*$",
        r"^.*\bDataOverwrite\|DataExtend\|DataTruncation\|Close,Archive,\b.*$",
    ],
    'T1036': [
        r"^.*\bDataExtend\|FileCreate\|EaChange\|SecurityChange,Archive\|NotContentIndexed,\b.*$",
        r"^.*\bDataOverwrite\|DataExtend\|FileCreate\|EaChange\|SecurityChange,Archive\|NotContentIndexed,\b.*$",
        r"^.*\bDataOverwrite\|DataExtend\|FileCreate\|EaChange\|SecurityChange\|BasicInfoChange\|Close,Archive\|NotContentIndexed,\b.*$",
        r"^.*\bDataOverwrite\|DataExtend\|FileCreate\|EaChange\|SecurityChange\|BasicInfoChange,Archive\|NotContentIndexed,\b.*$",
        r"^.*\bFileCreate\|EaChange,Archive\|NotContentIndexed,\b.*$",
        r"^.*\bFileCreate\|EaChange\|SecurityChange,Archive\|NotContentIndexed,\b.*$",
    ],
    'T1090.001': [
        r"^.*\bDataExtend\|FileCreate,Archive,\b.*$",
        r"^.*\bFileCreate,Archive,\b.*$",
        r"^.*\bDataExtend\|FileCreate\|Close,Archive,\b.*$",
        r"^.*\bFileDelete\|Close,Archive,\b.*$",
        r"^.*\bDataExtend\|DataTruncation\|Close,Archive,\b.*$",
        r"^.*\bDataTruncation,Archive,\b.*$",
        r"^.*\bDataExtend\|DataTruncation,Archive,\b.*$",
    ],
    'T1078.001': [
        r"^.*\bDataOverwrite\|SecurityChange,Hidden\|System\|Archive,\b.*$",
        r"^.*\bDataOverwrite\|FileDelete\|Close,Archive,\b.*$",
    ],
    'T1112-1': [
        r"^.*\bFileCreate\|FileDelete\|Close,Hidden\|Archive,\b.*$",
        r"^.*\bDataTruncation,Hidden\|System\|Archive\|NotContentIndexed,\b.*$",
        r"^.*\bDataExtend\|FileCreate\|RenameNewName,Archive,\b.*$",
        r"^.*\bDataExtend\|FileCreate\|RenameNewName\|Close,Archive,\b.*$",
        r"^.*\bDataExtend\|FileCreate\|RenameOldName,Archive,\b.*$",
        r"^.*\bDataOverwrite\|DataExtend\|FileCreate\|SecurityChange\|BasicInfoChange\|Close,Archive,\b.*$",
        r"^.*\bDataOverwrite\|DataExtend\|FileCreate\|SecurityChange\|BasicInfoChange,Archive,\b.*$",
        r"^.*\bDataOverwrite\|DataExtend\|FileCreate\|SecurityChange,Archive,\b.*$",
    ],
    'T1562': [
        r"^.*\bFileCreate\|EaChange,Archive,\b.*$",
        r"^.*\bFileCreate\|EaChange\|SecurityChange,Archive,\b.*$",
        r"^.*\bRenameOldName,Archive\|SparseFile\|ReparsePoint,\b.*$",
        # The pipe before ReparsePoint is escaped like everywhere else; left
        # bare it would make any line containing "ReparsePoint," match.
        r"^.*\bRenameNewName,Archive\|SparseFile\|ReparsePoint,\b.*$",
        r"^.*\bFileDelete\|RenameNewName\|Close,Archive\|SparseFile\|ReparsePoint,\b.*$",
        r"^.*\bDataExtend\|IndexableChange\|BasicInfoChange,Normal,\b.*$",
        r"^.*\bDataExtend\|FileCreate\|RenameOldName\|IndexableChange\|BasicInfoChange,NotContentIndexed,\b.*$",
        r"^.*\bDataExtend\|FileCreate\|RenameNewName\|IndexableChange\|BasicInfoChange,NotContentIndexed,\b.*$",
        r"^.*\bDataExtend\|FileCreate\|EaChange\|SecurityChange,Archive,\b.*$",
    ],
    'T1112-4': [
        r"^.*\bDataExtend\|FileCreate,Archive,\b.*$",
        r"^.*\bFileCreate,Archive,\b.*$",
        r"^.*\bDataExtend\|FileCreate\|Close,Archive,\b.*$",
        r"^.*\bFileDelete\|Close,Archive,\b.*$",
        r"^.*\bDataExtend\|DataTruncation\|Close,Archive,\b.*$",
        r"^.*\bDataTruncation,Archive,\b.*$",
        r"^.*\bDataExtend\|DataTruncation,Archive,\b.*$",
    ]
}

# Load the raw journal export and write a copy ordered by
# UpdateSequenceNumber, so the line windows scanned below are windows over
# rows in UpdateSequenceNumber order.
data = pd.read_csv("../csv_artifacts/TEST4/Journal/Journal_output_TEST4.csv")
# Deliberately not named `sorted`: that would shadow the builtin for every
# cell executed afterwards in the same kernel.
sorted_journal = data.sort_values(by=['UpdateSequenceNumber'])
# to_csv is left at its defaults, so the DataFrame index is written as the
# first column; every pattern starts with `^.*`, so the extra leading column
# does not affect matching.
sorted_journal.to_csv('../csv_artifacts/TEST4/Journal/Journal_output_TEST4_sorted.csv')

# params for sliding window
num_rows_to_read = 70  # window size, in lines
num_rows_to_move = 60  # distance between window starts (70 - 60 = 10 lines of overlap)
# Divisor, not a count: a technique is reported for a window when at least
# ceil(len(patterns) / minimum_regexs) of its patterns matched there.
# With 1, every pattern of the technique must match.
minimum_regexs = 1
result_dict = dict()

# The file is only needed for the read, so it is closed before scanning.
# NOTE(review): rows are taken as raw text lines, so a quoted CSV field that
# contains a line break would be split across several entries -- confirm the
# export never produces such fields.
with open("../csv_artifacts/TEST4/Journal/Journal_output_TEST4_sorted.csv", 'r', newline='') as file:
    lines = file.read().splitlines()[1:]  # drop the header row; safe on an empty file
print(len(lines))

# Compile each pattern once instead of resolving it again for every line.
compiled_regexs = {
    technique: [re.compile(pattern) for pattern in patterns]
    for technique, patterns in tech_regexs.items()
}

start = 0
while True:
    # Clamp the window up front so a file shorter than one window is scanned
    # exactly once (an unclamped first window triggers a second, redundant
    # pass over the tail, which can record the same hits twice).
    end = min(start + num_rows_to_read, len(lines))
    window = lines[start:end]

    for technique, patterns in compiled_regexs.items():
        hits = []
        for pattern in patterns:
            # Only the first matching line per pattern counts for a window.
            for line in window:
                match = pattern.search(line)
                if match:
                    hits.append(match.group(0))
                    break
        # Report the technique when enough of its patterns were seen.
        if len(hits) >= math.ceil(len(patterns) / minimum_regexs):
            result_dict.setdefault(technique, []).extend(hits)

    if end >= len(lines):
        break
    start += num_rows_to_move

with open('TEST4_sum_sliding_window_journal.json', 'w') as convert_file:
    json.dump(result_dict, convert_file)