In [16]:
pip install python-evtx



In [17]:
# Library Imports
import pandas as pd
from Evtx.Evtx import Evtx
import xml.etree.ElementTree as ET

In Windows security logs, event **ID 4769** is generated every time the Key Distribution Center (KDC) receives a Kerberos Ticket Granting Service (TGS) ticket request.

Next file is obtained from: https://github.com/ysnakst/Dataset-for-Kerberoasting/tree/main

In [18]:

file_path = 'All4769Events.evtx'

# To store parsed event information
events = []

# Read and display the XML content of the first few records to inspect the structure
with Evtx(file_path) as log:
    for i, record in enumerate(log.records()):
        print(f"Record {i + 1}:\n{record.xml()}\n")

        # Limit to first few records to avoid large output
        if i >= 4:
            break

Record 1:
<Event xmlns="http://schemas.microsoft.com/win/2004/08/events/event"><System><Provider Name="Microsoft-Windows-Security-Auditing" Guid="{54849625-5478-4994-a5ba-3e3b0328c30d}"></Provider>
<EventID Qualifiers="">4769</EventID>
<Version>0</Version>
<Level>0</Level>
<Task>14337</Task>
<Opcode>0</Opcode>
<Keywords>0x8020000000000000</Keywords>
<TimeCreated SystemTime="2024-03-19 23:08:07.978025"></TimeCreated>
<EventRecordID>8652</EventRecordID>
<Correlation ActivityID="" RelatedActivityID=""></Correlation>
<Execution ProcessID="656" ThreadID="3868"></Execution>
<Channel>Security</Channel>
<Computer>DC.cseclab.test</Computer>
<Security UserID=""></Security>
</System>
<EventData><Data Name="TargetUserName">DC$@CSECLAB.TEST</Data>
<Data Name="TargetDomainName">CSECLAB.TEST</Data>
<Data Name="ServiceName">DC$</Data>
<Data Name="ServiceSid">S-1-5-21-876779242-1562946419-1879789669-1000</Data>
<Data Name="TicketOptions">0x40810000</Data>
<Data Name="TicketEncryptionType">0x00000012</D

In [19]:
# Define the namespace
namespace = {'ns': 'http://schemas.microsoft.com/win/2004/08/events/event'}

events = []

# Read the .evtx file and parse entries
with Evtx(file_path) as log:
    for record in log.records():
        # Parse the XML content
        xml_entry = record.xml()
        event = ET.fromstring(xml_entry)

        # Extract the relevant details by using the namespace
        event_id = event.find('.//ns:EventID', namespace).text if event.find('.//ns:EventID', namespace) is not None else None
        timestamp = event.find('.//ns:TimeCreated', namespace).attrib.get('SystemTime') if event.find('.//ns:TimeCreated', namespace) is not None else None
        target_user = event.find(".//ns:Data[@Name='TargetUserName']", namespace).text if event.find(".//ns:Data[@Name='TargetUserName']", namespace) is not None else None
        service_name = event.find(".//ns:Data[@Name='ServiceName']", namespace).text if event.find(".//ns:Data[@Name='ServiceName']", namespace) is not None else None  # Added ServiceName
        encryption_type = event.find(".//ns:Data[@Name='TicketEncryptionType']", namespace).text if event.find(".//ns:Data[@Name='TicketEncryptionType']", namespace) is not None else None
        ip_address = event.find(".//ns:Data[@Name='IpAddress']", namespace).text if event.find(".//ns:Data[@Name='IpAddress']", namespace) is not None else None
        ticket_options = event.find(".//ns:Data[@Name='TicketOptions']", namespace).text if event.find(".//ns:Data[@Name='TicketOptions']", namespace) is not None else None
        logon_guid = event.find(".//ns:Data[@Name='LogonGuid']", namespace).text if event.find(".//ns:Data[@Name='LogonGuid']", namespace) is not None else None
        status = event.find(".//ns:Data[@Name='Status']", namespace).text if event.find(".//ns:Data[@Name='Status']", namespace) is not None else None

        # Append extracted data to events list
        events.append({
            "EventID": event_id,
            "Timestamp": timestamp,
            "TargetUserName": target_user,
            "ServiceName": service_name,
            "TicketEncryptionType": encryption_type,
            "IpAddress": ip_address,
            "TicketOptions": ticket_options,
            "LogonGuid": logon_guid,
            "Status": status
        })


df1 = pd.DataFrame(events)

df1

Unnamed: 0,EventID,Timestamp,TargetUserName,ServiceName,TicketEncryptionType,IpAddress,TicketOptions,LogonGuid,Status
0,4769,2024-03-19 23:08:07.978025,DC$@CSECLAB.TEST,DC$,0x00000012,::1,0x40810000,{a9536603-17bc-d8c2-9163-880ec9407704},0x00000000
1,4769,2024-03-19 23:08:09.023266,DC$@CSECLAB.TEST,DC$,0x00000012,::1,0x40810000,{ba9ab183-254f-4a0a-cddd-ac53066c0f91},0x00000000
2,4769,2024-03-19 23:08:09.023649,DC$@CSECLAB.TEST,krbtgt,0x00000012,::1,0x60810010,{ba9ab183-254f-4a0a-cddd-ac53066c0f91},0x00000000
3,4769,2024-03-19 23:08:09.274817,DC$@CSECLAB.TEST,DC$,0x00000012,::1,0x40810000,{ba9ab183-254f-4a0a-cddd-ac53066c0f91},0x00000000
4,4769,2024-03-19 23:08:09.414671,DC$@CSECLAB.TEST,DC$,0x00000012,::1,0x40810000,{ba9ab183-254f-4a0a-cddd-ac53066c0f91},0x00000000
...,...,...,...,...,...,...,...,...,...
5979,4769,2024-04-09 21:37:50.402313,DC$@CSECLAB.TEST,DC$,0x00000012,::1,0x40810000,{3a3f3d7a-59dc-0e96-5d75-34bb1e223284},0x00000000
5980,4769,2024-04-09 21:37:50.436304,DC$@CSECLAB.TEST,DC$,0x00000012,::1,0x40810000,{3a3f3d7a-59dc-0e96-5d75-34bb1e223284},0x00000000
5981,4769,2024-04-09 21:41:34.573837,DC$@CSECLAB.TEST,DC$,0x00000012,::1,0x40810000,{c36f4d46-6f0e-eb6c-b7db-c3087ac99dc6},0x00000000
5982,4769,2024-04-09 21:46:45.458099,DC$@CSECLAB.TEST,DC$,0x00000012,::1,0x40800000,{bd10911a-ecd2-31b9-b060-70aaffb762c1},0x00000000


In [32]:
# Convert 'Timestamp' to datetime format
df1['Timestamp'] = pd.to_datetime(df1['Timestamp'])

#Filter out computer accounts (Service names ending with '$')
df1 = df1[~df1['ServiceName'].str.endswith('$', na=False)]

# Aggregate by hourly intervals
df1['Hour'] = df1['Timestamp'].dt.floor('h')  # Truncate to the hour

# Group by 'Hour', 'TargetUserName', and 'IpAddress'
# Calculate distinct service requests, total requests, and weak encryption requests
aggregated_df = (
    df1.groupby(['Hour', 'TargetUserName', 'IpAddress'])
    .agg(
        distinct_service_requests=('ServiceName', 'nunique'),  # Count of distinct services
        total_requests=('EventID', 'count'),  # Total number of requests
        weak_encryption_requests=('TicketEncryptionType', lambda x: (x == '0x00000012').sum())  # Count of weak encryption
    )
    .reset_index()
)

# Add a feature for distinct IP count per user per hour
distinct_ips_per_user = (
    df1.groupby(['Hour', 'TargetUserName'])
    .agg(distinct_ips=('IpAddress', 'nunique'))
    .reset_index()
)

# Merge distinct IP count with the aggregated DataFrame
aggregated_df = aggregated_df.merge(distinct_ips_per_user, on=['Hour', 'TargetUserName'], how='left')

# Add binary weak encryption flag
aggregated_df['weak_encryption_flag'] = aggregated_df['weak_encryption_requests'].apply(lambda x: 1 if x > 0 else 0)

aggregated_df

Unnamed: 0,Hour,TargetUserName,IpAddress,distinct_service_requests,total_requests,weak_encryption_requests,distinct_ips,weak_encryption_flag
0,2024-03-19 23:00:00,azradmin@cseclab.test,::ffff:10.0.0.5,1,2,2,2,1
1,2024-03-19 23:00:00,azradmin@cseclab.test,::ffff:10.0.0.6,1,2,2,2,1
2,2024-03-19 23:00:00,dc$@cseclab.test,::1,1,1,1,1,1
3,2024-03-19 23:00:00,server$@cseclab.test,::ffff:10.0.0.6,1,3,3,1,1
4,2024-03-19 23:00:00,win10$@cseclab.test,::ffff:10.0.0.5,1,3,3,1,1
...,...,...,...,...,...,...,...,...
198,2024-04-09 21:00:00,azradmin@cseclab.test,::ffff:10.0.0.5,1,1,0,1,0
199,2024-04-09 21:00:00,dc$@cseclab.test,::1,1,4,3,1,1
200,2024-04-09 21:00:00,john.smith@cseclab.test,::ffff:10.0.0.7,8,30,4,1,1
201,2024-04-09 21:00:00,server$@cseclab.test,::ffff:10.0.0.6,1,3,3,1,1
