Main source of the dataset files: https://github.com/splunk/attack_data/tree/master/datasets/attack_techniques/T1558.003

In [1]:
# Library Imports
import pandas as pd
import xml.etree.ElementTree as ET

In [6]:
file_name = 'windows-xml.log'  # This is only one of the files loaded for inspection

# Every entry is later handed to an XML parser on its own, so the file is treated
# as one <Event> document per line. The lines are read directly instead of going
# through pd.read_csv: the CSV parser would split an event at any comma and
# interpret double quotes as field quoting, silently corrupting the XML text.
with open(file_name, encoding="utf-8") as log_file:
    log_lines = [line.strip() for line in log_file if line.strip()]  # skip blank lines

# One-column frame: each row holds the raw XML of a single log entry
data = pd.DataFrame({"log_entry": log_lines})
data.head()

Unnamed: 0,log_entry
0,<Event xmlns='http://schemas.microsoft.com/win...
1,<Event xmlns='http://schemas.microsoft.com/win...
2,<Event xmlns='http://schemas.microsoft.com/win...
3,<Event xmlns='http://schemas.microsoft.com/win...
4,<Event xmlns='http://schemas.microsoft.com/win...


The uploaded file 'windows-xml.log' contains log entries that record an unusual number of Kerberos service ticket requests.

In [10]:
# Parse each XML log entry
def parse_event(xml_event):
    """Extract the fields of interest from one Windows event XML string.

    Parameters
    ----------
    xml_event : str
        A complete ``<Event>...</Event>`` XML document.

    Returns
    -------
    dict
        Keys ``EventID``, ``TimeCreated``, ``Computer``, ``TargetUserName``,
        ``TargetDomainName``, ``ServiceName``, ``IpAddress`` and ``IpPort``.
        A value is ``None`` when the corresponding element or attribute is
        not present in the event.

    Raises
    ------
    xml.etree.ElementTree.ParseError
        If ``xml_event`` is not well-formed XML.
    """
    event = ET.fromstring(xml_event)

    # Strip the '{namespace}' prefix from every tag so the plain paths below
    # match regardless of how (or whether) the xmlns attribute was written.
    for element in event.iter():
        if isinstance(element.tag, str) and '}' in element.tag:
            element.tag = element.tag.split('}', 1)[1]

    def data_field(name):
        # Text of the first <Data Name='name'> element, or None if it is absent
        node = event.find(f".//Data[@Name='{name}']")
        return node.text if node is not None else None

    # TimeCreated carries its value in an attribute rather than in element text
    time_node = event.find('.//TimeCreated')
    time_created = time_node.attrib.get('SystemTime') if time_node is not None else None

    # Return a dictionary of the extracted data
    return {
        'EventID': event.findtext('.//EventID'),
        'TimeCreated': time_created,
        'Computer': event.findtext('.//Computer'),
        'TargetUserName': data_field('TargetUserName'),
        'TargetDomainName': data_field('TargetDomainName'),
        'ServiceName': data_field('ServiceName'),
        'IpAddress': data_field('IpAddress'),
        'IpPort': data_field('IpPort'),
    }

# Run the parser over every raw entry, collecting one dictionary per event
parsed_logs = list(map(parse_event, data["log_entry"]))

# Turn the per-event dictionaries into a table (one row per event)
df = pd.DataFrame(parsed_logs)

# Show the resulting table
df

Unnamed: 0,EventID,TimeCreated,Computer,TargetUserName,TargetDomainName,ServiceName,IpAddress,IpPort
0,4769,2024-03-04T06:53:49.273165500Z,ar-win-dc.attackrange.local,AR-WIN-2$@ATTACKRANGE.LOCAL,ATTACKRANGE.LOCAL,krbtgt,::ffff:10.0.1.15,55795
1,4769,2024-03-04T04:51:59.167662600Z,ar-win-dc.attackrange.local,AR-WIN-DC$@ATTACKRANGE.LOCAL,ATTACKRANGE.LOCAL,krbtgt1,::1,0
2,4769,2024-03-04T04:25:06.207288400Z,ar-win-dc.attackrange.local,AR-WIN-DC$@ATTACKRANGE.LOCAL,ATTACKRANGE.LOCAL,krbtgt2,::1,0
3,4769,2024-03-04T04:24:07.805715800Z,ar-win-dc.attackrange.local,AR-WIN-2$@ATTACKRANGE.LOCAL,ATTACKRANGE.LOCAL,krbtgt3,::ffff:10.0.1.15,55224
4,4769,2024-03-03T21:22:44.188691400Z,ar-win-dc.attackrange.local,AR-WIN-2$@ATTACKRANGE.LOCAL,ATTACKRANGE.LOCAL,krbtgt4,::ffff:10.0.1.15,53609
...,...,...,...,...,...,...,...,...
154,4769,2024-02-29T14:52:58.473931900Z,ar-win-dc.attackrange.local,AR-WIN-DC$@ATTACKRANGE.LOCAL,ATTACKRANGE.LOCAL,krbtgt,::1,0
155,4769,2024-02-29T05:23:58.356720200Z,ar-win-dc.attackrange.local,AR-WIN-2$@ATTACKRANGE.LOCAL,ATTACKRANGE.LOCAL,krbtgt,::ffff:10.0.1.15,49749
156,4769,2024-02-29T05:23:56.911963600Z,ar-win-dc.attackrange.local,AR-WIN-2$@ATTACKRANGE.LOCAL,ATTACKRANGE.LOCAL,krbtgt,::ffff:10.0.1.15,49741
157,4769,2024-02-29T05:22:45.114780000Z,ar-win-dc.attackrange.local,AR-WIN-DC$@ATTACKRANGE.LOCAL,ATTACKRANGE.LOCAL,krbtgt,::1,0


In Windows security logs, event **ID 4769** is generated every time the Key Distribution Center (KDC) receives a Kerberos Ticket Granting Service (TGS) ticket request.