## Global parameters and notes

In [1]:
# Global Imports
import pandas as pd
from dotenv import load_dotenv

# Custom Imports
from helpers.data_helper import load_data, load_data_robust, load_true_labels, add_labels_to_dataset, split_data

In [2]:
PATH_DATASET_TO_CLASSIFY = "../AIT_LD-v2/russellmitchell"

#PATH_DATASET_TO_CLASSIFY = "../AIT_LD-v2/harrison"
#PATH_DATASET_TO_CLASSIFY = "../AIT_LD-v2/shaw"    
#PATH_DATASET_TO_CLASSIFY = "../AIT_LD-v2/wheeler" 
#PATH_DATASET_TO_CLASSIFY = "../AIT_LD-v2/wardbeck"
#PATH_DATASET_TO_CLASSIFY = "../AIT_LD-v2/fox"     
#PATH_DATASET_TO_CLASSIFY = "../AIT_LD-v2/wilson"  

### Get events from file: intranet / auth.log
* Trained using santos
* Contributes:
  * Compromised User Account -> IOC and Asset
  * Root Access Events -> Event
  * Which files were accessed -> Event
  * Which commands were executed and in which PWD -> Event
* Trained Classifiers: 
  * RandomForest 
  * GradientBoost 
  * SVM
  * MLP 
* Uses binary classification
* Features are simply booleans depending on presence of keywords

In [3]:
# Step by Step:

# 1. Import log file
# 2. Extract features from log file to be used for classification
# 3. Load trained classifier(s)
# 4. Classify log file
# 5. Keep attack-related logs

#6. TODO: Think about how to correlate them

#7. Upload Iris Report

In [4]:
# 1. Import log file

path_intranet_auth_log = "/gather/intranet_server/logs/auth.log"
df_intranet_auth = load_data_robust(PATH_DATASET_TO_CLASSIFY + path_intranet_auth_log)

#df_intranet_auth.head()

In [5]:
# 2. Extract features from log file to be used for classification

from helpers.intranet_auth_log_helper import extract_features, load_models_from_disk, classify_unseen_data

df_intranet_auth_features = extract_features(df_intranet_auth)

#df_intranet_auth_features.head()

In [6]:
# 3. Load trained classifier(s)

models = load_models_from_disk("trained-models/intranet_auth_log")

Loaded intr_auth_gradientboost from trained-models/intranet_auth_log\intr_auth_gradientboost.joblib
Loaded intr_auth_mlp from trained-models/intranet_auth_log\intr_auth_mlp.joblib
Loaded intr_auth_randomforest from trained-models/intranet_auth_log\intr_auth_randomforest.joblib
Loaded intr_auth_svc from trained-models/intranet_auth_log\intr_auth_svc.joblib


In [7]:
# 4. Classify log file
all_predictions = classify_unseen_data(models, df_intranet_auth_features)

all_predictions


{'intr_auth_gradientboost': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]),
 'intr_auth_mlp': array([0, 0, 0, 0, 0, 0, 0, 0, 0,

In [8]:
# 5. Keep attack-related logs

#6. TODO: Think about how to correlate them

#7. Upload Iris Report

### Get suspicious events and info from the dnsmasq logs

* Includes DNS Exfiltration Attack Step