In [1]:
# Load IPython's autoreload extension and set it to mode 2, which reloads
# imported modules before each cell runs, so edits to the project sources are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Enable matplotlib integration without requesting a specific backend; the
# recorded output of this cell reports that the `agg` backend was selected.
%matplotlib

Using matplotlib backend: agg


In [2]:
import warnings
# Silence every Python warning for the rest of the session so that cell
# outputs stay readable.
# NOTE(review): a blanket 'ignore' also hides deprecation and data-quality
# warnings raised further down the pipeline; consider narrowing the filter.
warnings.filterwarnings('ignore')

# Imports

In [3]:
import os
import time

from tqdm import tqdm

In [4]:
from waad.utils.clustering import PerDomainAssetClustering
from waad.heuristics.H3.select_valid_accounts import SelectValidAccounts, FilterOnSID


from waad.utils.asset import Account, Machine
from waad.utils.anomalous_asset import ComputeAnomalousAssets
from waad.utils.fait_notable import ComputeFaitNotablesFromIndicators
from waad.utils.indicators import ComputeIndicators, Indicators
from waad.utils.postgreSQL_utils import Database, Table
from waad.utils.rule import Link, Probability, Relation, Rule

# Pipeline

### Retrieve accounts from the dataset

In [5]:
# Connection settings for the PostgreSQL database that holds the dataset.
#
# Each value is taken from an environment variable when that variable is set,
# so credentials do not have to be typed into (and saved with) the notebook.
# When the variable is absent, the literal default on the right is used; edit
# that default to fill a value in by hand.
HOST = os.environ.get('WAAD_DB_HOST', '127.0.0.1')
PORT = os.environ.get('WAAD_DB_PORT', '5432')
USER = os.environ.get('WAAD_DB_USER', '')  # To fill
PASSWORD = os.environ.get('WAAD_DB_PASSWORD', '')  # To fill
DB_NAME = os.environ.get('WAAD_DB_NAME', '')  # To fill
TABLE_NAME = os.environ.get('WAAD_DB_TABLE', '')  # To fill

In [6]:
# Build the database handle from the connection constants, then a handle on
# the table named by TABLE_NAME; `table` is what the later cells query.
db = Database(host=HOST, port=PORT, user=USER, password=PASSWORD, db_name=DB_NAME)
table = Table(db, TABLE_NAME)

### 0.1

In [7]:
# Columns identifying the subject and target account of each event, plus the
# event ID itself. Only distinct combinations are fetched.
account_columns = (
    'eventid',
    'subjectusersid', 'subjectdomainname', 'subjectusername',
    'targetusersid', 'targetdomainname', 'targetusername',
)
data = table.get_command(
    f"SELECT DISTINCT {', '.join(account_columns)} FROM {table.table_name}"
)

### 0.2

In [8]:
# Select the valid accounts from the queried rows using the default settings
# of `SelectValidAccounts`; the result is read back from `valid_accounts`.
sva = SelectValidAccounts(data)
sva.run()
valid_accounts = sva.valid_accounts

In [9]:
# Same selection, but with an explicit `target_eventid_filter` limited to
# event IDs 4624, 4634 and 4648 (presumably the Windows logon, logoff and
# explicit-credential logon events; TODO confirm). The result is kept
# separately as the "potentially valid" accounts.
spva = SelectValidAccounts(data, target_eventid_filter=(4624, 4634, 4648))
spva.run()
potentially_valid_accounts = spva.valid_accounts

### 0.3

In [10]:
# Run the SID-based filter over the valid accounts and keep the ones it
# exposes as non-standard (the criteria themselves live in `FilterOnSID`).
fosid = FilterOnSID(valid_accounts)
fosid.run()
non_standard_valid_accounts = fosid.non_standard_accounts

### 1.1.1

In [11]:
def _machine_from_row(row):
    """Build the `Machine` asset from the row's `host` value.

    The host is split on '.', the first label becomes the machine name and
    the second label becomes the domain.
    NOTE(review): a host without any '.' raises IndexError here; the row
    filter only excludes the '?' placeholder. Confirm hosts are always dotted.
    """
    host_labels = row['host'].split('.')
    return Machine(name=host_labels[0], domain=host_labels[1])


def _make_condition(event_ids, sid_column):
    """Return one rule condition keyed on `event_ids`, reading the account SID
    from `sid_column`.

    Rows are kept only when that SID starts with 'S-1-5-21-' and the host is
    not the '?' placeholder.
    """
    def _keep_row(row):
        return row[sid_column].startswith('S-1-5-21-') and row['host'] != '?'

    def _account_from_row(row):
        return Account(sid=row[sid_column])

    return {
        'pre_filters': {'eventid': event_ids},
        'filter_function': _keep_row,
        'asset_1': _account_from_row,
        'asset_2': _machine_from_row,
    }


# Rule linking an account to the machine it connects to ("se connecte sur"),
# tagged with the PROBABLE probability level. Two event families feed it:
# events 4624/4634/4648 identify the account via the target SID, and event
# 4672 identifies it via the subject SID.
rule = Rule(
    relation=Relation(link=Link.SE_CONNECTE_SUR, probability=Probability.PROBABLE),
    conditions=[
        _make_condition([4624, 4634, 4648], 'targetusersid'),
        _make_condition(4672, 'subjectusersid'),
    ],
)

In [None]:
# Compute the four indicators for the rule defined above and report how long
# the computation took, in seconds.
# `time.perf_counter()` is used instead of `time.time()` because it is a
# monotonic clock intended for measuring elapsed time; wall-clock time can
# jump if the system clock is adjusted while the computation runs.
start = time.perf_counter()
ci = ComputeIndicators(
    table=table,
    rule=rule,
    indicator_objects=[
        Indicators.NB_AUTHENTICATIONS.value,
        Indicators.NB_ASSETS_REACHED.value,
        Indicators.NB_NEW_ASSETS_REACHED.value,
        Indicators.NB_PRIVILEGES_GRANTED.value,
    ],
)
ci.run()
print(time.perf_counter() - start)

### 1.1.2

In [None]:
# Derive the notable facts ("faits notables") from the indicators held by
# `ci`; requires the indicator-computation cell to have been run first.
cfnfi = ComputeFaitNotablesFromIndicators(ci.indicators)
cfnfi.run()

### 2.1

In [14]:
# Build the anomalous assets from the notable facts produced by `cfnfi`.
caa = ComputeAnomalousAssets(cfnfi.faits_notables)
caa.run()

In [None]:
# Show the first 30 entries of the anomalous-asset summary.
caa.get_summary().head(30)

In [None]:
# Display the first 10 anomalous assets, in the order they are stored on
# `caa.anomalous_assets`.
for aa in caa.anomalous_assets[:10]:
    aa.display()

### 2.2

Per-domain clustering of the usernames of all potentially valid accounts (accounts whose name ends with `$` are excluded).

In [None]:
# Drop the accounts whose name ends with '$' before clustering, then cluster
# the remaining accounts per domain and plot the first 5 clusters.
non_machine_accounts = [
    account
    for account in potentially_valid_accounts
    if not account.name.endswith('$')
]
pdac_potentially_valid = PerDomainAssetClustering(non_machine_accounts)
pdac_potentially_valid.run()
pdac_potentially_valid.plot_clusters(firsts_n=5);

**SubjectUserName / TargetUserName / TargetOutboundUserName** naming conventions:
* If the format is `XYZ$`, then `XYZ` is a machine name.
* If the format is `ABC/XYZ`, then `ABC` is a domain name and `XYZ` is a username.
* If the format is `ABC@XYZ`, then `ABC` is a username and `XYZ` is a domain name.
* Otherwise, the whole value is a username.