In [1]:
# Enable the autoreload extension in mode 2 so edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# No backend is named here, so IPython picks the default one and prints it
# (the recorded output below shows "agg").
%matplotlib

Using matplotlib backend: agg


In [2]:
import warnings
warnings.filterwarnings('ignore')

# Imports

In [3]:
import os
import time

In [4]:
from waad.heuristics.H2.machines_clustering import H2SpecificClustering
from waad.heuristics.H2.machines_processing import MachinesProcessing

from waad.utils.anomalous_asset import ComputeAnomalousAssets
from waad.utils.asset import Machine
from waad.utils.clustering import LongestCommonSubstringClustering, PerDomainAssetClustering
from waad.utils.fait_notable import ComputeFaitNotablesFromH2SpecificClustering, ComputeFaitNotablesFromIndicators
from waad.utils.indicators import ComputeIndicators, Indicators
from waad.utils.rule import Link, Probability, Relation, Rule
from waad.utils.postgreSQL_utils import Database, Table

### Variables

In [5]:
# Shared accumulator: later cells extend it with the notable facts
# ("faits notables") they produce, and the final section consumes it.
faits_notables = list()

### 0.1. Load machine names

In [6]:
# Connection settings for the database that holds the events.
# Each value can be supplied through an environment variable (names are this
# notebook's own convention) so credentials never have to be typed into, and
# committed with, the notebook. When a variable is not set, the original
# placeholder value is used, so filling the fallbacks by hand still works.
HOST = os.environ.get('WAAD_DB_HOST', '127.0.0.1')
PORT = os.environ.get('WAAD_DB_PORT', '5432')
USER = os.environ.get('WAAD_DB_USER', '')   # To fill
PASSWORD = os.environ.get('WAAD_DB_PASSWORD', '')   # To fill
DB_NAME = os.environ.get('WAAD_DB_NAME', '')   # To fill
TABLE_NAME = os.environ.get('WAAD_DB_TABLE', '')  # To fill

In [7]:
# Build the Database handle from the connection settings, then wrap the
# configured table so later cells can query it.
connection_settings = dict(
    host=HOST,
    port=PORT,
    user=USER,
    password=PASSWORD,
    db_name=DB_NAME,
)
db = Database(**connection_settings)
table = Table(db, table_name=TABLE_NAME)

In [8]:
# Distinct (logontype, workstationname, host) triples, skipping rows whose
# logon type is NULL or whose workstation name is empty.
distinct_logons_query = f"SELECT DISTINCT logontype, workstationname, host FROM {table.table_name} WHERE logontype is not NULL and workstationname <> '';"
lwh = table.get_command(distinct_logons_query)

In [9]:
# De-duplicate the workstation and host names found in the logon triples
# before handing them to the machine-processing step.
distinct_workstations = set(lwh['workstationname'].values)
distinct_hosts = set(lwh['host'].values)

mp = MachinesProcessing(workstationname=distinct_workstations, host=distinct_hosts)
mp.run()

### 1.1. Indicators for machine-to-machine logons (event 4624)

In [10]:
def _machine_from_host(row):
    """Build the target Machine from the `host` column of an event row.

    The first dot-separated label of the host is used as the machine name and
    the second one as its domain. A host without any dot has no domain label;
    the Machine is then built from the name alone instead of raising
    IndexError on the missing second label.
    """
    labels = row['host'].split('.')
    if len(labels) > 1:
        return Machine(name=labels[0], domain=labels[1])
    return Machine(name=labels[0])


# Rule: the workstation "connects to" (SE_CONNECTE_SUR) the host, with
# probability CERTAIN, for every event 4624 row where neither column is the
# '?' placeholder and the two values differ.
rule = Rule(
    relation=Relation(link=Link.SE_CONNECTE_SUR, probability=Probability.CERTAIN),
    conditions=[
        {
            'pre_filters': {'eventid': 4624},
            'filter_function': lambda row: row['workstationname'] != '?' and row['host'] != '?' and row['workstationname'] != row['host'],
            'asset_1': lambda row: Machine(name=row['workstationname']),
            'asset_2': _machine_from_host,
        },
    ]
)

In [None]:
# Compute the indicators for the rule and print the elapsed time in seconds.
# perf_counter() is a monotonic clock meant for measuring intervals; a
# difference of time.time() values can be skewed by wall-clock adjustments.
start = time.perf_counter()
ci = ComputeIndicators(
    table=table,
    rule=rule,
    indicator_objects=[
        Indicators.NB_AUTHENTICATIONS.value,
        Indicators.NB_ASSETS_REACHED.value,
        Indicators.NB_NEW_ASSETS_REACHED.value,
    ],
)
ci.run()
print(time.perf_counter() - start)

### 1.1.2

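Look for anomalous machines: derive notable facts (*faits notables*) from the computed indicators and add them to `faits_notables`.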

In [None]:
# Turn the computed indicators into notable facts and add them to the shared
# list. NOTE: re-running this cell extends `faits_notables` a second time.
cfnfi = ComputeFaitNotablesFromIndicators(ci.indicators)
cfnfi.run()
faits_notables += cfnfi.faits_notables

### 1.2.1. Clustering of machine names

In [None]:
# Cluster the unique machine names (workstations and hosts together) with the
# longest-common-substring clustering, then plot the clusters.
workstation_names = [m.name for m in mp.workstations]
host_names = [m.name for m in mp.hosts]
unique_machine_names = set().union(workstation_names, host_names)

ac = LongestCommonSubstringClustering(list(unique_machine_names))
ac.run()
ac.plot_clusters()

In [None]:
# Merge workstations and hosts into one de-duplicated collection and run the
# per-domain clustering on it; the value returned by run() is kept in `res`.
all_machines = set().union(mp.workstations, mp.hosts)
pdac = PerDomainAssetClustering(list(all_machines))
res = pdac.run()

In [None]:
# Bare last expression: the value returned by get_domains_summary() is shown
# as this cell's output.
pdac.get_domains_summary()

In [None]:
# Plot the per-domain clusters with firsts_n=3
# (presumably limits the plot to the first 3 domains — TODO confirm).
pdac.plot_clusters(firsts_n=3)

### 1.2.2. H2-specific clustering

In [None]:
# Run the H2-specific clustering on the (logontype, workstationname) columns
# together with the processed workstations and hosts, then plot the result.
logon_columns = lwh[['logontype', 'workstationname']]
h2sc = H2SpecificClustering(logon_columns, mp.workstations, mp.hosts)
h2sc.run()
h2sc.plot_clusters()

### 1.2.3. Notable facts from the H2-specific clusters

In [18]:
# Derive notable facts from the H2-specific clusters and add them to the
# shared list. NOTE: re-running this cell extends `faits_notables` again.
cfnfh2sc = ComputeFaitNotablesFromH2SpecificClustering(h2sc.clusters, table)
cfnfh2sc.run()
faits_notables += cfnfh2sc.faits_notables

# 2. Anomalous assets

In [19]:
# Compute the anomalous assets from every notable fact collected so far in
# `faits_notables` (filled by the indicator and H2-clustering cells).
caa = ComputeAnomalousAssets(faits_notables)
caa.run()

In [None]:
# Show only the first 20 rows of the summary as this cell's output.
caa.get_summary().head(20)

In [None]:
# Display the first four anomalous assets, one after the other.
for anomalous_asset in caa.anomalous_assets[:4]:
    anomalous_asset.display()