In [1]:
# Load IPython's autoreload extension; mode 2 re-imports modules before each
# cell runs, so edits to the project sources are picked up without a restart.
%load_ext autoreload
%autoreload 2
# No backend argument is given: IPython picks the default matplotlib backend
# (the recorded output of this cell reports "agg").
%matplotlib

Using matplotlib backend: agg


In [2]:
import warnings
warnings.filterwarnings('ignore')

# Imports

In [3]:
import os
import time

import pandas as pd

In [4]:
# Widen the pandas display limits so that large result tables are shown in
# full instead of being truncated.
display_limits = {
    'display.max_columns': 500,
    'display.max_colwidth': 150,
    'display.max_rows': 500,
}
for option_name, option_value in display_limits.items():
    pd.set_option(option_name, option_value)

In [5]:
from waad.utils.anomalous_asset import ComputeAnomalousAssets
from waad.utils.asset import Account, Asset, IP, Machine
from waad.utils.fait_notable import ComputeFaitNotablesFromIndicators
from waad.utils.indicators import Indicators, ComputeIndicators
from waad.utils.postgreSQL_utils import Database, Table
from waad.utils.rule import Link, Probability, Relation, Rule

## 0. Initialisation de la base de données

In [6]:
# Connection settings for the PostgreSQL database.
# Each value is read from an environment variable when it is set, so that
# credentials do not have to be typed into (and saved with) the notebook.
# When the variable is unset, the previous hard-coded value is used, which
# keeps the "fill in the blanks" workflow working unchanged.
HOST = os.environ.get('WAAD_DB_HOST', '127.0.0.1')
PORT = os.environ.get('WAAD_DB_PORT', '5432')
USER = os.environ.get('WAAD_DB_USER', '')   # To fill (or export WAAD_DB_USER)
PASSWORD = os.environ.get('WAAD_DB_PASSWORD', '')   # To fill (or export WAAD_DB_PASSWORD)
DB_NAME = os.environ.get('WAAD_DB_NAME', '')   # To fill (or export WAAD_DB_NAME)
TABLE_NAME = os.environ.get('WAAD_DB_TABLE', '')  # To fill (or export WAAD_DB_TABLE)

In [7]:
# Gather the connection settings first, then open the database handle and
# bind the table that the rest of the notebook works on.
connection_settings = {
    'host': HOST,
    'port': PORT,
    'user': USER,
    'password': PASSWORD,
    'db_name': DB_NAME,
}
db = Database(**connection_settings)
table = Table(db, TABLE_NAME)

## 1.1 Définition des règles permettant de compter les bons éléments dans les indicateurs

Les règles sont des objets de la bibliothèque qui définissent des relations entre 2 assets sous certaines conditions. Pour chaque ligne d'authentification de l'asset source, on vérifie si la règle s'applique. C'est le cas si au moins l'une des `conditions` est vérifiée (`ou` logique). Les conditions sont définies sous la forme de dictionnaires ayant la structure suivante :

```
{
    'pre_filters' : {'field_i': <possible values>, 'field_j': <possible values>},
    'filter_function': <function(row) -> bool>,
    'asset_1': <function(row) -> Asset>,
    'asset_2': <function(row) -> Asset>,
}
```

Les conditions dans `pre_filters` et `filter_function` sont combinées par un `et` logique : une ligne doit satisfaire les deux pour que la condition s'applique.

In [8]:
def machine_from_host(host):
    """Build the ``Machine`` designated by a ``host`` column value.

    The first dot-separated label is used as the machine name and the second
    one as its domain (``'pc.corp.local'`` -> name ``'pc'``, domain ``'corp'``),
    exactly as the former inline ``split('.')[0]`` / ``split('.')[1]`` did.

    A host without any dot used to raise ``IndexError`` on ``split('.')[1]``;
    it now yields a ``Machine`` built from its name only.
    """
    name, *other_labels = host.split('.')
    if other_labels:
        return Machine(name=name, domain=other_labels[0])
    return Machine(name=name)


# Relation "account SE_CONNECTE_SUR machine", flagged as certain.
rule = Rule(
    relation=Relation(link=Link.SE_CONNECTE_SUR, probability=Probability.CERTAIN),
    conditions=[
        {
            # Only rows with eventid 4624 are examined
            # (presumably the Windows "successful logon" event - to confirm).
            'pre_filters': {'eventid': 4624},
            # Keep rows whose target SID starts with 'S-1-5-21-' and whose host is known ('?' = unknown).
            'filter_function': lambda row: row['targetusersid'].startswith('S-1-5-21-') and row['host'] != '?',
            'asset_1': lambda row: Account(sid=row['targetusersid']),
            'asset_2': lambda row: machine_from_host(row['host']),
        }
    ]
)

## 1.2 Calcul des indicateurs à partir de la ``Rule``

In [None]:
# Indicators requested from ComputeIndicators for the current `rule`.
indicator_objects = [
    Indicators.NB_AUTHENTICATIONS.value,
    Indicators.NB_ASSETS_REACHED.value,
    Indicators.NB_NEW_ASSETS_REACHED.value,
    Indicators.NB_PRIVILEGES_GRANTED.value,
]

# perf_counter() is a monotonic clock meant for measuring durations; unlike
# time.time() it is not affected by system clock adjustments during the run.
start = time.perf_counter()
ci = ComputeIndicators(table=table, rule=rule, indicator_objects=indicator_objects)
ci.run()
# Elapsed time of the computation, in seconds.
print(time.perf_counter() - start)

## 1.3 Calcul des FaitsNotables associés

In [None]:
# Derive the "faits notables" from the indicators held by `ci`; the cell that
# builds and runs `ci` must have been executed first.
cfnfi = ComputeFaitNotablesFromIndicators(ci.indicators)
# NOTE(review): run() presumably fills `cfnfi.faits_notables`, which the
# anomalous-asset step reads - confirm against the library.
cfnfi.run()

## 2. Calcul des AnomalousAssets

Calcule et ordonne les AnomalousAssets à partir de tous les faits notables 

In [11]:
# Compute the anomalous assets from the "faits notables" held by `cfnfi`; the
# cell that builds and runs `cfnfi` must have been executed first.
caa = ComputeAnomalousAssets(cfnfi.faits_notables)
# NOTE(review): run() presumably fills `caa.anomalous_assets`, which is
# displayed further down - confirm against the library.
caa.run()

In [None]:
# Show the first 30 rows of the summary (get_summary() presumably returns a
# pandas DataFrame - to confirm).
caa.get_summary().head(30)

In [None]:
# Display the first six anomalous assets (fewer if the list is shorter).
top_anomalous_assets = caa.anomalous_assets[:6]
for aa in top_anomalous_assets:
    aa.display()

## Exemple d'inputs pour étudier les IPs privées (H1)

In [14]:
def machine_from_host(host):
    """Build the ``Machine`` designated by a ``host`` column value.

    The first dot-separated label is used as the machine name and the second
    one as its domain (``'pc.corp.local'`` -> name ``'pc'``, domain ``'corp'``),
    exactly as the former inline ``split('.')[0]`` / ``split('.')[1]`` did.

    A host without any dot used to raise ``IndexError`` on ``split('.')[1]``;
    it now yields a ``Machine`` built from its name only.
    """
    name, *other_labels = host.split('.')
    if other_labels:
        return Machine(name=name, domain=other_labels[0])
    return Machine(name=name)


# Relation "IP SE_CONNECTE_SUR machine", flagged as certain.
rule = Rule(
    relation=Relation(link=Link.SE_CONNECTE_SUR, probability=Probability.CERTAIN),
    conditions=[
        {
            # Only rows with eventid 4624 are examined
            # (presumably the Windows "successful logon" event - to confirm).
            'pre_filters': {'eventid': 4624},
            # Keep rows where both the IP address and the host are known ('?' = unknown).
            'filter_function': lambda row: row['ipaddress'] != '?' and row['host'] != '?',
            'asset_1': lambda row: IP(row['ipaddress']),
            'asset_2': lambda row: machine_from_host(row['host']),
        }
    ]
)

## Exemple d'inputs pour étudier les workstations (H2)

In [16]:
def machine_from_host(host):
    """Build the ``Machine`` designated by a ``host`` column value.

    The first dot-separated label is used as the machine name and the second
    one as its domain (``'pc.corp.local'`` -> name ``'pc'``, domain ``'corp'``),
    exactly as the former inline ``split('.')[0]`` / ``split('.')[1]`` did.

    A host without any dot used to raise ``IndexError`` on ``split('.')[1]``;
    it now yields a ``Machine`` built from its name only, the same way the
    source workstation is built below.
    """
    name, *other_labels = host.split('.')
    if other_labels:
        return Machine(name=name, domain=other_labels[0])
    return Machine(name=name)


# Relation "workstation SE_CONNECTE_SUR machine", flagged as certain.
rule = Rule(
    relation=Relation(link=Link.SE_CONNECTE_SUR, probability=Probability.CERTAIN),
    conditions=[
        {
            # Only rows with eventid 4624 are examined
            # (presumably the Windows "successful logon" event - to confirm).
            'pre_filters': {'eventid': 4624},
            # Keep rows where workstation and host are both known ('?' = unknown)
            # and differ from each other.
            'filter_function': lambda row: row['workstationname'] != '?' and row['host'] != '?' and row['workstationname'] != row['host'],
            'asset_1': lambda row: Machine(name=row['workstationname']),
            'asset_2': lambda row: machine_from_host(row['host']),
        },
    ]
)

## Exemple d'inputs pour étudier les authentifications potentielles des comptes (H7)

In [18]:
def machine_from_host(host):
    """Build the ``Machine`` designated by a ``host`` column value.

    The first dot-separated label is used as the machine name and the second
    one as its domain (``'pc.corp.local'`` -> name ``'pc'``, domain ``'corp'``),
    exactly as the former inline ``split('.')[0]`` / ``split('.')[1]`` did.

    A host without any dot used to raise ``IndexError`` on ``split('.')[1]``;
    it now yields a ``Machine`` built from its name only.
    """
    name, *other_labels = host.split('.')
    if other_labels:
        return Machine(name=name, domain=other_labels[0])
    return Machine(name=name)


# Relation "account SE_CONNECTE_SUR machine", flagged as probable; the rule
# applies to a row as soon as one of the two conditions below matches.
rule = Rule(
    relation=Relation(link=Link.SE_CONNECTE_SUR, probability=Probability.PROBABLE),
    conditions=[
        {
            # Rows with eventid 4624
            # (presumably the Windows "successful logon" event - to confirm).
            'pre_filters': {'eventid': 4624},
            # Keep rows whose target SID starts with 'S-1-5-21-' and whose host is known ('?' = unknown).
            'filter_function': lambda row: row['targetusersid'].startswith('S-1-5-21-') and row['host'] != '?',
            'asset_1': lambda row: Account(sid=row['targetusersid']),
            'asset_2': lambda row: machine_from_host(row['host']),
        },
        {
            # Rows with eventid 4672
            # (presumably the Windows "special privileges assigned to new logon" event - to confirm).
            'pre_filters': {'eventid': 4672},
            # Same checks, but the account is taken from the subject SID column.
            'filter_function': lambda row: row['subjectusersid'].startswith('S-1-5-21-') and row['host'] != '?',
            'asset_1': lambda row: Account(sid=row['subjectusersid']),
            'asset_2': lambda row: machine_from_host(row['host']),
        }
    ]
)