# Imports/Vars/Functions

In [1]:
import copy
import random
from datetime import datetime, timedelta
from importlib.resources import files
from pprint import pprint, pformat
from time import time

from ledhntr import LEDHNTR
from ledhntr.data_classes import(
    Attribute, Entity, Relation, Thing, Query
)

led = LEDHNTR()
tdb = led.plugins['typedb_client']
SCHEMA = str(files('ledhntr').joinpath('schemas/schema.tql'))
nukeit=True

# ; Generators

def gen_ips(num, existing_ips=[]):
    for i in range(num):
        ip = ".".join(str(random.randint(0,255)) for _ in range(4))
        if ip not in existing_ips:
            existing_ips.append(ip)
    if len(existing_ips) >= num:
        return existing_ips
    missing = num-len(existing_ips)
    if missing:
        print(f"Missing {missing} IPs - generating more...")
    existing_ips = gen_ips(missing, existing_ips)
    return existing_ips

def gen_lorem(words, num_sentences=1):
    sentences = []
    for s in range(num_sentences):
        lorem_ipsum_text = (
            "Lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et dolore "
            "magna aliqua Ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo "
            "consequat Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur "
            "Excepteur sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim id est laborum"
        )
        words_list = lorem_ipsum_text.split()
        output_text = ' '.join(random.choice(words_list) for _ in range(words))
        output_text += "."
        sentences.append(f"{output_text}")
    return ' '.join(sentences)

def roll_die(sides=6):
    return random.randint(1,sides)

def gen_hunts(num_hunts=1, num_ips=50, player_conn=True):
    all_things = {'attributes':[], 'entities':[], 'relations':[], }
    for nh in range(num_hunts):
        ips = gen_ips(num_ips)
        now = datetime.now()
        yesterday = now - timedelta(days=1)
        tomorrow = now + timedelta(days=1)

        
        date_discovered = Attribute(label='date-discovered', value=yesterday)
        date_seen_yest = Attribute(label='date-seen', value=yesterday)
        date_seen_today = Attribute(label='date-seen', value=now)
        date_seen_tom = Attribute(label='date-seen', value=tomorrow)
        
        asn = Attribute(label='note', value='TRASH-PANDA-SERVERS')

        ipents = []
        ipattrs = []

        for ip in ips:
            ipattr = Attribute(label='ip-address', value=ip)
            ipent = Entity(label='ip', has=[ipattr, date_discovered, date_seen_yest, date_seen_today, date_seen_tom])
            # ipent = Entity(label='ip', has=[ipattr])
            roll = roll_die(3)
            if roll==3:
                ipent.has.append(asn)
            else:
                randasn = gen_lorem(3).replace(' ', '-').upper()
                randasnattr = Attribute(label='note', value=randasn)
                ipent.has.append(randasnattr)
            ipents.append(ipent)
            ipattrs.append(ipattr)

        nowint = int(time())
        hunt_name = Attribute(label='hunt-name', value=f'HUNT-{nh}-{nowint}')
        hunt_active = Attribute(label='hunt-active', value=True)
        if player_conn:
            hunt = Relation(
                label='hunt', 
                has=[hunt_name, hunt_active], 
                players={'related': ipents}
            )
            hunt.has += ipattrs
        else:
            emptyent = Entity(label='empty-ent')
            hunt = Relation(
                label='hunt', 
                has=[hunt_name, hunt_active], 
                players={'related': emptyent}
            )
            hunt.has += ipattrs
            all_things['entities'].append(emptyent)
        # pprint(hunt)
        # pprint(hunt.players['related'][0].to_dict())
        all_things['relations'].append(hunt)
        all_things['entities']+=(ipents)
    return all_things


def gen_timestamps(count, start_year=2015, end_year=2023):
    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)
    time_diff = end_date - start_date

    # return [(start_date + timedelta(days=random.randint(0, time_diff.days), 
    #                                 seconds=random.randint(0, 86399))).strftime('%Y-%m-%d %H:%M:%S') 
    return [(start_date + timedelta(days=random.randint(0, time_diff.days), 
                                    seconds=random.randint(0, 86399)))
            for _ in range(count)]

def ts_to_ents(timestamps):
    time_ents = []
    for ts in timestamps:
        year = Attribute(label='year', value=ts.year)
        month = Attribute(label='month', value=ts.month)
        day = Attribute(label='day', value=ts.day)
        hour = Attribute(label='hour', value=ts.hour)
        minute = Attribute(label='minute', value=ts.minute)
        second = Attribute(label='second', value=ts.second)
        time_ent = Entity(label='utc', has=[year,month,day,hour,minute,second])
        time_ents.append(time_ent)
    return time_ents

def ts_to_attrs(timestamps):
    attrs = []
    for ts in timestamps:
        attr = Attribute(label='date-seen', value=ts)
        attrs.append(attr)
    return attrs

2024-05-09 12:21:11 [INFO] ledhntr[56692] > core.py > (_reload_all_plugins) [158] >  Loading auto_hunter...
2024-05-09 12:21:11 [INFO] ledhntr[56692] > core.py > (_reload_all_plugins) [167] >  Successfully loaded auto_hunter!
2024-05-09 12:21:11 [INFO] ledhntr[56692] > core.py > (_reload_all_plugins) [158] >  Loading censys...
2024-05-09 12:21:11 [INFO] ledhntr[56692] > core.py > (_reload_all_plugins) [167] >  Successfully loaded censys!
2024-05-09 12:21:11 [INFO] ledhntr[56692] > core.py > (_reload_all_plugins) [158] >  Loading compare_things...
2024-05-09 12:21:11 [INFO] ledhntr[56692] > core.py > (_reload_all_plugins) [167] >  Successfully loaded compare_things!
2024-05-09 12:21:11 [INFO] ledhntr[56692] > core.py > (_reload_all_plugins) [158] >  Loading hyas...
2024-05-09 12:21:11 [INFO] ledhntr[56692] > core.py > (_reload_all_plugins) [167] >  Successfully loaded hyas!
2024-05-09 12:21:11 [INFO] ledhntr[56692] > core.py > (_reload_all_plugins) [158] >  Loading jsonflats_client...
2

# Generate Databases

## Test Datetime Attributes vs Datetime Entities

In [None]:
# DateTime Attributes (current method for TypeDB)
DB_NAME = "DATETIME-ATTRIBUTES"
tdb.db_name=DB_NAME
if nukeit:
    tdb.delete_db(DB_NAME)
if not tdb.check_db(DB_NAME):
    tdb.create_db()
    tdb.write_tql_file(file=SCHEMA)

# FAST-INDEX method
DB_NAME = "FAST-INDEX-TIMES"
tdb.db_name=DB_NAME
if nukeit:
    tdb.delete_db(DB_NAME)
if not tdb.check_db(DB_NAME):
    tdb.create_db()
    tdb.write_tql_file(file=SCHEMA)

# Create hunt
hunts = gen_hunts(1, 1)
attr_hunt = copy.deepcopy(hunts[0])
ents_hunt = copy.deepcopy(hunts[0])

# Generate timestamps
timestamps = gen_timestamps(250, start_year=2021, end_year=2023)

# Generate DateTime Attributes
dtattrs = ts_to_attrs(timestamps)

# Generate FastIndex UTC Entities
dtents = ts_to_ents(timestamps)

# Attach attributes to hunt
attr_hunt.has += dtattrs

# Attach fast-index time entities to hunt
ents_hunt.players['related']+=dtents

# Write hunt to attr database
tdb.db_name = "DATETIME-ATTRIBUTES"
tdb.add_relation(attr_hunt)

# Write hunt to fast database
tdb.db_name = "FAST-INDEX-TIMES"
tdb.add_relation(ents_hunt)

### Test Query Times

In [None]:
so = Relation(label='hunt')

def clock_search(search_object=None, client=None):
    stime = time()
    led.logger.info(f"#### {tdb.db_name} ####")
    led.logger.info(f"START SEARCH {stime}")
    res = tdb.find_things(search_object)
    etime = time()
    led.logger.info(f"END SEARCH {etime}")
    led.logger.info(f"### {tdb.db_name} RESULTS ###")
    led.logger.info(f"### {etime-stime} SECONDS ###")
    return res

tdb.db_name = "DATETIME-ATTRIBUTES"
attr_res = clock_search(so, tdb)

tdb.db_name = "FAST-INDEX-TIMES"
ent_res = clock_search(so, tdb)

tdb.db_name = "DATETIME-ATTRIBUTES"
attr_res = clock_search(so, tdb)

tdb.db_name = "FAST-INDEX-TIMES"
ent_res = clock_search(so, tdb)

### VERDICT
It appears to be significantly slower to query Relations attached to DateTime 
Entities than it does to query Relations attached to DateTime Entities.

This suggests that fewer Relation -> Entity connections via Roles may significantly
speed up the retrieval process. Instead you would connect a Hunt Relation to an
Entity by way of the shared IP-Address attribute, as opposed to defining the 
"Discovered" role.

The downside of this is the Hunt Relation would feasibly never connect to anything
other than attributes in order to make the process as speedy as possible... But
you need something to connect to in order for a Relation to exist in the first place.
Otherwise the whole graph quickly becomes almost entirely Attributes and Entities.

## Test Relation Hunts -> Discover Role -> Entity IPs vs Entity Hunts -> Attribute IP-Addresses

In [2]:
# DateTime Attributes (current method for TypeDB)
DB_NAME = "HUNT-IP-ATTRIBUTES"
tdb.db_name=DB_NAME
if nukeit:
    tdb.delete_db(DB_NAME)
if not tdb.check_db(DB_NAME):
    tdb.create_db()
    tdb.write_tql_file(file=SCHEMA)

# FAST-INDEX method
DB_NAME = "HUNT-IP-ENTITIES"
tdb.db_name=DB_NAME
if nukeit:
    tdb.delete_db(DB_NAME)
if not tdb.check_db(DB_NAME):
    tdb.create_db()
    tdb.write_tql_file(file=SCHEMA)

# Create hunts
hunts_attrs = gen_hunts(5, 50, player_conn=False)
hunts_ents = gen_hunts(5, 50)

print(type(hunts_attrs))


# Write hunt to attr database
tdb.db_name = "HUNT-IP-ATTRIBUTES"
tdb.bulk_add(hunts_attrs)

# Write hunt to fast database
tdb.db_name = "HUNT-IP-ENTITIES"
tdb.bulk_add(hunts_ents)

2024-05-08 15:52:14 [INFO] ledhntr[7964] > typedb_client.py > (delete_db) [1728] >  Deleting HUNT-IP-ATTRIBUTES...
2024-05-08 15:52:14 [INFO] ledhntr[7964] > typedb_client.py > (write_tql_file) [3713] >  Writing TQL file C:\Users\drive\OneDrive\Documents\GitHub\ledhntr-suite-public\ledhntr\ledhntr\schemas\schema.tql...
2024-05-08 15:52:14 [INFO] ledhntr[7964] > typedb_client.py > (delete_db) [1728] >  Deleting HUNT-IP-ENTITIES...
2024-05-08 15:52:15 [INFO] ledhntr[7964] > typedb_client.py > (write_tql_file) [3713] >  Writing TQL file C:\Users\drive\OneDrive\Documents\GitHub\ledhntr-suite-public\ledhntr\ledhntr\schemas\schema.tql...
2024-05-08 15:52:15 [INFO] ledhntr[7964] > typedb_client.py > (bulk_add) [669] >  Processing 755 entities...


<class 'dict'>


2024-05-08 15:52:15 [INFO] ledhntr[7964] > typedb_client.py > (bulk_check) [898] >  Searching for 755 existing things...
2024-05-08 15:52:16 [INFO] ledhntr[7964] > typedb_client.py > (bulk_check) [977] >  thing <Entity(label=empty-ent,has=1) not in remote things!
2024-05-08 15:52:16 [INFO] ledhntr[7964] > typedb_client.py > (bulk_check) [979] >  First time seeing new thing <Entity(label=empty-ent,has=1)! {'abstract': False, 'iid': None, 'inferred': False, '_label': 'empty-ent', 'thingtype': 'entity', 'has': [{'abstract': False, 'iid': None, 'inferred': False, '_label': 'ledid', 'thingtype': 'attribute', '_value': 'empty-ent_1715197935206_e266d4'}], '_ledid': {'abstract': False, 'iid': None, 'inferred': False, '_label': 'ledid', 'thingtype': 'attribute', '_value': 'empty-ent_1715197935206_e266d4'}, 'keyattr': '', 'owns': [], 'relations': [], 'plays': []}
2024-05-08 15:52:16 [INFO] ledhntr[7964] > typedb_client.py > (bulk_check) [977] >  thing <Entity(label=empty-ent,has=1) not in remote

In [8]:
hunts_attrs.keys()

dict_keys(['attributes', 'relations', 'entities'])

### Test Query Times

In [5]:
so = Relation(label='hunt')

def clock_search(search_object=None, client=None, search_mode='full'):
    stime = time()
    led.logger.info(f"#### {tdb.db_name} ####")
    led.logger.info(f"START SEARCH {stime}")
    res = tdb.find_things(search_object, search_mode=search_mode)
    etime = time()
    led.logger.info(f"END SEARCH {etime}")
    led.logger.info(f"### {tdb.db_name} RESULTS ###")
    led.logger.info(f"### {etime-stime} SECONDS ###")
    return res

'''
tdb.db_name = "HUNT-IP-ATTRIBUTES"
attr_res = clock_search(so, tdb)

tdb.db_name = "HUNT-IP-ENTITIES"
ent_res = clock_search(so, tdb)
'''

tdb.db_name = "HUNT-IP-ATTRIBUTES"
led.logger.setLevel
attr_res = clock_search(so, tdb, search_mode='no_backtrace')

tdb.db_name = "HUNT-IP-ENTITIES"
print(f"LITE")
ent_res = clock_search(so, tdb, search_mode='lite')

tdb.db_name = "HUNT-IP-ENTITIES"
print(f"NO-BACKTRACE")
ent_res = clock_search(so, tdb, search_mode='no_backtrace')

2024-05-09 12:23:07 [INFO] ledhntr[56692] > 685427122.py > (clock_search) [5] >  #### HUNT-IP-ATTRIBUTES ####
2024-05-09 12:23:07 [INFO] ledhntr[56692] > 685427122.py > (clock_search) [6] >  START SEARCH 1715271787.1946583
2024-05-09 12:23:07 [INFO] ledhntr[56692] > 685427122.py > (clock_search) [9] >  END SEARCH 1715271787.6373692
2024-05-09 12:23:07 [INFO] ledhntr[56692] > 685427122.py > (clock_search) [10] >  ### HUNT-IP-ATTRIBUTES RESULTS ###
2024-05-09 12:23:07 [INFO] ledhntr[56692] > 685427122.py > (clock_search) [11] >  ### 0.44271087646484375 SECONDS ###
2024-05-09 12:23:07 [INFO] ledhntr[56692] > 685427122.py > (clock_search) [5] >  #### HUNT-IP-ENTITIES ####
2024-05-09 12:23:07 [INFO] ledhntr[56692] > 685427122.py > (clock_search) [6] >  START SEARCH 1715271787.6414468
2024-05-09 12:23:07 [INFO] ledhntr[56692] > typedb_client.py > (check_tx) [1260] >  Session mismatch. Looking for HUNT-IP-ENTITIES got HUNT-IP-ATTRIBUTES! Creating new session.


LITE


2024-05-09 12:23:07 [INFO] ledhntr[56692] > 685427122.py > (clock_search) [9] >  END SEARCH 1715271787.9545968
2024-05-09 12:23:07 [INFO] ledhntr[56692] > 685427122.py > (clock_search) [10] >  ### HUNT-IP-ENTITIES RESULTS ###
2024-05-09 12:23:07 [INFO] ledhntr[56692] > 685427122.py > (clock_search) [11] >  ### 0.31314992904663086 SECONDS ###
2024-05-09 12:23:07 [INFO] ledhntr[56692] > 685427122.py > (clock_search) [5] >  #### HUNT-IP-ENTITIES ####
2024-05-09 12:23:07 [INFO] ledhntr[56692] > 685427122.py > (clock_search) [6] >  START SEARCH 1715271787.9575987


NO-BACKTRACE


2024-05-09 12:23:39 [INFO] ledhntr[56692] > 685427122.py > (clock_search) [9] >  END SEARCH 1715271819.6261542
2024-05-09 12:23:39 [INFO] ledhntr[56692] > 685427122.py > (clock_search) [10] >  ### HUNT-IP-ENTITIES RESULTS ###
2024-05-09 12:23:39 [INFO] ledhntr[56692] > 685427122.py > (clock_search) [11] >  ### 31.66855549812317 SECONDS ###


In [10]:
ent_res[0].players['related'][0].has

[<Attribute(label=date-seen,value=2024-05-07 19:52:15+00:00),
 <Attribute(label=date-seen,value=2024-05-08 19:52:15+00:00),
 <Attribute(label=date-seen,value=2024-05-09 19:52:15+00:00),
 <Attribute(label=note,value=AUTE-EX-ESSE.),
 <Attribute(label=note,value=FUGIAT-EU-SED.),
 <Attribute(label=note,value=FUGIAT-MINIM-ID.),
 <Attribute(label=note,value=MOLLIT-INCIDIDUNT-ID.),
 <Attribute(label=note,value=PARIATUR-IPSUM-VOLUPTATE.),
 <Attribute(label=ledid,value=ip_1715197935224_c4516a),
 <Attribute(label=date-discovered,value=2024-05-07 19:52:15+00:00),
 <Attribute(label=date-discovered,value=2024-05-08 23:54:24+00:00),
 <Attribute(label=ip-address,value=171.146.35.244),
 <Attribute(label=confidence)]

### VERDICT

Welp... That's pretty MF'ing definitive.

Relations with a boatload of Entities or Relations attached cause everything to
slow down significantly.

So focus on more Attributes, less of everything else.

**HUNT**
    - Hunt-1
    - String
    - Dates
    - IP Addys
**IP**
    - Hunt names
    - Dates
    - IP Addy @key
**SERVICE**
    - Hunt names
    - Dates
    - IP Addy
    - Banners
    - Proto
    - Port

...This is gonna be a shitshow to refactor, but at least we finally know how to 
speed it up!

In [9]:

tdb.db_name = "HUNT-IP-ENTITIES"
# attr_res = clock_search(so, tdb)
res = tdb.find_things(so, search_mode='lite')

2024-05-08 16:10:46 [INFO] ledhntr[7964] > typedb_client.py > (check_tx) [1260] >  Session mismatch. Looking for HUNT-IP-ENTITIES got HUNT-IP-ATTRIBUTES! Creating new session.


In [13]:
res[0]

<Relation(label=hunt,con=0.0,iid=0x847080088000000000000000,hunt-name=HUNT-4-1715197935,has=505)

In [16]:
so2 = Entity(label='ip', has=[Attribute(label='ip-address', value='78.0.58.7')])
r2 = tdb.find_things(so2)
pprint(r2)

[<Entity(label=ip,con=0.0,iid=0x826e80028000000000000089,ip-address=78.0.58.7,has=12)]


In [19]:
pprint(r2[0])
pprint(r2[0].has)

<Entity(label=ip,con=0.0,iid=0x826e80028000000000000089,ip-address=78.0.58.7,has=12)
[<Attribute(label=date-seen,value=2024-05-07 19:52:15+00:00),
 <Attribute(label=date-seen,value=2024-05-08 19:52:15+00:00),
 <Attribute(label=date-seen,value=2024-05-09 19:52:15+00:00),
 <Attribute(label=note,value=UT-EU-QUIS.),
 <Attribute(label=note,value=EU-UT-NULLA.),
 <Attribute(label=note,value=AUTE-LABORE-EU.),
 <Attribute(label=note,value=TRASH-PANDA-SERVERS),
 <Attribute(label=ledid,value=ip_1715197935226_6fac76),
 <Attribute(label=date-discovered,value=2024-05-07 19:52:15+00:00),
 <Attribute(label=date-discovered,value=2024-05-08 23:54:24+00:00),
 <Attribute(label=ip-address,value=78.0.58.7),
 <Attribute(label=confidence)]


In [20]:
so3 = Entity(label='ip')
tdb.db_name = "HUNT-IP-ATTRIBUTES"
r3 = tdb.find_things(so3)
pprint(r3)

2024-05-08 16:13:35 [INFO] ledhntr[7964] > typedb_client.py > (check_tx) [1260] >  Session mismatch. Looking for HUNT-IP-ATTRIBUTES got HUNT-IP-ENTITIES! Creating new session.


[<Entity(label=ip,con=0.0,iid=0x826e80028000000000000000,ip-address=171.146.35.244,has=13),
 <Entity(label=ip,con=0.0,iid=0x826e80028000000000000001,ip-address=127.18.58.62,has=11),
 <Entity(label=ip,con=0.0,iid=0x826e80028000000000000002,ip-address=69.104.255.91,has=11),
 <Entity(label=ip,con=0.0,iid=0x826e80028000000000000003,ip-address=48.94.81.158,has=12),
 <Entity(label=ip,con=0.0,iid=0x826e80028000000000000004,ip-address=61.72.108.138,has=12),
 <Entity(label=ip,con=0.0,iid=0x826e80028000000000000005,ip-address=243.69.49.179,has=12),
 <Entity(label=ip,con=0.0,iid=0x826e80028000000000000006,ip-address=158.168.180.86,has=12),
 <Entity(label=ip,con=0.0,iid=0x826e80028000000000000007,ip-address=255.184.146.16,has=12),
 <Entity(label=ip,con=0.0,iid=0x826e80028000000000000008,ip-address=49.230.153.210,has=11),
 <Entity(label=ip,con=0.0,iid=0x826e80028000000000000009,ip-address=136.125.90.233,has=13),
 <Entity(label=ip,con=0.0,iid=0x826e8002800000000000000a,ip-address=12.247.191.30,has=1

In [22]:
r4 = tdb.find_things(so)
pprint(r4)

[<Relation(label=hunt,con=0.0,iid=0x847080088000000000000000,hunt-name=HUNT-3-1715197935,has=205,active_roles=1,total_players=1),
 <Relation(label=hunt,con=0.0,iid=0x847080088000000000000001,hunt-name=HUNT-4-1715197935,has=255,active_roles=1,total_players=1),
 <Relation(label=hunt,con=0.0,iid=0x847080088000000000000002,hunt-name=HUNT-1-1715197935,has=105,active_roles=1,total_players=1),
 <Relation(label=hunt,con=0.0,iid=0x847080088000000000000003,hunt-name=HUNT-2-1715197935,has=155,active_roles=1,total_players=1),
 <Relation(label=hunt,con=0.0,iid=0x847080088000000000000004,hunt-name=HUNT-0-1715197935,has=55,active_roles=1,total_players=1)]
