In [2]:
import pickle
import os
import concurrent.futures
from tqdm import tqdm
def create_folder(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)

def dump(path, data):
    with open(path+'.pickle', 'wb') as f:
        pickle.dump(data, f, protocol=2)

def dump_dict(d, base_path, depth, current_depth = 1, base_progress = ""):
    create_folder(base_path)
    i = 0
    futures = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=1000) as executor:
        for key in d:
            progress = base_progress + f"({i} / {len(d)-1})"
            path = os.path.join(base_path, str(key))
            if isinstance(d[key], dict) and current_depth < depth:
                print(progress)
                dump_dict(d[key], path, depth, current_depth+1, progress + " - ")
            else:
                futures.append(executor.submit(dump, path, d[key]))
            i+=1
        concurrent.futures.wait(futures)

In [2]:
import pandas as pd
def buildOffsets(path):
    df = pd.read_csv(path, sep='\t', header=None)
    df.head()
    df[0] = df[0].apply(lambda x: str(int(x)))
    df[1] = df[1].apply(lambda x: (x.lower()))
    return {
        "skyrim-to-address": dict(zip(df[1], df[0])),
        "address-to-skyrim": dict(zip(df[0], df[1])),
    }

offsets318 = buildOffsets("offsets-1-6-318-0.txt")
offsets97 = buildOffsets("offsets-se-1.5.97.0.txt")

In [3]:

import pandas as pd
import json 


df2 = pd.read_csv("known-addresses.txt", sep=';', header=None)

result_obj = {}


for index, row in df2.iterrows():
    result_obj[row[1]] = {
        "name":row[0]
    }

In [4]:
import re
import json
import pickle
with open('input.txt', 'r') as file:
    text = file.read()

pattern = r"(NameAddr|TypeAddr)\s*\(\s*([^,]+)\s*,\s*\"(.*)\"\s*\)\s*"

for match in re.finditer(pattern, text):
    if(match.group(1)):
        key = offsets97["skyrim-to-address"][match.group(2)[4:].lstrip("0").lower()]
        if(key not in result_obj):
            result_obj[key] = {}
        if(match.group(1) == "NameAddr"):
            result_obj[key]["name"] = re.sub(r"_14[^_]+$", "", match.group(3))
        else:
            result_obj[key]["definition"] = match.group(3)


dump_dict(result_obj,"..\\data\\definition\\",1)

# with open("..\\data\\definition.pickle", "wb") as f:
#     pickle.dump(result_obj, f, protocol=2)

In [5]:
import re

with open('outputsorted.txt', 'r') as file:
    text = file.read()

pattern = r"^.*?14([a-zA-Z0-9]+)\s+14([a-zA-Z0-9]+)"

result_obj = {"ae":{},"se":{}}



for match in re.finditer(pattern, text, re.MULTILINE):
    if(match.group(1)):
        seid = match.group(1).lstrip('0').lower()
        aeid = match.group(2).lstrip('0').lower()

        if(aeid in offsets318["skyrim-to-address"] and seid in offsets97["skyrim-to-address"]):
            ae = offsets318["skyrim-to-address"][aeid]
            se = offsets97["skyrim-to-address"][seid]
            result_obj["ae"][ae] = se
            result_obj["se"][se] = ae

print(len(result_obj["se"].keys()))

259868


In [6]:

import pandas as pd
import json 
import pickle

df2 = pd.read_csv("known-addresses.txt", sep=';', header=None)

for index, row in df2.iterrows():
    result_obj["ae"][str(row[2])] = str(row[1])
    result_obj["se"][str(row[1])] = str(row[2])

print(len(result_obj["se"].keys()))

dump_dict(result_obj,"..\\data\\addresses_match\\",2)

# with open("..\\data\\addresses_match.pickle", "wb") as f:
#     pickle.dump(json.dumps(result_obj), f, protocol=2)



259918
(0 / 1)
(1 / 1)


In [8]:

import pandas as pd
import json 
import pickle

ae = pd.read_csv("function-call-offsets-ae.csv", sep=';', header=None)
se = pd.read_csv("function-call-offsets-se.csv", sep=';', header=None)
# se = pd.read_csv("function-call-offsets-se.csv", sep=';', header=None)

result = {"ae":{},"se":{}}


def read(version, ds):
    for index, row in ds.iterrows():

        if version not in result:
            result[version] = {}
        
        if str(row[0]) not in result[version]:
            result[version][str(row[0])] = {}

        if str(row[1]) not in result[version][str(row[0])]:
            result[version][str(row[0])][str(row[1])] = []

        result[version][str(row[0])][str(row[1])].append(str(row[2]))

read("ae",ae)
read("se",se)

print(result)

dump_dict(result,"..\\data\\function_call_offsets\\",2)




{'ae': {'150': {'11040': ['0x3f'], '109608': ['0x6f']}, '1050': {'69161': ['0x12'], '109608': ['0x22']}, '106710': {'108319': ['0x3a'], '109588': ['0x4c']}, '106711': {'108265': ['0x9']}, '106712': {'108266': ['0x149']}, '106713': {'108268': ['0x15']}, '106714': {'108269': ['0x15'], '464275': ['0x81']}, '106715': {'68216': ['0x3a', '0x5b', '0x7c', '0x9d'], '108270': ['0x29b'], '68219': ['0x2b4']}, '10045': {'69161': ['0x12'], '109608': ['0x22']}, '106717': {'175091': ['0x42']}, '106721': {'70693': ['0xe1']}, '106722': {'70692': ['0xe1']}, '106723': {'11141': ['0x39', '0xa4', '0xf5', '0x15b', '0x1c1', '0x212', '0x263', '0x2b4', '0x32b', '0x37c', '0x3cd', '0x41e', '0x46f'], '68088': ['0x43', '0xae', '0xff', '0x165', '0x1cb', '0x21c', '0x26d', '0x2be', '0x335', '0x386', '0x3d7', '0x428', '0x479'], '68144': ['0x5a', '0xc5', '0x116', '0x17c', '0x1e2', '0x233', '0x284', '0x2d5', '0x34c', '0x39d', '0x3ee', '0x43f', '0x490'], '109694': ['0x76'], '106711': ['0x7e', '0x12a', '0x190', '0x2e9', '0