### Variables from Gnosis data
* ```python
  creatorToSafe = creatorAddr: {(safeAddr_1, txHash_1), ..., (safeAddr_n, txHash_n)}
  ```
* ```python
  ownerToSafe = ownerAddr: {safeAddr_1, ..., safeAddr_n}
  ```
* ```python
  safeToInfo = safeAddr: {
      'creator': (creatorAddr, txHash),
      'owner': {owner_1, ..., owner_n},
      'transfer': {
          (from, to, tokenSymbol, txHash_tokenSymbol),
          ...,
          (from, to, tokenSymbol, txHash_tokenSymbol)
      }
  }
  ```

* ```python
  allCreatorAddrs = set(creatorToSafe.keys())
  ```
* ```python
  allOwnerAddrs = set(ownerToSafe.keys())
  ```
* ```python
  allSafeAddrs = set(safeToInfo.keys())
  ```
* ```python
  allGnosisAddrs = allSafeAddrs.union(allOwnerAddrs).union(allCreatorAddrs)
  ```

### Variables from distributeEth data
* ```python
  distributes # '../static/distributeEth_15503366.json'
  ```
* ```python
  involvedAddrInfo # '../static/distributeEth_involvedAddrInfo_15503366.json'
  ```

* ```python
  allDistributeAddrs = set(involvedAddrInfo.keys())
  ```
* ```python
  suspects = {suspect_1, ..., suspect_n} # selected from allDistributeAddrs
  ```

### Variables from both
* ```python 
  intersections = list(suspects.intersection(allGnosisAddrs))
  ```


In [1]:
from collections import defaultdict

def _newSafeInfo():
    """Return the empty record stored for a safe seen for the first time."""
    return {
        'creator': None,
        'owner': set(),
        'transfer': set()
    }


# creatorAddr -> {(safeAddr, txHash), ...}
creatorToSafe = defaultdict(set)
# ownerAddr -> {safeAddr, ...}
ownerToSafe = defaultdict(set)
# safeAddr -> {'creator': ..., 'owner': {...}, 'transfer': {...}}
safeToInfo = defaultdict(_newSafeInfo)

In [None]:
# load data file: "transaction_histories"

import json
from web3 import Web3

# Every line of the file is parsed as one JSON object; its first key is taken
# as the safe address and the matching value as that safe's transaction list.
with open('../static/transaction_histories', 'r') as historyFile:
    for rawLine in historyFile:
        safeData = json.loads(rawLine.rstrip())
        firstKey = list(safeData)[0]
        safeAddr = firstKey.lower()
        for tx in safeData[firstKey]:
            txInfo = tx['transaction']['txInfo']
            # txType in {'Custom', 'Transfer', 'Creation', 'SettingsChange'}
            txType = txInfo['type']
            if txType == 'Creation':
                # Record who created the safe and in which transaction.
                txHash = txInfo['transactionHash'].lower()
                creatorAddr = txInfo['creator']['value'].lower()
                creatorToSafe[creatorAddr].add((safeAddr, txHash))
                safeToInfo[safeAddr]['creator'] = (creatorAddr, txHash)
            elif txType == 'Transfer':
                txHash = tx['transaction']['id']
                if txHash.startswith('ethereum'):
                    # Keep only the third '_'-separated field of such ids.
                    txHash = txHash.split('_')[2].lower()
                transferInfo = txInfo['transferInfo']
                transferType = transferInfo['type']
                if transferType == 'NATIVE_COIN':
                    tokenSymbol = 'ETH'
                elif transferType == 'ERC20':
                    # A missing/empty symbol is stored as the string 'None'.
                    tokenSymbol = transferInfo['tokenSymbol'] or 'None'
                    # Optional filter that was disabled in the notebook:
                    # if tokenSymbol not in {'USDC', 'USDT', 'WBTC', 'DAI', 'WETH', 'SHIB', 'HEX'}:
                    #     continue
                else:
                    # Any other transfer type gets an empty symbol.
                    tokenSymbol = ''
                fromAddr = txInfo['sender']['value'].lower()
                toAddr = txInfo['recipient']['value'].lower()
                # The stored hash is suffixed with the token symbol.
                taggedHash = '_'.join((txHash, tokenSymbol))
                safeToInfo[safeAddr]['transfer'].add((fromAddr, toAddr, tokenSymbol, taggedHash))

In [None]:
# load data file: "SafeOwners/success"

import json
from web3 import Web3

# Every line is one JSON object describing a safe and its list of owners.
with open('../static/SafeOwners/success', 'r') as ownersFile:
    for rawLine in ownersFile:
        safe = json.loads(rawLine.rstrip())
        safeAddr = safe['address']['value'].lower()
        for ownerAddr in (owner['value'].lower() for owner in safe['owners']):
            # Maintain both directions of the owner <-> safe relation.
            # safeToInfo is only touched when the safe has at least one owner.
            ownerToSafe[ownerAddr].add(safeAddr)
            safeToInfo[safeAddr]['owner'].add(ownerAddr)

In [None]:
# Key sets of the three lookup tables, and their union.
allSafeAddrs = set(safeToInfo)
allOwnerAddrs = set(ownerToSafe)
allCreatorAddrs = set(creatorToSafe)
allGnosisAddrs = allSafeAddrs | allOwnerAddrs | allCreatorAddrs

# Trailing numbers are the values recorded in the notebook.
print(len(allGnosisAddrs)) # 159223
print(len(allSafeAddrs & allOwnerAddrs)) # 294
print(len(allSafeAddrs & allCreatorAddrs)) # 66

In [None]:
# load data file: "distributeEth_15503366.json"
# load data file: "distributeEth_involvedAddrInfo_15503366.json"

import json

with open('../static/distributeEth_15503366.json', 'r') as distributeFile:
    distributes = json.load(distributeFile)

with open('../static/distributeEth_involvedAddrInfo_15503366.json', 'r') as addrInfoFile:
    involvedAddrInfo = json.load(addrInfoFile)

# Every address that has an entry in the involved-address info file.
allDistributeAddrs = set(involvedAddrInfo)

# Trailing numbers are the values recorded in the notebook.
print(len(distributes)) # 74683
print(len(allDistributeAddrs)) # 588150

In [None]:
# Suspects are the addresses whose recorded balance is the string '0' and
# whose nonce is 0.
suspects = {
    addr
    for addr, info in involvedAddrInfo.items()
    if info['balance'] == '0' and info['nonce'] == 0
}

print(len(suspects)) # 328536

In [None]:
# Suspects that also appear in the Gnosis data (as safe, owner or creator),
# and the subset of suspects that appear as safe owners.
intersections = list(suspects & allGnosisAddrs)
intersecitons_owner = list(suspects & allOwnerAddrs)
# Variant over all distribute addresses instead of suspects only (disabled):
# intersections_tmp = list(allDistributeAddrs.intersection(allGnosisAddrs))

# Trailing numbers are the values recorded in the notebook.
print(len(intersections)) # 336
print(len(intersecitons_owner)) # 334
# print(len(intersections_tmp)) # 5233

In [None]:
import networkx as nx

# Undirected graph over all distribute addresses. Each sender -> recipient
# pair becomes an edge whose 'tx_hash' attribute is the set of transaction
# hashes that connected the two addresses.
distributeGraph = nx.Graph()
distributeGraph.add_nodes_from(allDistributeAddrs)
for distribute in distributes:
    txHash = distribute['tx_hash'].lower()
    fromAddr = distribute['sender'].lower()
    for recipient in distribute['recipients']:
        toAddr = recipient.lower()
        distributeGraph.add_edge(fromAddr, toAddr)
        # Create the hash set on first sight of the edge, then extend it.
        distributeGraph.edges[fromAddr, toAddr].setdefault('tx_hash', set()).add(txHash)

# Connected components of distributeGraph as sets, largest first.
allCcs = [set(cc) for cc in sorted(nx.connected_components(distributeGraph), key=len, reverse=True)]

# address -> position of its component in allCcs. Built once so that
# isConnected() needs two dict lookups instead of scanning every component;
# it is called for every pair of candidate addresses further down.
ccIndexByAddr = {addr: idx for idx, cc in enumerate(allCcs) for addr in cc}


def isConnected(addr_i, addr_j):
    """Return True if both addresses lie in the same component of allCcs.

    Returns False when either address is not part of any component
    (i.e. it is not a node of distributeGraph at the time allCcs was built).
    """
    idx_i = ccIndexByAddr.get(addr_i)
    return idx_i is not None and idx_i == ccIndexByAddr.get(addr_j)


print(distributeGraph.number_of_nodes(), distributeGraph.number_of_edges()) # 588150 709997

In [None]:
import networkx as nx

# Undirected graph with one node per known safe address; the rest of the
# notebook adds typed edges ('same_owner', 'creator', 'transfer', ...) to it.
safeGraph = nx.Graph()
safeGraph.add_nodes_from(allSafeAddrs)

def getAllRelatedSafes(addr):
    """Return the safes tied to ``addr`` as a list (may contain duplicates).

    The list holds, in this order: the safes ``addr`` owns, the safes
    ``addr`` created, and ``addr`` itself when it is a safe address.
    """
    # .get() is used so the defaultdicts do not grow on a miss.
    related = list(ownerToSafe.get(addr, ()))
    related.extend(entry[0] for entry in creatorToSafe.get(addr, ()))
    if addr in allSafeAddrs:
        related.append(addr)
    return related

def addEdgeBetweenRelatedSafes(addr_i, addr_j, typeName, txHashes):
    """Link every safe related to ``addr_i`` with every safe related to ``addr_j``.

    Each resulting edge of ``safeGraph`` gets ``typeName`` set to True. When
    ``txHashes`` is non-empty, its elements are merged into the edge's
    ``'<typeName>-tx_hash'`` set.

    Args:
        addr_i, addr_j: addresses resolved through getAllRelatedSafes().
        typeName: edge attribute name marking the relation type.
        txHashes: iterable of transaction hashes, or None/empty for none.
    """
    safes_i = getAllRelatedSafes(addr_i)
    safes_j = getAllRelatedSafes(addr_j)
    keyName = '{}-tx_hash'.format(typeName)

    for safe_i in safes_i:
        for safe_j in safes_j:
            safeGraph.add_edge(safe_i, safe_j)
            edgeData = safeGraph.edges[safe_i, safe_j]
            edgeData[typeName] = True
            if txHashes:
                # Every edge owns its own set. Storing the caller's set
                # directly would make all edges created in one call (and the
                # caller's object, e.g. a distributeGraph 'tx_hash' set) share
                # one set, so a later update on one edge would leak hashes
                # into the others.
                edgeData.setdefault(keyName, set()).update(txHashes)

# Connect two safes with a 'same_owner' edge when they share an owner and
# their complete owner sets are equal.
for ownedSafes in ownerToSafe.values():
    ownedSafes = list(ownedSafes)
    for pos, safe_i in enumerate(ownedSafes):
        owners_i = safeToInfo[safe_i]['owner']
        # Only look at safes after safe_i so each unordered pair is seen once.
        for safe_j in ownedSafes[pos + 1:]:
            if owners_i == safeToInfo[safe_j]['owner']:
                safeGraph.add_edge(safe_i, safe_j)
                safeGraph.edges[safe_i, safe_j]['same_owner'] = True
    
# Connect every pair of safes created by the same address with a 'creator'
# edge carrying both creation transaction hashes.
for creatorAddr, createdSafes in creatorToSafe.items():
    if len(createdSafes) > 1000:
        # Creators with more than 1000 safes are reported and skipped
        # (presumably to avoid the quadratic number of pairs -- confirm).
        print(creatorAddr, len(createdSafes))
        continue
    createdSafes = list(createdSafes)
    for pos, (safe_i, txHash_i) in enumerate(createdSafes):
        for safe_j, txHash_j in createdSafes[pos + 1:]:
            safeGraph.add_edge(safe_i, safe_j)
            edgeData = safeGraph.edges[safe_i, safe_j]
            edgeData['creator'] = True
            edgeData['creator-tx_hash'] = {txHash_i, txHash_j}


# For every recorded transfer, link the safes related to the sender with the
# safes related to the recipient. The third tuple field (token symbol) is not
# needed here; the fourth is the hash already suffixed with the symbol.
for safeInfo in safeToInfo.values():
    for fromAddr, toAddr, _tokenSymbol, txHash in safeInfo['transfer']:
        addEdgeBetweenRelatedSafes(fromAddr, toAddr, 'transfer', {txHash})

print(safeGraph.number_of_nodes(), safeGraph.number_of_edges()) # 55717 908956

In [None]:
# For every unordered pair of suspect addresses that also occur in the Gnosis
# data: if the two lie in the same distributeGraph component, link their
# related safes with a 'distributeEth' edge. Transaction hashes are attached
# only when the two addresses are directly connected (not via a path such as
# a -> b -> c).
for pos, addr_i in enumerate(intersections):
    for addr_j in intersections[pos + 1:]:
        if not isConnected(addr_i, addr_j):
            continue
        if (addr_i, addr_j) in distributeGraph.edges:
            txHashes = distributeGraph.edges[addr_i, addr_j]['tx_hash']
        else:
            txHashes = None
        addEdgeBetweenRelatedSafes(addr_i, addr_j, 'distributeEth', txHashes)

print(safeGraph.number_of_nodes(), safeGraph.number_of_edges())

In [None]:
# A safe is flagged when it has at least one owner, all of its owners are
# suspect owner addresses, and those owners induce a connected subgraph of
# distributeGraph.
suspects_by_owner = set()
intersecitons_owner = set(intersecitons_owner)
for safe in safeGraph.nodes():
    owners = safeToInfo[safe]['owner']
    if not owners:
        continue
    if not owners.issubset(intersecitons_owner):
        continue
    if nx.is_connected(distributeGraph.subgraph(owners)):
        suspects_by_owner.add(safe)
        safeGraph.nodes[safe]['similar_owner'] = True

print(len(suspects_by_owner))

# safe1 {o1, o2, o3} 
# safe2 {o4, o5}
# safe3 {o1, o2, o3} 
# safe4 ...
# {o1, o2, o3, o4, o5}

# Link every pair of flagged safes whose first listed owners lie in the same
# distributeGraph component. When those two owners are directly connected,
# the edge also records the connecting transaction hashes.
suspects_by_owner = list(suspects_by_owner)
for pos, addr_i in enumerate(suspects_by_owner):
    for addr_j in suspects_by_owner[pos + 1:]:
        owner_i = list(safeToInfo[addr_i]['owner'])[0]
        owner_j = list(safeToInfo[addr_j]['owner'])[0]

        if not isConnected(owner_i, owner_j):
            continue
        safeGraph.add_edge(addr_i, addr_j)
        edgeData = safeGraph.edges[addr_i, addr_j]
        edgeData['similar_owner'] = True
        if (owner_i, owner_j) in distributeGraph.edges:
            edgeData['similar_owner-tx_hash'] = distributeGraph.edges[owner_i, owner_j]['tx_hash']

print(safeGraph.number_of_nodes(), safeGraph.number_of_edges())

In [12]:
def complementEdgeTypeNames(typeNames):
    """Set each name in ``typeNames`` to False on every safeGraph edge lacking it."""
    for _u, _v, edgeAttrs in safeGraph.edges(data=True):
        for typeName in typeNames:
            # Existing values are left untouched.
            edgeAttrs.setdefault(typeName, False)


complementEdgeTypeNames(['creator', 'similar_owner', 'same_owner', 'transfer', 'distributeEth'])

def complementNodeTypeNames(typeNames):
    """Set each name in ``typeNames`` to False on every safeGraph node lacking it."""
    for _node, nodeAttrs in safeGraph.nodes(data=True):
        for typeName in typeNames:
            # Existing values are left untouched.
            nodeAttrs.setdefault(typeName, False)


complementNodeTypeNames(['similar_owner'])

In [13]:
import pickle

# Persist both graphs as pickle files.
with open('../static/networks/distributeGraph.pkl', 'wb') as distributeOut:
    pickle.dump(distributeGraph, distributeOut)

with open('../static/networks/safeGraph.pkl', 'wb') as safeOut:
    pickle.dump(safeGraph, safeOut)