In [1]:
import pandas as pd
import numpy as np
import json
import os
import sys
sys.path.append(os.path.dirname(sys.path[0]))

from web3 import HTTPProvider
from web3._utils.request import make_post_request
from multithread_processing.base_job import BaseJob

In [2]:
contract_interact = pd.read_csv("../data/submit_data/contract_interact.csv")
user_interact = pd.read_csv("../data/submit_data/user_interact.csv")


In [3]:
other_user = user_interact.other_user.unique().tolist()

In [53]:
len(other_user)

1693454

In [50]:
def _generate_get_code_json_rpc(contract_addresses, block='latest'):
    for idx, contract_address in enumerate(contract_addresses):
        yield _generate_json_rpc(
            method='eth_getCode',
            params=[contract_address, hex(block) if isinstance(block, int) else block],
            request_id=idx
        )


def _generate_json_rpc(method, params, request_id=1):
    return {
        'jsonrpc': '2.0',
        'method': method,
        'params': params,
        'id': request_id,
    }


class _BatchHTTPProvider(HTTPProvider):
    def make_batch_request(self, text):
        self.logger.debug("Making request HTTP. URI: %s, Request: %s",
                          self.endpoint_uri, text)
        request_data = text.encode('utf-8')
        raw_response = make_post_request(
            self.endpoint_uri,
            request_data,
            **self.get_request_kwargs()
        )
        response = self.decode_rpc_response(raw_response)
        self.logger.debug("Getting response HTTP. URI: %s, "
                          "Request: %s, Response: %s",
                          self.endpoint_uri, text, response)
        return response


def _rpc_response_to_result(response):
    try:
        result = response.get('result')
        return result
    except Exception as e:
        print(e)


def check_if_contracts(addresses: list[str],
                       provider_url: str='https://mainnet.infura.io/v3/d8f0e3b2f0814d1c963f660286ad1783') -> dict[str, bool]:
    """
    The check_if_contracts function takes a list of addresses and returns a dictionary with the same keys as the input
    list, but with values that are booleans indicating whether or not each address is a contract.
    Args:
        addresses: list[str]: Specify the list of addresses to check
        provider_url: str: Specify the url of the node to connect to
    Returns:
        A dictionary with the address as the key and a boolean as value
    Doc Author:
        T
    """
    returned_dict = {addr.lower(): True for addr in addresses}
    json_rpc = list(_generate_get_code_json_rpc(returned_dict.keys()))

    _batch_provider = _BatchHTTPProvider(provider_url)
    response_batch = _batch_provider.make_batch_request(json.dumps(json_rpc))

    for response in response_batch:
        request_id = response['id']  # request id is the index of the contract in contracts list
        if not response.get('result'):
            returned_dict[addresses[request_id]] = False
        bytecode = _rpc_response_to_result(response)
        if bytecode == "0x":
            returned_dict[addresses[request_id]] = False

    return returned_dict




class detectContract(BaseJob):
    def __init__(self, output_path, listIndex: list,
                 max_workers= 4, batch_size=50):

        self.isContract = dict()
        self.output = output_path
        #self.subgraph = list()

        
        super().__init__(work_iterable=listIndex,
                         max_workers=max_workers,
                         batch_size=batch_size)

    def _execute_batch(self, works):
        try: 
            contract = check_if_contracts(addresses=works)
        except Exception as e:
            print(f"Out of request with err: {e}")
        self.isContract.update(contract)

    def _end(self):
        super()._end()
        df = pd.DataFrame(list(self.isContract.items()), columns=['address', 'IsContract'])
        df.to_csv(f"{self.output}", index = 'False')     




In [None]:
job1 = detectContract("../data/filter_data/contract_user.csv",listIndex=other_user
                            )

job1.run()

In [14]:
is_contract = pd.read_csv("../data/filter_data/is_contract.csv")

In [49]:
is_contract[is_contract['address'] =='0x0000000000007f150bd6f54c40a34d7c3d5e9f56']

Unnamed: 0.1,Unnamed: 0,address,IsContract
428877,428877,0x0000000000007f150bd6f54c40a34d7c3d5e9f56,False


In [22]:
user_address_list = is_contract[is_contract['IsContract'] ==False].address.unique().tolist()

In [25]:
user_interact_filter = user_interact[user_interact['other_user'].isin(user_address_list)]

In [28]:
user_interact_filter.to_parquet("../data/filter_data/user_interact.parquet")

In [29]:
additional_contract_interact = user_interact[~user_interact['other_user'].isin(user_address_list)]

In [38]:
additional_contract_interact.rename({"other_user":"contract_address"},axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  additional_contract_interact.rename({"other_user":"contract_address"},axis=1, inplace=True)


In [39]:
contract_interact

Unnamed: 0,user,contract_address,count
0,0x978aad63fe35f7a72573c411c3dff53608aaeac9,0x5d3a536e4d6dbd6114cc1ead35777bab948e3643,5
1,0xdfdbc9fe64f3020b8440deb6c5db544a89aaaf04,0x39aa39c021dfbae8fac545936693ac917d5e7563,1
2,0xba07e5cb42ca05d2fc3d96514461d7245050eb7b,0x39aa39c021dfbae8fac545936693ac917d5e7563,1
3,0x50aeee21369e2a4d95eb8784da49645f927adf54,0xb3319f5d18bc0d84dd1b4825dcde5d5f7266d407,1
4,0xe6a6ee4196d361ec4f6d587c7ebe20c50667fb39,0x8e595470ed749b85c6f7669de83eae304c2ec68f,1
...,...,...,...
22962,0xd0000d82238adc78c8ce213a30f3ae32de7b000d,0x1b3e95e8ecf7a7cab6c4de1b344f94865abd12d5,5
22963,0x4a10b1e56109ecebd9e3d823bf47e7e88ebf3cae,0x2409af0251dcb89ee3dee572629291f9b087c668,13
22964,0x2b384212edc04ae8bb41738d05ba20e33277bf33,0x158079ee67fce2f58472a96584a73c7ab9ac95c1,2
22965,0x9e6e344f94305d36ea59912b0911fe2c9149ed3e,0x4ddc2d193948926d02f9b1fe9e1daa0718270ed5,1


In [40]:
contract_interact_prep = pd.concat([contract_interact,additional_contract_interact.reset_index(drop=True)])

In [43]:
contract_interact_prep

Unnamed: 0,user,contract_address,count
0,0x978aad63fe35f7a72573c411c3dff53608aaeac9,0x5d3a536e4d6dbd6114cc1ead35777bab948e3643,5
1,0xdfdbc9fe64f3020b8440deb6c5db544a89aaaf04,0x39aa39c021dfbae8fac545936693ac917d5e7563,1
2,0xba07e5cb42ca05d2fc3d96514461d7245050eb7b,0x39aa39c021dfbae8fac545936693ac917d5e7563,1
3,0x50aeee21369e2a4d95eb8784da49645f927adf54,0xb3319f5d18bc0d84dd1b4825dcde5d5f7266d407,1
4,0xe6a6ee4196d361ec4f6d587c7ebe20c50667fb39,0x8e595470ed749b85c6f7669de83eae304c2ec68f,1
...,...,...,...
123553,0xffffc801a3e4ceddd58b6280006819e14129145b,0x0000000000c2d145a2526bd8c716263bfebe1a72,1
123554,0xffffc801a3e4ceddd58b6280006819e14129145b,0x3fc91a3afd70395cd496c647d5a6cc9d4b2b7fad,6
123555,0xffffc801a3e4ceddd58b6280006819e14129145b,0x881d40237659c251811cec9c364ef91dc08d300c,8
123556,0xffffffff7a5cb56bbccb9dfaff17bb2b26a3d71c,0x00000000000000adc04c56bf30ac9d3c0aaf14dc,1


In [44]:
contract_interact_prep.to_parquet("../data/filter_data/contract_interact.parquet")

In [45]:
user_interact_filter

Unnamed: 0,user,other_user,count
1,0x00000000000030e5959659622cb7eb50aa20ee52,0x01949723055a451229c7ba3a817937c966748f76,2
2,0x00000000000030e5959659622cb7eb50aa20ee52,0x058d79a4c6eb5b11d0248993ffa1faa168ddd3c0,4
3,0x00000000000030e5959659622cb7eb50aa20ee52,0x06b1655b9d560de112759b4f0bf57d6f005e72fe,2
4,0x00000000000030e5959659622cb7eb50aa20ee52,0x06bfac47528d0d6a2de3b1ae5d8214ad45b4b945,4
5,0x00000000000030e5959659622cb7eb50aa20ee52,0x0928592f80d63d474257a7b797120e301ba2d987,10
...,...,...,...
3522866,0xffffffff7a5cb56bbccb9dfaff17bb2b26a3d71c,0xe22a8b0fd4bf0a25b2af2f9ae0dbd7c5600e84a0,1
3522867,0xffffffff7a5cb56bbccb9dfaff17bb2b26a3d71c,0xe51aac67b09eaed6d3d43e794d6bae679cbe09d8,1
3522868,0xffffffff7a5cb56bbccb9dfaff17bb2b26a3d71c,0xf2c1d994e90d542de587ab91731cae80b393cd9b,2
3522869,0xffffffff7a5cb56bbccb9dfaff17bb2b26a3d71c,0xf92c47757a69a6a0d917dbc6e1a21a8c7f436e37,1
