# SEMESO 2023 - Mini Curso - Data Science no Mundo das Criptomoedas: da Blockchain ao MLOps 

Felipe Lana Machado

## Etapas:

1. Obtenção dos dados da moeda Illuvium (ILV) na rede Ethereum.
    - Utilização da lib web3.py.
    - Utilização da API Alchemy.
2. Pré-processamento dos dados obtidos.
3. Desenvolvimento de features dentro dos dados obtidos.
4. CI/CD com GitHub Actions.
5. Princípios do MLOps.

In [1]:
import warnings
import json
import os
from typing import List, Dict
from urllib3.exceptions import NotOpenSSLWarning
warnings.simplefilter('ignore', NotOpenSSLWarning)
from src.blockchain.get_ethereum_data import Blockchain
from src.utils.project_paths import DATA_RAW



## Obtenção dos dados

In [2]:
# Collect the events exposed by the project's Blockchain helper, starting at
# block 12084123 (presumably where the Illuvium contract begins -- TODO confirm).
blockchain = Blockchain(abi='illuvium')
illuvium_started = 12084123
data = blockchain._get_events(start_block=illuvium_started)

# Number the new dump after the count of existing 'transfer_data_' files in
# DATA_RAW, then write the events as indented JSON.
list_of_files = os.listdir(DATA_RAW)
size = sum(1 for file in list_of_files if 'transfer_data_' in file)
output_path = os.path.join(DATA_RAW, f'transfer_data_{size}.json')
with open(output_path, 'w') as json_file:
    json.dump(data, json_file, indent=4)

100%|██████████| 613/613 [09:52<00:00,  1.04it/s]


## Pré-processamento

In [3]:
def remove_duplicates_by_field(field: str, data: List[Dict]) -> List[Dict]:
    """Drop every dict whose value for ``field`` was already seen earlier.

    The first dict carrying a given value is kept and the original order of
    the kept dicts is preserved. The input list itself is not modified.

    Args:
        field (str): Key whose value must be unique in the result.
        data (List[Dict]): The dicts to filter; each must contain ``field``
            and its value must be hashable.

    Returns:
        List[Dict]: The dicts of ``data`` with later duplicates removed.

    Raises:
        KeyError: If a dict does not contain ``field``.
    """
    # Dicts keep insertion order, and setdefault only stores the first dict
    # seen for each value, so later duplicates are ignored.
    first_by_value = {}
    for record in data:
        first_by_value.setdefault(record[field], record)
    return list(first_by_value.values())

# Keep only the first record seen for each transaction hash.
data = remove_duplicates_by_field(field="txhash", data=data)
# Display how many records remain after deduplication.
len(data)

502130

In [5]:
def create_ilv_value_field(data: List[Dict]) -> List[Dict]:
    """Add a ``value_ilv`` entry holding each transaction's value in ether units.

    Each transaction is copied, so the input dicts are left untouched. The
    conversion is delegated to ``blockchain.w3.from_wei(value, 'ether')``,
    where ``blockchain`` is the notebook-level helper object.

    Args:
        data (List[Dict]): The transactions list; each item needs a
            ``value`` entry (presumably an amount in wei -- TODO confirm).

    Returns:
        List[Dict]: New dicts with the extra ``value_ilv`` entry.
    """
    enriched = []
    for transaction in data:
        record = dict(transaction)
        record['value_ilv'] = blockchain.w3.from_wei(transaction['value'], 'ether')
        enriched.append(record)
    return enriched

# Add the 'value_ilv' entry to every transaction.
data = create_ilv_value_field(data=data)

In [7]:
def create_timestamp_field(data: List[Dict]) -> List[Dict]:
    """Add a ``timestamp`` entry looked up from each transaction's block number.

    The timestamp comes from ``blockchain.get_block_timestamp(blockNumber)``,
    where ``blockchain`` is the notebook-level helper object. Each distinct
    block number is looked up only once per call, because many transactions
    share a block and repeating the lookup for every transaction is wasted
    work (presumably a remote node query -- TODO confirm).

    NOTE(review): this assumes ``get_block_timestamp`` always returns the same
    value for the same block number.

    Args:
        data (List[Dict]): The transactions list; each item needs a
            ``blockNumber`` entry.

    Returns:
        List[Dict]: New dicts with the extra ``timestamp`` entry; the input
        dicts are not modified.
    """
    timestamp_by_block = {}
    enriched = []
    for transaction in data:
        block_number = transaction['blockNumber']
        if block_number not in timestamp_by_block:
            # First time this block is seen: fetch and remember its timestamp.
            timestamp_by_block[block_number] = blockchain.get_block_timestamp(block_number)
        enriched.append({**transaction, 'timestamp': timestamp_by_block[block_number]})
    return enriched

# Add the 'timestamp' entry to every transaction (the recorded run of this
# cell was interrupted before it finished).
data = create_timestamp_field(data=data)

KeyboardInterrupt: 

In [None]:
from decimal import Decimal


def _json_default(obj):
    """Serialize the one non-JSON type the records contain: ``Decimal``."""
    if isinstance(obj, Decimal):
        # Written as a string so no precision is lost.
        return str(obj)
    raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')


# Number the new dump after the count of existing 'transfer_data_' files in
# DATA_RAW, then write the processed records as indented JSON. The records
# carry Decimal values in 'value_ilv', which json cannot encode by itself.
list_of_files = os.listdir(DATA_RAW)
size = len([file for file in list_of_files if 'transfer_data_' in file])
with open(os.path.join(DATA_RAW, f'transfer_data_{size}.json'), 'w') as json_file:
    json.dump(data, json_file, indent=4, default=_json_default)

In [None]:
# Display the processed records.
data

[{'txhash': '0xa5b7f915aa6cd5789a79d2905540a08490bc215ac931dd48ded7ab214bcd84d0',
  'blockNumber': 12084124,
  'from': '0x0000000000000000000000000000000000000000',
  'to': '0x0691f5804d4227925F19b031821b530b48FFf38f',
  'value': 7000000000000000000000000,
  'value_ilv': Decimal('7000000')},
 {'txhash': '0x8360ccf47f03b5983ac4c56b0ff155fbcf7f55fef265de4c196638beb59e5dfd',
  'blockNumber': 12084211,
  'from': '0x0691f5804d4227925F19b031821b530b48FFf38f',
  'to': '0x58C37A622cdf8aCe54d8b25c58223f61d0d738aA',
  'value': 6000000000000000000000000,
  'value_ilv': Decimal('6000000')},
 {'txhash': '0x8e9236819f4479b6270002e32615dfb50efd9dd7c50d0a08a80c548077fda35d',
  'blockNumber': 12131640,
  'from': '0x0691f5804d4227925F19b031821b530b48FFf38f',
  'to': '0x58C37A622cdf8aCe54d8b25c58223f61d0d738aA',
  'value': 1000000000000000000000000,
  'value_ilv': Decimal('1000000')},
 {'txhash': '0x7e421e47cef42dcb4669174684ea565e7fd4f9f01a05d63ed09fc407dc01f031',
  'blockNumber': 12132136,
  'from': '0

## Desenvolvimento de Features

## CI / CD - GitHub Actions

## MLOps

In [1]:
import os

def create_test_files(src_directory: str, dest_directory: str) -> None:
    """
    Create placeholder test files in dest_directory, one per file in src_directory.

    For every regular file ``name.ext`` directly inside ``src_directory`` a
    file ``name_test.ext`` is created in ``dest_directory``. New files are
    empty; files that already exist keep their content, so the function can
    be re-run without wiping tests that were already written.

    Args:
        src_directory (str): Path of the source folder.
        dest_directory (str): Path of the destination folder; created
            (including parents) when missing.
    """
    # Create the destination folder if needed, without a separate existence check.
    os.makedirs(dest_directory, exist_ok=True)

    # Walk the direct entries of the source folder.
    for filename in os.listdir(src_directory):
        # Skip subdirectories (and anything else that is not a regular file).
        if not os.path.isfile(os.path.join(src_directory, filename)):
            continue

        # Insert "_test" between the base name and the extension.
        base, ext = os.path.splitext(filename)
        new_filepath = os.path.join(dest_directory, f"{base}_test{ext}")

        # Append mode creates the file when missing but never truncates an
        # existing one, unlike 'w'.
        with open(new_filepath, 'a'):
            pass

# Usage example:
create_test_files('src/features', 'tests/features')
