# Investigating series sizes

## Testing the ICOParser class

In [1]:
import pandas as pd
from ico_parser import ICOParser
# Display the value of every expression in a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

### Load table with information about ICOs

In [2]:
# Directory holding the per-ICO CSV files; file names are built as f'{path_to_csvs}{ico}.csv'.
# NOTE(review): absolute, machine-specific path — adjust before running on another machine.
path_to_csvs = '/home/gabriel/Documents/Repos/time_series_study/data_and_models/all_icos/'

In [3]:
# Load the ICO metadata table (semicolon-delimited file, read relative to the working directory).
ico_table = pd.read_csv('lista_ico_2020-08-17_tab3.csv', sep=';')
# Preview the last rows.
ico_table.tail()

Unnamed: 0,ico,adress,fraud,start_date,market_start_date,diff_days,market_days,size_ok,price_zero,Market Cap,o_concur,biggest_holder,exchange,contract,date_analysis,code,site,social_media
253,TRADEXTOKEN,0x5432c580e34f590f4dd901b825ddeb92e905e826,1,24/01/2020,26/03/2020,62,186,,,,,98,0,0,22/09/2020,,,
254,TRONCLASSIC,0xad5fe5b0b8ec8ff4565204990e4405b2da117d8e,1,05/08/2018,10/08/2018,153,721,x,x,,,30,13,1,04/06/2019,x,x,
255,WALTONCHAIN,0xb7cb1c96db6b22b0d3d9536e0108d062bd488f74,1,21/07/2017,23/07/2017,2,1163,x,,,,61,4,0,19/01/2018,x,x,x
256,ZPAY,0xeffea57067e02999fdcd0bb45c0f1071a29472d9,1,25/07/2018,20/08/2018,26,770,x,,,,99,0,0,16/02/2019,,,
257,ZYNECOIN,0xe65ee7c03bbb3c950cfd4895c24989afa233ef01,1,21/01/2019,23/01/2019,2,614,x,,,,15,2,0,22/07/2019,x,x,x


In [4]:
# The three date columns are stored as day-first strings (e.g. '24/01/2020').
# `astype('datetime64')` lets pandas guess the layout, which reads ambiguous
# values such as '05/08/2018' month-first, and the unit-less 'datetime64'
# dtype is rejected by pandas >= 2.0. Parse with an explicit format instead.
for _date_col in ('start_date', 'market_start_date', 'date_analysis'):
    ico_table[_date_col] = pd.to_datetime(ico_table[_date_col], format='%d/%m/%Y')
# Index by ICO name so rows can be read with `ico_table.at[ico, column]`.
# Guarded so re-running the cell does not fail once 'ico' is already the index.
if ico_table.index.name != 'ico':
    ico_table = ico_table.set_index('ico')
ico_table.dtypes

adress                       object
fraud                         int64
start_date           datetime64[ns]
market_start_date    datetime64[ns]
diff_days                     int64
market_days                   int64
size_ok                      object
price_zero                   object
Market Cap                  float64
o_concur                     object
biggest_holder                int64
exchange                      int64
contract                      int64
date_analysis        datetime64[ns]
code                         object
site                         object
social_media                 object
dtype: object

In [5]:
import pandas as pd
from datetime import datetime, timedelta
from exchange_addresses import ADRESS_LIST
import pytz
import requests
import json
import time

## Updated Performance

In [6]:
%%time
# Build an ICOParser for a single ICO with a 20-step time series.
ico = "COMPOUND"
ico_csv= f'{path_to_csvs}{ico}.csv'
# Market start date and fraud label come from the metadata table; the date is
# passed to the parser as a string.
market_start_date = str(ico_table.at[ ico, 'market_start_date'].date())
is_fraud = ico_table.at[ ico, 'fraud']
ico_parser = ICOParser(ico_csv,
                     date_column='BLOCK_TIMESTAMP',
                     value_column='VALUE',
                     ico_start_date=market_start_date, 
                     fraud_flag=is_fraud,
                          len_time_series=20)

CPU times: user 33.2 s, sys: 230 ms, total: 33.4 s
Wall time: 33.5 s


In [7]:
%%time
# Run the parser's processing stages; each stage logs a "Running method: ..." line.
ico_parser.pipeline()

Running method: get_newbiers_dataframe ... 
Running method: get_balance ... 
2020-06-15 2020-07-05
2020-03-04
123
Running method: get_cumsum_balance ... 
Running method: get_cumsum_daily_percentage ... 
Running method: get_daily_number_of_new_holder ... 
Running method: get_array_daily_transactions ... 
Running method: get_array_perc_new_holders ... 
Running method: get_biggest_holder_dict ... 
Number of API calls 4
List adresses checked: dict_keys(['0x1449e0687810bddd356ae6dd87789244a46d9adb', '0x19bc62ff7cd9ffd6bdced9802ff718f09f7259f1', '0x2775b1c75658be0f640272ccb8c72ac986009e38', '0xc89b6f0146642688bb254bf93c28fccf1e182c81'])
Running method: get_biggest_holder_array ... 
Running method: get_newbiers_ratio_dict ... 
Running method: get_newbiers_array ... 
Running method: get_gas_ratio_array ... 
CPU times: user 1min 56s, sys: 79.8 ms, total: 1min 56s
Wall time: 2min 1s


In [12]:
# Length of each of the five arrays built by the pipeline. Every value is echoed
# because ast_node_interactivity is set to "all".
len(ico_parser.array_daily_transactions)
len(ico_parser.array_perc_new_holders)
len(ico_parser.array_biggest_holder)
len(ico_parser.array_newbiers)
len(ico_parser.array_gas_ratio)

20

20

20

20

20

## Creating Dataset

## Arrays of length 20

In [20]:
# Print every ICO name that contains a hyphen, an underscore or a space.
separators = ('-', '_', ' ')
for ico in ico_table.index.to_list():
    if any(separator in ico for separator in separators):
        print(ico)

In [21]:
# Every ICO name (the index of ico_table) as a plain list; iterated by the batch loops.
list_icos = ico_table.index.to_list()

In [22]:
# Containers for the 20-step run: per-ICO arrays keyed by ICO name, plus the
# names of the ICOs whose processing failed.
dict_arrays_20, list_bad_icos_20 = dict(), list()

In [23]:
# Show the results dict (still empty at this point).
dict_arrays_20

{}

In [None]:
# Run the full ICOParser pipeline for every ICO with a 20-step series and store
# the five resulting arrays per ICO. An ICO that fails is recorded in
# `list_bad_icos_20` so that it does not stop the batch.
for ico in list_icos:
    ico_csv = f'{path_to_csvs}{ico}.csv'
    market_start_date = str(ico_table.at[ico, 'market_start_date'].date())
    is_fraud = ico_table.at[ico, 'fraud']
    try:
        print(ico)
        ico_parser = ICOParser(ico_csv,
                               date_column='BLOCK_TIMESTAMP',
                               value_column='VALUE',
                               ico_start_date=market_start_date,
                               fraud_flag=is_fraud,
                               len_time_series=20)
        ico_parser.pipeline()
        dict_arrays_20[ico] = (ico_parser.array_daily_transactions,
                               ico_parser.array_perc_new_holders,
                               ico_parser.array_biggest_holder,
                               ico_parser.array_newbiers,
                               ico_parser.array_gas_ratio)
    except Exception as exc:
        # `except Exception` instead of a bare `except:` so a kernel interrupt
        # (KeyboardInterrupt) still stops this long-running loop, and the
        # reason for the failure is reported instead of being discarded.
        print(f'PROBLEM WITH: {ico} ({type(exc).__name__}: {exc})')
        list_bad_icos_20.append(ico)

In [26]:
# Check array sizes: print every ICO whose first stored array does not have 20 entries.
expected_len = 20
for ico, arrays in dict_arrays_20.items():
    if len(arrays[0]) != expected_len:
        print(ico)

AIDCOIN
ALLME
BELANCE
BLOCKMALL
CRYPTONIAPOKER
DIAM
EXIMCHAIN
HONEYSHARECOIN
ICON
INDAHASH
LOLIGO
MONEYTOKEN
MULTILEVEL
PLANETMOBILETOKEN
PROMETEUS
SHARERING
SINGULARITYNET
SOCIALMEDIAPAY
SPARKSTER


## Arrays of length 40

In [28]:
# Containers for the 40-step run. The 'arrays' entry is a legend: it names, in
# order, the ten attributes stored in each per-ICO tuple (five raw arrays
# followed by their `_ma` counterparts).
_raw_array_names = (
    'array_daily_transactions',
    'array_perc_new_holders',
    'array_biggest_holder',
    'array_newbiers',
    'array_gas_ratio',
)
list_bad_icos_40 = []
dict_arrays_40 = {
    'arrays': _raw_array_names + tuple(f'{name}_ma' for name in _raw_array_names),
}

In [29]:
# Show the dict; at this point it holds only the 'arrays' legend entry.
dict_arrays_40

{'arrays': ('array_daily_transactions',
  'array_perc_new_holders',
  'array_biggest_holder',
  'array_newbiers',
  'array_gas_ratio',
  'array_daily_transactions_ma',
  'array_perc_new_holders_ma',
  'array_biggest_holder_ma',
  'array_newbiers_ma',
  'array_gas_ratio_ma')}

In [None]:
%%time
# Run the full ICOParser pipeline for every ICO with a 40-step series and store
# ten arrays per ICO (five raw arrays followed by their `_ma` counterparts, in
# the order listed under dict_arrays_40['arrays']). An ICO that fails is
# recorded in `list_bad_icos_40` so that it does not stop the batch.
for ico in list_icos:
    ico_csv = f'{path_to_csvs}{ico}.csv'
    market_start_date = str(ico_table.at[ico, 'market_start_date'].date())
    is_fraud = ico_table.at[ico, 'fraud']
    try:
        print(ico)
        ico_parser = ICOParser(ico_csv,
                               date_column='BLOCK_TIMESTAMP',
                               value_column='VALUE',
                               ico_start_date=market_start_date,
                               fraud_flag=is_fraud,
                               len_time_series=40)
        ico_parser.pipeline()
        dict_arrays_40[ico] = (ico_parser.array_daily_transactions,
                               ico_parser.array_perc_new_holders,
                               ico_parser.array_biggest_holder,
                               ico_parser.array_newbiers,
                               ico_parser.array_gas_ratio,
                               ico_parser.array_daily_transactions_ma,
                               ico_parser.array_perc_new_holders_ma,
                               ico_parser.array_biggest_holder_ma,
                               ico_parser.array_newbiers_ma,
                               ico_parser.array_gas_ratio_ma)
    except Exception as exc:
        # `except Exception` instead of a bare `except:` so a kernel interrupt
        # (KeyboardInterrupt) still stops this long-running loop, and the
        # reason for the failure is reported instead of being discarded.
        print(f'PROBLEM WITH: {ico} ({type(exc).__name__}: {exc})')
        list_bad_icos_40.append(ico)

0X
Running method: get_newbiers_dataframe ... 
Running method: get_balance ... 
2017-08-15 2017-09-24
2017-08-11
44
Running method: get_cumsum_balance ... 
Running method: get_cumsum_daily_percentage ... 
Running method: get_daily_number_of_new_holder ... 
Running method: get_array_daily_transactions ... 
Running method: get_array_perc_new_holders ... 
Running method: get_biggest_holder_dict ... 
Number of API calls 8
List adresses checked: dict_keys(['0x5694185964caccc6281ac2799d402b27e7efe565', '0x7f33036d984f67a864c7f413012c31329d4193a2', '0x606af0bd4501855914b50e2672c5926b896737ef', '0x206376e8940e42538781cd94ef024df3c1e0fd43', '0xdb63d40c033d35e79cdbb21430f0fe10e9d97303', '0xe4abc54f5a6288b60c18b361442a151fc4911da6', '0x5e575279bf9f4acf0a130c186861454247394c06', '0xead6be34ce315940264519f250d8160f369fa5cd'])
Running method: get_biggest_holder_array ... 
Running method: get_newbiers_ratio_dict ... 
Running method: get_newbiers_array ... 
Running method: get_gas_ratio_array ... 
AEL

## Arrays of length 60

In [None]:
# Results of the 60-step run, keyed by ICO name.
dict_arrays_60 = {}

In [None]:
# Show the current content of the 60-step results dict.
dict_arrays_60

In [None]:
# Run the full ICOParser pipeline for every ICO with a 60-step series and store
# the five resulting arrays per ICO. ICOs that fail are collected in
# `list_bad_icos_60`.
list_bad_icos_60 = []
for ico in list_icos:
    ico_csv = f'{path_to_csvs}{ico}.csv'
    market_start_date = str(ico_table.at[ico, 'market_start_date'].date())
    is_fraud = ico_table.at[ico, 'fraud']
    try:
        print(ico)
        ico_parser = ICOParser(ico_csv,
                               date_column='BLOCK_TIMESTAMP',
                               value_column='VALUE',
                               ico_start_date=market_start_date,
                               fraud_flag=is_fraud,
                               len_time_series=60)
        # The arrays stored below are produced by later pipeline stages
        # (get_array_daily_transactions, get_biggest_holder_array, ...), so
        # stopping after get_balance() would store arrays that were never
        # computed. Run the whole pipeline, as the 20- and 40-step loops do.
        ico_parser.pipeline()
        dict_arrays_60[ico] = (ico_parser.array_daily_transactions,
                               ico_parser.array_perc_new_holders,
                               ico_parser.array_biggest_holder,
                               ico_parser.array_newbiers,
                               ico_parser.array_gas_ratio)
    except Exception as exc:
        # `except Exception` instead of a bare `except:` so a kernel interrupt
        # (KeyboardInterrupt) still stops this long-running loop, and the
        # reason for the failure is reported instead of being discarded.
        print(f'PROBLEM WITH: {ico} ({type(exc).__name__}: {exc})')
        list_bad_icos_60.append(ico)


### Loading dictionary with right sizes 

In [None]:
import pickle

In [None]:
# Load a previously pickled dict of per-ICO arrays from the working directory.
# NOTE(review): pickle.load can execute arbitrary code while loading; only open
# pickle files that come from a trusted source.
with open('ico_arrays_2020-10-12.pickle', 'rb') as handle:
    dict_array_pickle= pickle.load(handle)

In [None]:
# Despite the `list_` prefix this is a dict: for each pickled entry whose
# element at index 1 does not have length 60, map the key to that element.
# NOTE(review): presumably index 1 is `array_perc_new_holders`, matching the
# tuple order used when filling the dict_arrays_* dicts — confirm against the
# code that wrote the pickle.
list_icos_bad_size = {k:v[1] for k,v in dict_array_pickle.items() if len(v[1]) != 60}

In [None]:
# Keys of the entries whose array length is not 60.
list_icos_bad_size.keys()

In [None]:
# Print the actual array length recorded for every entry with an unexpected size.
for ico, bad_size_array in list_icos_bad_size.items():
    print(f'{ico} size: {len(bad_size_array)}')

### 4NEW 

In [None]:
# Select a single ICO to inspect and build the path of its CSV file.
# NOTE(review): `path_to_csvs` is re-assigned here with an absolute,
# machine-specific path.
path_to_csvs = '/home/gabriel/Documents/Repos/time_series_study/data_and_models/all_icos/'
ico_ = '4NEW'
ico_csv= f'{path_to_csvs}{ico_}.csv'

In [None]:
# Full metadata row of the selected ICO.
ico_table.loc[ico_,:]

In [None]:
# Market start date of the selected ICO as a string (the form passed to
# ICOParser as `ico_start_date`); echoed for inspection.
market_start_date = str(ico_table.at[ ico_, 'market_start_date'].date())
market_start_date

In [None]:
# Fraud label of the selected ICO from the metadata table; echoed for inspection.
is_fraud = ico_table.at[ ico_, 'fraud']
is_fraud

In [None]:
# Display-only: the market start date of the selected ICO as a string.
str(ico_table.at[ ico_, 'market_start_date'].date())

In [None]:
# Build the parser for the selected ICO with a 20-step time series.
ico_parser = ICOParser(ico_csv,
                 date_column='BLOCK_TIMESTAMP',
                 value_column='VALUE',
                 ico_start_date=market_start_date, 
                 fraud_flag=is_fraud,
                      len_time_series=20)

In [None]:
# Run three parser stages by hand, printing a marker before each one so that a
# failure can be attributed to a specific stage.
print('Running method: define_ico_start_date ... ')
ico_parser.define_ico_start_date()
print('Running method: get_newbiers_dataframe ... ')
ico_parser.get_newbiers_dataframe()
print('Running method: get_balance ... ')
ico_parser.get_balance()

In [None]:
# Show up to the first 200 rows of the metadata table.
# NOTE(review): this is a large output; a smaller preview is usually enough.
ico_table.head(200)

In [None]:
# Market start date recorded for AIDCOIN.
ico_table.at['AIDCOIN', 'market_start_date']

## Validating size for bad size ICOs

In [None]:
# Collects the names of the ICOs for which the validation loop raises.
list_bad_icos = []

In [None]:
# Hand-picked ICO names to re-validate, one per line for easier diffing.
bad_icos = [
    'AIDCOIN',
    'ANATOMIA',
    'BANKERA',
    'BELANCE',
    'BITCOINMAX',
    'BLISSEXCHANGE',
    'BUDBO',
    'ETHEREUMCASHPRO',
    'EXIMCHAIN',
    'GADIUNTRUSTWALLET',
    'HUOBI',
    'ICON',
    'LITECOINRED',
    'MONEYTOKEN',
    'PKGTOKEN',
    'QUANTSTAMP',
    'REMICOIN',
    'SINGULARITYNET',
    'SOCIALMEDIAPAY',
    'SPARKSTER',
    'TEFOOD',
    'TIERION',
    'TOMO',
    'ULTRA',
]

In [None]:

# Re-run the first parser stages for each hand-picked ICO with a 20-step
# series, printing a marker before each stage. ICOs that raise are collected in
# `list_bad_icos`.
for ico in bad_icos:
    # `bad_icos` is a hard-coded list, so a name may be absent from
    # `list_icos_bad_size`; `.get` then returns None and `len(None)` would
    # abort the whole loop with a TypeError raised outside the try block.
    bad_size_array = list_icos_bad_size.get(ico)
    if bad_size_array is None:
        print(f'{ico} size: not in list_icos_bad_size')
    else:
        print(f'{ico} size: {len(bad_size_array)}')
    ico_csv = f'{path_to_csvs}{ico}.csv'
    market_start_date = str(ico_table.at[ico, 'market_start_date'].date())
    is_fraud = ico_table.at[ico, 'fraud']
    try:
        ico_parser = ICOParser(ico_csv,
                               date_column='BLOCK_TIMESTAMP',
                               value_column='VALUE',
                               ico_start_date=market_start_date,
                               fraud_flag=is_fraud,
                               len_time_series=20)
        print('Running method: define_ico_start_date ... ')
        ico_parser.define_ico_start_date()
        print('Running method: get_newbiers_dataframe ... ')
        ico_parser.get_newbiers_dataframe()
        print('Running method: get_balance ... ')
        ico_parser.get_balance()
    except Exception as exc:
        # `except Exception` instead of a bare `except:` so a kernel interrupt
        # (KeyboardInterrupt) still stops the loop, and the reason for the
        # failure is reported instead of being discarded.
        print(f'PROBLEM WITH: {ico} ({type(exc).__name__}: {exc})')
        list_bad_icos.append(ico)


In [None]:
# Names of the ICOs recorded as failing.
list_bad_icos

In [None]:
# Metadata row of ANATOMIA.
ico_table.loc['ANATOMIA']

In [None]:
# Run the pipeline on whatever object `ico_parser` currently refers to.
# NOTE(review): no parser is built in this cell, so the result depends on which
# cell assigned `ico_parser` last — confirm this is the intended ICO.
ico_parser.pipeline()

In [None]:
# Length of the array recorded for 4NEW.
# NOTE(review): `.get` returns None when the key is absent, in which case
# len() raises TypeError.
len(list_icos_bad_size.get('4NEW'))

In [None]:
# Bare list literal: it is evaluated and displayed but not bound to any name.
['AIDCOIN',
'ANATOMIA',
'BANKERA',
'BELANCE',
'BITCOINMAX',
'BLISSEXCHANGE',
'BUDBO',
'ETHEREUMCASHPRO',
'EXIMCHAIN',
'GADIUNTRUSTWALLET',
'HUOBI',
'ICON',
'LITECOINRED',
'MONEYTOKEN',
'PKGTOKEN',
'QUANTSTAMP',
'REMICOIN',
'SINGULARITYNET',
'SOCIALMEDIAPAY',
'SPARKSTER']

### Testing `ICOParser` individually

In [None]:
# Reload the ICO metadata table. The file is semicolon-delimited; without
# `sep=';'` each row is read as a single column and the date columns converted
# below do not exist.
ico_table = pd.read_csv('lista_ico_2020-08-17_tab3.csv', sep=';')
# The date columns are stored as day-first strings (e.g. '24/01/2020').
# `astype('datetime64')` lets pandas guess the layout, which reads ambiguous
# values such as '05/08/2018' month-first, and the unit-less 'datetime64'
# dtype is rejected by pandas >= 2.0. Parse with an explicit format instead.
for _date_col in ('start_date', 'market_start_date', 'date_analysis'):
    ico_table[_date_col] = pd.to_datetime(ico_table[_date_col], format='%d/%m/%Y')


In [None]:
# Column dtypes of the metadata table.
ico_table.dtypes

In [None]:
# Index the table by ICO name (in place) and preview the first rows.
# NOTE(review): not re-runnable as written — on a second run 'ico' is no longer
# a column, so set_index raises KeyError.
ico_table.set_index('ico', inplace=True)
ico_table.head()

In [None]:
# Select a single ICO to test and collect the inputs ICOParser needs for it.
# NOTE(review): `path_to_csvs` is re-assigned here with an absolute,
# machine-specific path.
path_to_csvs = '/home/gabriel/Documents/Repos/time_series_study/data_and_models/all_icos/'

# Alternative ICO, currently disabled.
#ico_ = 'AMPLEFORTH'
ico_ = 'TERRAMINER'
ico_csv= f'{path_to_csvs}{ico_}.csv'
market_start_date = str(ico_table.at[ ico_, 'market_start_date'].date())
is_fraud = ico_table.at[ ico_, 'fraud']

In [None]:
# Display-only: the market start date of the selected ICO as a string.
str(ico_table.at[ ico_, 'market_start_date'].date())

In [None]:
# Build the parser for the selected ICO with a 20-step time series.
ico_parser = ICOParser(ico_csv,
                 date_column='BLOCK_TIMESTAMP',
                 value_column='VALUE',
                 ico_start_date=market_start_date, 
                 fraud_flag=is_fraud,
                      len_time_series=20)

# Testing the pipeline

In [None]:
# Run selected parser stages by hand and inspect `array_daily_transactions`.
ico_parser.define_ico_start_date()
# The intermediate stages below are intentionally disabled. They are kept as
# `#` comments instead of a bare triple-quoted string: a string expression is
# still evaluated and, with ast_node_interactivity = "all", echoed as cell
# output.
# print('Running method: get_newbiers_dataframe ... ')
# ico_parser.get_newbiers_dataframe()
# print('Running method: get_balance ... ')
# ico_parser.get_balance()
# print('Running method: get_cumsum_balance ... ')
# ico_parser.get_cumsum_balance()
# print('Running method: get_cumsum_daily_percentage ... ')
# ico_parser.get_cumsum_daily_percentage()
# print('Running method: get_daily_number_of_new_holder ... ')
# ico_parser.get_daily_number_of_new_holder()
print('Running method: get_array_daily_transactions ... ')
ico_parser.get_array_daily_transactions()
ico_parser.array_daily_transactions

In [None]:
%%time
# Run the whole pipeline for the parser built for the selected ICO.
ico_parser.pipeline()

In [None]:
# Flatten the daily-resampled frame's index into columns and reduce the
# BLOCK_TIMESTAMP column from timestamps to plain dates.
df_resample_func = ico_parser.df_resample_day.reset_index().assign(
    BLOCK_TIMESTAMP=lambda frame: frame['BLOCK_TIMESTAMP'].dt.date
)

In [None]:
# Recompute the gas-ratio series by hand: gas used divided by gas, row by row,
# stored as a new GAS_RATIO column on the parser's resampled newbiers frame.
newbiers_resampled = ico_parser.df_newbiers_resample
newbiers_resampled['GAS_RATIO'] = (
    newbiers_resampled['RECEIPT_GAS_USED'] / newbiers_resampled['GAS']
)
# Keep only the trailing `len_time_series` values.
gas_ratio_values = newbiers_resampled.GAS_RATIO.to_list()
series_length = ico_parser.len_time_series
ico_parser.array_gas_ratio = gas_ratio_values[-series_length:]

# Array sizes

In [None]:
# Run the two newbiers stages by hand, in the same order the pipeline log
# reports them (get_newbiers_ratio_dict, then get_newbiers_array).
ico_parser.get_newbiers_ratio_dict()
ico_parser.get_newbiers_array()