# Validating ICOParser class

## Testing the ICOParser class

In [1]:
import pandas as pd
from ico_parser import ICOParser
# Imprimir todos valores da célula, não somente o último
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

### Load table with information about ICOs

In [2]:
ico_table = pd.read_csv('lista_ico_2020-08-17_tab3.tsv', sep='\t')
ico_table.tail()

Unnamed: 0,ico,adress,fraud,start_date,market_start_date,diff_days,market_days,size_ok,price_zero,Market Cap,o_concur,biggest_holder,exchange,contract,date_analysis,code,site,social_media
253,TradeX Token,0x5432c580e34f590f4dd901b825ddeb92e905e826,1,24/01/2020,26/03/2020,62,186,,,,,98,0,0,22/09/2020,,,
254,tronclassic,0xad5fe5b0b8ec8ff4565204990e4405b2da117d8e,1,08/05/2018,08/10/2018,153,721,x,x,,,30,13,1,06/04/2019,x,x,
255,WALTONCHAIN,0xb7cb1c96db6b22b0d3d9536e0108d062bd488f74,1,21/07/2017,23/07/2017,2,1163,x,,,,61,4,0,19/01/2018,x,x,x
256,ZPAY,0xeffea57067e02999fdcd0bb45c0f1071a29472d9,1,25/07/2018,20/08/2018,26,770,x,,,,99,0,0,16/02/2019,,,
257,ZYNECOIN,0xe65ee7c03bbb3c950cfd4895c24989afa233ef01,1,21/01/2019,23/01/2019,2,614,x,,,,15,2,0,22/07/2019,x,x,x


In [3]:
ico_table = ico_table.astype({'start_date': 'datetime64', 'market_start_date': 'datetime64', 'date_analysis':'datetime64'})
#ico_table.set_index('ico', inplace=True)
ico_table.dtypes

ico                          object
adress                       object
fraud                         int64
start_date           datetime64[ns]
market_start_date    datetime64[ns]
diff_days                     int64
market_days                   int64
size_ok                      object
price_zero                   object
Market Cap                  float64
o_concur                     object
biggest_holder                int64
exchange                      int64
contract                      int64
date_analysis        datetime64[ns]
code                         object
site                         object
social_media                 object
dtype: object

In [4]:
ico_table.shape

(258, 18)

In [5]:
ico_table.ico = [ico.upper().replace(' ', '') for ico in ico_table.ico]

In [6]:
ico_table.head()

Unnamed: 0,ico,adress,fraud,start_date,market_start_date,diff_days,market_days,size_ok,price_zero,Market Cap,o_concur,biggest_holder,exchange,contract,date_analysis,code,site,social_media
0,0X,0xe41d2489571d322189246dafa5ebde1f4699f498,0,2017-11-08,2017-08-15,4,1140,,,45.0,x,2,6,14,2018-11-02,,,
1,AELF,0xbf2179859fc6d5bee9bf9158632dc51678a4100e,0,2017-12-18,2017-12-21,3,1012,,,154.0,x,13,10,0,2018-06-19,,,
2,AMPLEFORTH,0xd46ba6d942050d489dbd938a2c909a5d5039a161,0,2019-06-14,2019-06-27,13,459,,,47.0,x,3,4,20,2019-12-24,,,
3,ANKR,0x8290333cef9e6d528dd5618fb97a76f268f3edd4,0,2019-02-21,2019-05-03,12,573,,,170.0,x,11,7,41,2019-01-09,,,
4,BANCOR,0x1f573d6fb3f13d689ff844b4ce37794d79a7ff1c,0,2017-10-06,2017-12-06,2,1204,,,61.0,x,2,11,7,2017-09-12,,,


In [7]:
ico_table.to_csv('lista_ico_2020-08-17_tab3.csv', index=False, sep=',')

### Testing `ICOParser` individually

In [8]:
ico_table = pd.read_csv('lista_ico_2020-08-17_tab3.csv')
ico_table = ico_table.astype({'start_date': 'datetime64', 'market_start_date': 'datetime64', 'date_analysis':'datetime64'})


In [9]:
ico_table.dtypes

ico                          object
adress                       object
fraud                         int64
start_date           datetime64[ns]
market_start_date    datetime64[ns]
diff_days                     int64
market_days                   int64
size_ok                      object
price_zero                   object
Market Cap                  float64
o_concur                     object
biggest_holder                int64
exchange                      int64
contract                      int64
date_analysis        datetime64[ns]
code                         object
site                         object
social_media                 object
dtype: object

In [10]:
ico_table.set_index('ico', inplace=True)
ico_table.head()

Unnamed: 0_level_0,adress,fraud,start_date,market_start_date,diff_days,market_days,size_ok,price_zero,Market Cap,o_concur,biggest_holder,exchange,contract,date_analysis,code,site,social_media
ico,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0X,0xe41d2489571d322189246dafa5ebde1f4699f498,0,2017-11-08,2017-08-15,4,1140,,,45.0,x,2,6,14,2018-11-02,,,
AELF,0xbf2179859fc6d5bee9bf9158632dc51678a4100e,0,2017-12-18,2017-12-21,3,1012,,,154.0,x,13,10,0,2018-06-19,,,
AMPLEFORTH,0xd46ba6d942050d489dbd938a2c909a5d5039a161,0,2019-06-14,2019-06-27,13,459,,,47.0,x,3,4,20,2019-12-24,,,
ANKR,0x8290333cef9e6d528dd5618fb97a76f268f3edd4,0,2019-02-21,2019-05-03,12,573,,,170.0,x,11,7,41,2019-01-09,,,
BANCOR,0x1f573d6fb3f13d689ff844b4ce37794d79a7ff1c,0,2017-10-06,2017-12-06,2,1204,,,61.0,x,2,11,7,2017-09-12,,,


In [11]:
path_to_csvs = '/home/gabriel/Documents/Repos/time_series_study/data_and_models/all_icos/'

#ico_ = 'AMPLEFORTH'
ico_ = 'TERRAMINER'
ico_csv= f'{path_to_csvs}{ico_}.csv'
market_start_date = str(ico_table.at[ ico_, 'market_start_date'].date())
is_fraud = ico_table.at[ ico_, 'fraud']

In [12]:
str(ico_table.at[ ico_, 'market_start_date'].date())

'2018-01-25'

In [13]:
ico_parser = ICOParser(ico_csv,
                 date_column='BLOCK_TIMESTAMP',
                 value_column='VALUE',
                 ico_start_date=market_start_date, 
                 fraud_flag=is_fraud)

# TESTANDO PIPELINE

In [14]:
%%time
ico_parser.pipeline()

Running method: define_ico_start_date ... 
Running method: get_newbiers_dataframe ... 
Running method: get_balance ... 
2017-12-23
92
Running method: get_cumsum_balance ... 
Running method: get_cumsum_daily_percentage ... 
Running method: get_daily_number_of_new_holder ... 
Running method: get_array_daily_transactions ... 
Running method: get_array_perc_new_holders ... 
Running method: get_biggest_holder_dict ... 
Running method: get_biggest_holder_array ... 
Running method: get_newbiers_ratio_dict ... 
Running method: get_newbiers_array ... 
Running method: get_gas_ratio_array ... 
CPU times: user 9.27 s, sys: 200 ms, total: 9.47 s
Wall time: 4min 25s


In [20]:
ico_parser.df_newbiers_resample['GAS_RATIO'] = (
                ico_parser.df_newbiers_resample['RECEIPT_GAS_USED']
                / ico_parser.df_newbiers_resample['GAS']
            )
ico_parser.array_gas_ratio = (
                ico_parser.df_newbiers_resample.GAS_RATIO.to_list()
            )[-ico_parser.len_time_series :]

# Tamanho dos Arrays

In [18]:
ico_parser.get_newbiers_ratio_dict()
ico_parser.get_newbiers_array()

In [23]:
[list(ico_parser.array_daily_transactions),
ico_parser.array_perc_new_holders,
ico_parser.array_biggest_holder,
ico_parser.array_newbiers_ratio,
ico_parser.array_gas_ratio]

[[157,
  57,
  146,
  84,
  53,
  60,
  96,
  119,
  138,
  36,
  14,
  31,
  18,
  20,
  22,
  38,
  37,
  58,
  36,
  17,
  29,
  38,
  24,
  17,
  19,
  18,
  15,
  36,
  9,
  11,
  33,
  12,
  18,
  21,
  38,
  33,
  33,
  29,
  17,
  21,
  36,
  53,
  76,
  137,
  146,
  82,
  63,
  44,
  22,
  56,
  27,
  21,
  11,
  9,
  18,
  35,
  10,
  2,
  10,
  6],
 [0.30205479452054795,
  0.3363013698630137,
  0.35753424657534244,
  0.3815068493150685,
  0.4212328767123288,
  0.44246575342465755,
  0.4691780821917808,
  0.5082191780821917,
  0.5335616438356164,
  0.547945205479452,
  0.560958904109589,
  0.5671232876712329,
  0.5815068493150685,
  0.5876712328767123,
  0.5965753424657534,
  0.6075342465753425,
  0.6205479452054794,
  0.6294520547945206,
  0.6376712328767123,
  0.6554794520547945,
  0.660958904109589,
  0.6712328767123288,
  0.6842465753424658,
  0.6917808219178082,
  0.6986301369863014,
  0.7041095890410959,
  0.7109589041095891,
  0.7164383561643836,
  0.7342465753424657,

In [20]:
len(ico_parser.array_daily_transactions)
len(ico_parser.array_perc_new_holders)
len(ico_parser.array_biggest_holder)
len(ico_parser.array_newbiers_ratio)
len(ico_parser.array_gas_ratio)

60

60

60

60

60

In [15]:
ico_parser.ico_end_date
ico_parser.df_newbiers
ico_parser.df_newbiers_resample
ico_parser.dict_balance
ico_parser.dict_cumsum_balance
ico_parser.dict_percentage_holders
ico_parser.dict_daily_new_holders
ico_parser.dict_perc_biggest_holder
ico_parser.dict_newbiers_ratio
ico_parser.array_daily_transactions
ico_parser.array_perc_new_holders
ico_parser.array_biggest_holder
ico_parser.array_newbiers
ico_parser.array_gas_ratio

datetime.date(2019, 2, 14)

In [16]:
ico_parser.get_newbiers_dataframe()

In [45]:
ico_parser.ico_end_date
ico_parser.df_newbiers
ico_parser.df_newbiers_resample
ico_parser.dict_balance
ico_parser.dict_cumsum_balance
ico_parser.dict_percentage_holders
ico_parser.dict_daily_new_holders
ico_parser.dict_perc_biggest_holder
ico_parser.dict_newbiers_ratio
ico_parser.array_daily_transactions
ico_parser.array_perc_new_holders
ico_parser.array_biggest_holder
ico_parser.array_newbiers
ico_parser.array_gas_ratio

datetime.date(2019, 8, 26)

Unnamed: 0,BLOCK_TIMESTAMP,FROM_ADDRESS,TO_ADDRESS,VALUE,TRANSACTION_HASH,NONCE,FROM_ADDRESS_BLOCKCHAIN,TO_ADDRESS_BLOCKCHAIN,GAS,RECEIPT_GAS_USED,transactions
5491,2019-06-27,0x383fee30ed41c28125385cb52c82e1208781d04c,0x876eabf441b2ee5b5b0554fd502a8e0600950cfa,1020300000000,0xdcf687e2fb8f49c5652c1702a12e786fd42b07179af5...,0,0x383fee30ed41c28125385cb52c82e1208781d04c,0xd46ba6d942050d489dbd938a2c909a5d5039a161,150000,23963,1
5662,2019-06-27,0xe904904c7462568dfb03660c8536df70a9a26361,0x876eabf441b2ee5b5b0554fd502a8e0600950cfa,1020342500000,0x550067fba80ed8d5114f576bf8756af6436a8550cf16...,0,0xe904904c7462568dfb03660c8536df70a9a26361,0xd46ba6d942050d489dbd938a2c909a5d5039a161,150000,24027,1
5500,2019-06-27,0x4f1a2f0a7ce75adc80064fd614e181e18f9a22d1,0x876eabf441b2ee5b5b0554fd502a8e0600950cfa,1052272500000,0xdef1d633d935c5dd9f9bf09f3a75d6286bc604eafc7d...,0,0x4f1a2f0a7ce75adc80064fd614e181e18f9a22d1,0xd46ba6d942050d489dbd938a2c909a5d5039a161,150000,23963,1
5152,2019-06-27,0x4d58870ea12fbad9d5db0f3ce73b6d94f7e82ee8,0x876eabf441b2ee5b5b0554fd502a8e0600950cfa,1692434210526,0xd60953bbca596be0574e8032fc72b0ea27a58b842bd9...,0,0x4d58870ea12fbad9d5db0f3ce73b6d94f7e82ee8,0xd46ba6d942050d489dbd938a2c909a5d5039a161,150000,24091,1
5129,2019-06-27,0x6323110d4f78a9225d1f7fc2fc39d18af38b240b,0x876eabf441b2ee5b5b0554fd502a8e0600950cfa,2135005795303,0xb08d06a1db7e485480be394e8e8a4b83cf3ac6d38d62...,0,0x6323110d4f78a9225d1f7fc2fc39d18af38b240b,0xd46ba6d942050d489dbd938a2c909a5d5039a161,150000,24091,1
...,...,...,...,...,...,...,...,...,...,...,...
5861,2019-12-13,0x4e6751c079d77ff95e36cadedebdfd0a695f7e25,0x1f48edbe6cc895a676dd3a2f0d9fcade3eca4385,68414819877,0x5c73ede146797156e4ff9d991424a12f485213372dc2...,6,0x4e6751c079d77ff95e36cadedebdfd0a695f7e25,0xd46ba6d942050d489dbd938a2c909a5d5039a161,84964,56183,1
1277,2019-12-15,0xb8822e11f80988792ff36f1764a1927c1e174fd3,0x689c56aef474df92d44a1b70850f808488f9769c,64832486972,0x547a75c30487452ee53a1b966e276ad09345be4a3412...,0,0xb8822e11f80988792ff36f1764a1927c1e174fd3,0xd46ba6d942050d489dbd938a2c909a5d5039a161,134964,26183,1
5035,2019-12-16,0x2d6ef7046b9a8f8ccba5801b9da85b56aee140ea,0x69e0e2b3d523d3b247d798a49c3fa022a46dd6bd,443824555470,0x8b188795643b102d6e1330a935e0da0f6fab7323f38b...,7,0x2d6ef7046b9a8f8ccba5801b9da85b56aee140ea,0xd46ba6d942050d489dbd938a2c909a5d5039a161,41405,41183,1
9190,2019-12-18,0x8d57ee1fedc7bdcfe57cfdc30da1dd5238969edf,0x689c56aef474df92d44a1b70850f808488f9769c,399012440750,0xf242ce5366b94749b92cbf833ba4d8be840718579980...,34,0x8d57ee1fedc7bdcfe57cfdc30da1dd5238969edf,0xd46ba6d942050d489dbd938a2c909a5d5039a161,112107,26183,1


Unnamed: 0_level_0,VALUE,NONCE,GAS,RECEIPT_GAS_USED,transactions
BLOCK_TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-06-27,530180190908163,481,23479460,5824206,186
2019-06-28,97444592820312,1276,10111956,2914630,82
2019-06-29,21719562290018,75,3171529,679194,24
2019-06-30,7865610228579,8,1500000,240142,10
2019-07-01,7545766538820,9,1704513,303388,12
...,...,...,...,...,...
2019-12-16,443824555470,7,41405,41183,1
2019-12-17,0,0,0,0,0
2019-12-18,399012440750,34,112107,26183,1
2019-12-19,0,0,0,0,0


In [46]:
ico_parser.get_newbiers_dataframe()
ico_parser.get_balance()

2019-06-14
72


In [48]:
ico_parser.ico_start_date
ico_parser.ico_end_date

datetime.date(2019, 6, 27)

datetime.date(2019, 8, 26)

In [50]:
len(ico_parser.dict_balance)


72

In [51]:
ico_parser.get_cumsum_balance()
ico_parser.get_cumsum_daily_percentage()
ico_parser.get_daily_number_of_new_holder()

In [54]:
len(ico_parser.dict_percentage_holders)

72

In [55]:
ico_parser.get_array_daily_transactions()
ico_parser.get_array_perc_new_holders()

In [56]:
ico_parser.array_daily_transactions
ico_parser.array_perc_new_holders

array([ 735,  322,   85,   59,   53,   47,   46,   26,   46,   35,   33,
         60,   32,   46,   47,   37,   33,   35,   53,   44,   38,   23,
       1728,   83,   32,   44,   36,   34,   22,   24,   10,   25,   39,
         16,   60,   13,   23,   11,   22,   36,   33,   41,   22,   31,
          8,   10,   13,    6,   17,   10, 1096,  679,   55,   33,    7,
         18,   49,   66,   33,   47])

[0.003756830601092896,
 0.164275956284153,
 0.18442622950819673,
 0.19262295081967212,
 0.19808743169398907,
 0.20252732240437157,
 0.2062841530054645,
 0.20969945355191258,
 0.2103825136612022,
 0.2148224043715847,
 0.21755464480874318,
 0.22028688524590165,
 0.22404371584699453,
 0.22575136612021857,
 0.22780054644808742,
 0.23224043715846995,
 0.23394808743169399,
 0.23599726775956284,
 0.23736338797814208,
 0.2383879781420765,
 0.23975409836065573,
 0.24146174863387979,
 0.24453551912568305,
 0.5966530054644809,
 0.6007513661202186,
 0.6010928961748634,
 0.6028005464480874,
 0.6038251366120219,
 0.6048497267759563,
 0.6068989071038251,
 0.6099726775956285,
 0.6103142076502732,
 0.6113387978142076,
 0.6133879781420765,
 0.6144125683060109,
 0.6185109289617486,
 0.6188524590163934,
 0.6212431693989071,
 0.6226092896174863,
 0.6232923497267759,
 0.6256830601092896,
 0.6273907103825137,
 0.6290983606557377,
 0.6308060109289617,
 0.6362704918032787,
 0.6369535519125683,
 0.6379781420765