# Validating ICOParser class

## Testing the ICOParser class

In [1]:
import pandas as pd
from ico_parser import ICOParser
# Imprimir todos valores da célula, não somente o último
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

### Load table with information about ICOs

In [2]:
ico_table = pd.read_csv('lista_ico_2020-08-17_tab3.tsv', sep='\t')
ico_table.tail()

Unnamed: 0,ico,adress,fraud,start_date,market_start_date,diff_days,market_days,size_ok,price_zero,Market Cap,o_concur,biggest_holder,exchange,contract,date_analysis,code,site,social_media
253,TradeX Token,0x5432c580e34f590f4dd901b825ddeb92e905e826,1,24/01/2020,26/03/2020,62,186,,,,,98,0,0,22/09/2020,,,
254,tronclassic,0xad5fe5b0b8ec8ff4565204990e4405b2da117d8e,1,08/05/2018,08/10/2018,153,721,x,x,,,30,13,1,06/04/2019,x,x,
255,WALTONCHAIN,0xb7cb1c96db6b22b0d3d9536e0108d062bd488f74,1,21/07/2017,23/07/2017,2,1163,x,,,,61,4,0,19/01/2018,x,x,x
256,ZPAY,0xeffea57067e02999fdcd0bb45c0f1071a29472d9,1,25/07/2018,20/08/2018,26,770,x,,,,99,0,0,16/02/2019,,,
257,ZYNECOIN,0xe65ee7c03bbb3c950cfd4895c24989afa233ef01,1,21/01/2019,23/01/2019,2,614,x,,,,15,2,0,22/07/2019,x,x,x


In [3]:
ico_table = ico_table.astype({'start_date': 'datetime64', 'market_start_date': 'datetime64', 'date_analysis':'datetime64'})
#ico_table.set_index('ico', inplace=True)
ico_table.dtypes

ico                          object
adress                       object
fraud                         int64
start_date           datetime64[ns]
market_start_date    datetime64[ns]
diff_days                     int64
market_days                   int64
size_ok                      object
price_zero                   object
Market Cap                  float64
o_concur                     object
biggest_holder                int64
exchange                      int64
contract                      int64
date_analysis        datetime64[ns]
code                         object
site                         object
social_media                 object
dtype: object

In [4]:
ico_table.shape

(258, 18)

In [5]:
ico_table.ico = [ico.upper().replace(' ', '') for ico in ico_table.ico]

In [6]:
ico_table.head()

Unnamed: 0,ico,adress,fraud,start_date,market_start_date,diff_days,market_days,size_ok,price_zero,Market Cap,o_concur,biggest_holder,exchange,contract,date_analysis,code,site,social_media
0,0X,0xe41d2489571d322189246dafa5ebde1f4699f498,0,2017-11-08,2017-08-15,4,1140,,,45.0,x,2,6,14,2018-11-02,,,
1,AELF,0xbf2179859fc6d5bee9bf9158632dc51678a4100e,0,2017-12-18,2017-12-21,3,1012,,,154.0,x,13,10,0,2018-06-19,,,
2,AMPLEFORTH,0xd46ba6d942050d489dbd938a2c909a5d5039a161,0,2019-06-14,2019-06-27,13,459,,,47.0,x,3,4,20,2019-12-24,,,
3,ANKR,0x8290333cef9e6d528dd5618fb97a76f268f3edd4,0,2019-02-21,2019-05-03,12,573,,,170.0,x,11,7,41,2019-01-09,,,
4,BANCOR,0x1f573d6fb3f13d689ff844b4ce37794d79a7ff1c,0,2017-10-06,2017-12-06,2,1204,,,61.0,x,2,11,7,2017-09-12,,,


In [7]:
ico_table.to_csv('lista_ico_2020-08-17_tab3.csv', index=False, sep=',')

### Testing `ICOParser` individually

In [8]:
ico_table = pd.read_csv('lista_ico_2020-08-17_tab3.csv')
ico_table = ico_table.astype({'start_date': 'datetime64', 'market_start_date': 'datetime64', 'date_analysis':'datetime64'})


In [9]:
ico_table.dtypes

ico                          object
adress                       object
fraud                         int64
start_date           datetime64[ns]
market_start_date    datetime64[ns]
diff_days                     int64
market_days                   int64
size_ok                      object
price_zero                   object
Market Cap                  float64
o_concur                     object
biggest_holder                int64
exchange                      int64
contract                      int64
date_analysis        datetime64[ns]
code                         object
site                         object
social_media                 object
dtype: object

In [10]:
ico_table.set_index('ico', inplace=True)
ico_table.head()

Unnamed: 0_level_0,adress,fraud,start_date,market_start_date,diff_days,market_days,size_ok,price_zero,Market Cap,o_concur,biggest_holder,exchange,contract,date_analysis,code,site,social_media
ico,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0X,0xe41d2489571d322189246dafa5ebde1f4699f498,0,2017-11-08,2017-08-15,4,1140,,,45.0,x,2,6,14,2018-11-02,,,
AELF,0xbf2179859fc6d5bee9bf9158632dc51678a4100e,0,2017-12-18,2017-12-21,3,1012,,,154.0,x,13,10,0,2018-06-19,,,
AMPLEFORTH,0xd46ba6d942050d489dbd938a2c909a5d5039a161,0,2019-06-14,2019-06-27,13,459,,,47.0,x,3,4,20,2019-12-24,,,
ANKR,0x8290333cef9e6d528dd5618fb97a76f268f3edd4,0,2019-02-21,2019-05-03,12,573,,,170.0,x,11,7,41,2019-01-09,,,
BANCOR,0x1f573d6fb3f13d689ff844b4ce37794d79a7ff1c,0,2017-10-06,2017-12-06,2,1204,,,61.0,x,2,11,7,2017-09-12,,,


In [11]:
path_to_csvs = '/home/gabriel/Documents/Repos/time_series_study/data_and_models/all_icos/'

#ico_ = 'AMPLEFORTH'
ico_ = 'TERRAMINER'
ico_csv= f'{path_to_csvs}{ico_}.csv'
market_start_date = str(ico_table.at[ ico_, 'market_start_date'].date())
is_fraud = ico_table.at[ ico_, 'fraud']

In [12]:
str(ico_table.at[ ico_, 'market_start_date'].date())

'2018-01-25'

In [13]:
ico_parser = ICOParser(ico_csv,
                 date_column='BLOCK_TIMESTAMP',
                 value_column='VALUE',
                 ico_start_date=market_start_date, 
                 fraud_flag=is_fraud)

# TESTANDO PIPELINE

In [18]:
ico_parser.define_ico_start_date()
"""
print('Running method: get_newbiers_dataframe ... ')
ico_parser.get_newbiers_dataframe()
print('Running method: get_balance ... ')
ico_parser.get_balance()
print('Running method: get_cumsum_balance ... ')
ico_parser.get_cumsum_balance()
print('Running method: get_cumsum_daily_percentage ... ')
ico_parser.get_cumsum_daily_percentage()
print('Running method: get_daily_number_of_new_holder ... ')
ico_parser.get_daily_number_of_new_holder()
"""
print('Running method: get_array_daily_transactions ... ')
ico_parser.get_array_daily_transactions()
ico_parser.array_daily_transactions

In [14]:
%%time
ico_parser.pipeline()

Running method: define_ico_start_date ... 
Running method: get_newbiers_dataframe ... 
Running method: get_balance ... 
2017-12-23
92
Running method: get_cumsum_balance ... 
Running method: get_cumsum_daily_percentage ... 
Running method: get_daily_number_of_new_holder ... 
Running method: get_array_daily_transactions ... 
Running method: get_array_perc_new_holders ... 
Running method: get_biggest_holder_dict ... 
Running method: get_biggest_holder_array ... 
Running method: get_newbiers_ratio_dict ... 
Running method: get_newbiers_array ... 
Running method: get_gas_ratio_array ... 
CPU times: user 8.94 s, sys: 170 ms, total: 9.11 s
Wall time: 4min 28s


In [24]:
df_resample_func = ico_parser.df_resample_day.reset_index()
df_resample_func['BLOCK_TIMESTAMP'] = df_resample_func[
            'BLOCK_TIMESTAMP'
        ].dt.date

In [15]:
ico_parser.df_newbiers_resample['GAS_RATIO'] = (
                ico_parser.df_newbiers_resample['RECEIPT_GAS_USED']
                / ico_parser.df_newbiers_resample['GAS']
            )
ico_parser.array_gas_ratio = (
                ico_parser.df_newbiers_resample.GAS_RATIO.to_list()
            )[-ico_parser.len_time_series :]

# Tamanho dos Arrays

In [16]:
ico_parser.get_newbiers_ratio_dict()
ico_parser.get_newbiers_array()

In [17]:
[list(ico_parser.array_daily_transactions),
ico_parser.array_perc_new_holders,
ico_parser.array_biggest_holder,
ico_parser.array_newbiers_ratio,
ico_parser.array_gas_ratio]

[[157,
  57,
  146,
  84,
  53,
  60,
  96,
  119,
  138,
  36,
  14,
  31,
  18,
  20,
  22,
  38,
  37,
  58,
  36,
  17,
  29,
  38,
  24,
  17,
  19,
  18,
  15,
  36,
  9,
  11,
  33,
  12,
  18,
  21,
  38,
  33,
  33,
  29,
  17,
  21,
  36,
  53,
  76,
  137,
  146,
  82,
  63,
  44,
  22,
  56,
  27,
  21,
  11,
  9,
  18,
  35,
  10,
  2,
  10,
  6],
 [0.30205479452054795,
  0.3363013698630137,
  0.35753424657534244,
  0.3815068493150685,
  0.4212328767123288,
  0.44246575342465755,
  0.4691780821917808,
  0.5082191780821917,
  0.5335616438356164,
  0.547945205479452,
  0.560958904109589,
  0.5671232876712329,
  0.5815068493150685,
  0.5876712328767123,
  0.5965753424657534,
  0.6075342465753425,
  0.6205479452054794,
  0.6294520547945206,
  0.6376712328767123,
  0.6554794520547945,
  0.660958904109589,
  0.6712328767123288,
  0.6842465753424658,
  0.6917808219178082,
  0.6986301369863014,
  0.7041095890410959,
  0.7109589041095891,
  0.7164383561643836,
  0.7342465753424657,