# Propdesk Data Pipeline - Seasonality

In [2]:
%load_ext autoreload
%autoreload 2
from propdesk_tardis.tardis_transfero import tardis_transfero as tardis
from propdesk_estimators.exchange_storage import ExchangeStorage
from propdesk_estimators.exchange_storage import get_dataframe_by_params
from propdesk_azure_services.azure_databricks import single_run_job
from propdesk_azure_services.azure_databricks import list_databricks_src_files

### Job Type and Dataset Type (no need to change these)

In [3]:
# Forwarded as job_type= in the single_run_job call at the end of the notebook.
job_type = 'io_intensive'
# Dataset family handled by this notebook.
# NOTE(review): the query/params dicts further down repeat the literal
# 'seasonality' instead of reading this variable -- keep them in sync by hand.
dataset_type = 'seasonality'
# Script name handed to single_run_job; presumably a file available on the
# Databricks side -- TODO confirm.
script_to_run = 'spark_seasonality.py'

### Quick and Dirty - Get Data

In [19]:
# Exchange whose stored datasets are queried in the next cell.
exchange_str = 'binance'

# Parameters identifying the dataset to fetch. They are passed unchanged as
# params_dict= to get_dataframe_by_params (imported in the top import cell).
# NOTE(review): whether 'date_to' is inclusive or exclusive is not visible
# here -- confirm against the storage API.
query_dict = {
    'dataset_type': 'seasonality',
    'pair': 'btcusdt',
    'date_from': '2022-01-01',
    'date_to': '2022-02-01',
    'resampling_rule': '60S',
}
# Echo the dict so the exact query is recorded in the cell output.
query_dict

{'dataset_type': 'seasonality',
 'pair': 'btcusdt',
 'date_from': '2022-01-01',
 'date_to': '2022-02-01',
 'resampling_rule': '60S'}

In [20]:
# keep_local=True keeps the raw files on disk instead of downloading them
# again if they are needed later.
# tip: after running a processing job, the same call can be used to fetch the
# data it produced.
# NOTE(review): the params_dict cell further down uses 'start_date'/'end_date'
# while query_dict uses 'date_from'/'date_to' -- confirm which keys this
# function expects before reusing that dict here.
seasonality_df = get_dataframe_by_params(
    exchange_str=exchange_str,
    params_dict=query_dict,
    keep_local=True,
)
seasonality_df

files saved to: /tmp/tmpgwz9ovbv


Unnamed: 0,datetime,seasonality_estimation
0,2022-01-01 00:00:00,32.935380
1,2022-01-01 00:01:00,28.756630
2,2022-01-01 00:02:00,42.499830
3,2022-01-01 00:03:00,40.220665
4,2022-01-01 00:04:00,24.296400
...,...,...
44635,2022-01-31 23:55:00,10.551465
44636,2022-01-31 23:56:00,12.749150
44637,2022-01-31 23:57:00,12.870900
44638,2022-01-31 23:58:00,9.528180


### Checking for missing data to compute

In [21]:
# Exchange whose storage is inspected in this section. This re-assigns the
# same value already set in the "Get Data" cell, so the section can be run
# on its own.
exchange_str = 'binance'
# Storage handle for that exchange; the next cell uses it to check which
# datasets already exist.
exchange_stg = ExchangeStorage(exchange_str) # equivalent to ExchangeStorage('binance')

Check for datasets that were already computed

In [22]:
# Check query_dict against what is already in storage. In the recorded run
# this printed "Missing datasets: []" and returned False.
# NOTE(review): the name says "amend" -- confirm whether this call only
# reports missing datasets or also modifies storage.
exchange_stg.amend_datasets_by_params(query_dict)


Missing datasets: []


False

### Quick and dirty - Process data: paste params_dict here and go

In [23]:
# Parameters forwarded to the Databricks job. Edit the values (or paste a new
# dict) to process a different pair / date range / exchange.
params_dict = {
    'dataset_type': 'seasonality',
    'pair': 'btcusdt',
    'start_date': '2022-02-01',
    'end_date': '2022-02-15',
    'exchange': 'binance',
}

# Job name pattern: <pair>_<exchange>_<dataset_type>.
job_name = f'{params_dict["pair"]}_{params_dict["exchange"]}_{params_dict["dataset_type"]}'

# Submission is opt-in so that "Restart Kernel -> Run All" never launches a
# job by accident. Set SUBMIT_JOB = True and re-run this cell to submit.
SUBMIT_JOB = False

# When submitted, single_run_job returns run metadata (the recorded run shows
# 'run_id', 'job_id' and 'run_page_url'); otherwise run_info stays None and
# the cell displays nothing.
run_info = (
    single_run_job(job_name, script_to_run, params_dict, job_type=job_type)
    if SUBMIT_JOB
    else None
)
run_info

{'run_id': 542339,
 'job_id': 15088,
 'run_page_url': 'https://adb-3928083337264192.12.azuredatabricks.net/?o=3928083337264192#job/15088/run/542339'}

# Success :)

## That's it. **Check the Databricks UI to make sure everything is OK.**

### Have fun, move fast, break things, buy btc (or dcr or algorand) ⚡.
#### -- Propdesk Transfero