In [1]:
### Requirements ### 
import pandas as pd
import numpy as np 
import time
import sys

import matplotlib.pyplot as plt
import seaborn as sns

from datetime import datetime, timedelta 
from pycoingecko import CoinGeckoAPI
cg = CoinGeckoAPI()

### Function defined in other file ###
import ipynb.fs  
from .defs.preOperations import tsMaker

# Collect and Store
* By this time, we have locally accumulated 3 years of daily data, which required 14 hours of API request-handling process
* This script was used during-development, with the simple objective of updating the lastest version of all time-series as days went by | So we dont need the 14-hours-process again. 



## 2nd | Network lapsus update 

### Defining network lapsus updating function
* **Approach 1** | Test for 3 ids
* **Objectives** | create Xdays-df to be concatenated to coins final_CSV version (**'treated_CSVs' folder** | created in the 'Database building workflow' last script) 

In [2]:
def tsLapsus(ids, days):
    
    '''
    Takes an array of ids, request up to 100 days/min worth of social media, github and market data from coingeckoAPI().
    Read outdated CSVs, concat collected data into new, updated CSVs and export it to a new folder. 
    
    : tsLapsus('bitcoin', 10)
    > df([features], index = days range)
    '''
    
    ############ Create updated df #######################
    
    for id in ids.id:
        
        # Call the tsMaker function from preOperations.ipynb
        ts_df = tsMaker(id, days)
        
        # Interpolate
        ts_df.interpolate(inplace=True)

        # backfill first entry if there's any NaN
        ts_df.fillna(method='bfill', inplace=True)
        
        # Reset index to 'dates' columns
        ts_df.reset_index(inplace = True)
        ts_df.rename(columns={'index': 'dates'}, inplace=True)
        
    ############### Concat Part #######################
        
        # Read outdated csvs 
        from_path = f'/Users/gabrielgomes/ProjetoAwariCripto/Updating workflow/treated_CSVs/{id}_final.csv'
        out_dated = pd.read_csv(from_path).drop(columns=['Unnamed: 0'])
        
        # Updated data
        up_dated = pd.concat([ts_df, out_dated])
        
        # Updated CSVs download
        to_path = f'/Users/gabrielgomes/ProjetoAwariCripto/Updating workflow/updated_CSVs/{id}_updated.csv'
        up_dated.to_csv(to_path)
        
        # Communicate
        print(f'{id}_updated is in the updated_CSVs file.')
        
    return up_dated
        
    

### Simulations

In [3]:
# Create folder for updated CSVs
import os  
os.makedirs('updated_CSVs', exist_ok = True)

In [4]:
ids=pd.read_csv('main50.csv', index_col = 'Unnamed: 0')

In [11]:
rem_ids = ids[9:]

### For real

In [12]:
tsLapsus(rem_ids, 33)

All done for stellar.
stellar_updated is in the updated_CSVs file.
All done for chainlink.
chainlink_updated is in the updated_CSVs file.
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/steem/history?date=25-03-2022 
Program will wait for a minute and try again. 

All done for steem.
steem_updated is in the updated_CSVs file.
All done for steem-dollars.
steem-dollars_updated is in the updated_CSVs file.
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/dash/history?date=10-03-2022 
Program will wait for a minute and try again. 

All done for dash.
dash_updated is in the updated_CSVs file.
All done for verge.
verge_updated is in the updated_CSVs file.
All done for the-graph.
the-graph_updated is in the updated_CSVs file.
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/hiveterminal/history?date=27-03-2022 
Program will wait for a minute and try again. 

All done for hiveterminal.
hiv

Unnamed: 0,dates,twitter_followers,reddit_subs,reddit_avg_posts_48h,reddit_avg_comments_48h,forks,stars,github_subs,total_issues,closed_issues,...,pull_request_contributors,usd_cp,eur_cp,brl_cp,usd_mc,eur_mc,brl_mc,usd_tv,eur_tv,brl_tv
0,03-04-2022,159524.000000,10658.0,0.60,5.80,1.0,0.0,2.0,0.0,0.0,...,3.0,11.45,10.37,53.36,3.452665e+09,3.125998e+09,1.609223e+10,212641323.0,192522689.0,9.910814e+08
1,02-04-2022,159204.000000,10639.0,1.44,10.89,1.0,0.0,2.0,0.0,0.0,...,3.0,12.42,11.24,57.85,3.737113e+09,3.382461e+09,1.741079e+10,343190689.0,310621893.0,1.598887e+09
2,01-04-2022,158800.666667,10628.0,1.22,14.11,1.0,0.0,2.0,0.0,0.0,...,3.0,11.44,10.33,54.23,3.426232e+09,3.094360e+09,1.623794e+10,479296986.0,432836333.0,2.271532e+09
3,31-03-2022,158397.333333,10605.0,1.56,15.11,1.0,0.0,2.0,0.0,0.0,...,3.0,11.96,10.72,57.06,3.610861e+09,3.235996e+09,1.722850e+10,351805759.0,315237310.0,1.678536e+09
4,30-03-2022,157994.000000,10563.0,1.40,10.60,1.0,0.0,2.0,0.0,0.0,...,3.0,11.90,10.73,56.61,3.567777e+09,3.216601e+09,1.697441e+10,526412547.0,474617237.0,2.504513e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1090,28-02-2019,4766.000000,1406.0,0.00,0.00,7.0,25.0,13.0,0.0,0.0,...,3.0,0.03,0.02,0.10,1.184196e+06,1.054728e+06,4.439433e+06,847137.0,754520.0,3.175834e+06
1091,27-02-2019,4766.000000,1406.0,0.00,0.00,7.0,25.0,13.0,0.0,0.0,...,3.0,0.03,0.02,0.10,1.184196e+06,1.054728e+06,4.439433e+06,847137.0,754520.0,3.175834e+06
1092,26-02-2019,4766.000000,1406.0,0.00,0.00,7.0,25.0,13.0,0.0,0.0,...,3.0,0.03,0.02,0.10,1.184196e+06,1.054728e+06,4.439433e+06,847137.0,754520.0,3.175834e+06
1093,25-02-2019,4766.000000,1406.0,0.00,0.00,7.0,25.0,13.0,0.0,0.0,...,3.0,0.03,0.02,0.10,1.184196e+06,1.054728e+06,4.439433e+06,847137.0,754520.0,3.175834e+06


## What we've done
* **Our previous workflow** retrieved 1095 days of daily metrics, from February-2019 to February-2022
* As the deployment of this database will be finallized latter, tsLapsus(ids, days) localizes the work only to needed dates. 