In [1]:
### Requirements ### 
import pandas as pd
import numpy as np 
import time
import sys

import matplotlib.pyplot as plt
import seaborn as sns

from datetime import datetime, timedelta 
from pycoingecko import CoinGeckoAPI

**Read watchlist**

In [3]:
# Load the watchlist from main50.csv, using its 'Unnamed: 0' column as the index.
# NOTE(review): presumably this holds the CoinGecko ids of the 50 tracked projects — confirm the file's schema.
main_ids = pd.read_csv('main50.csv', index_col = 'Unnamed: 0')

* We will create a **database** to store **time series** of **20 different metrics** for each of the 50 projects.
* We will collect **3 years' worth of daily data**, save it as **CSV files**, and **transfer them to Cloud Storage** as an initial data feed. 

---

# Collect and Store
## 1st step | Network historical data range, 3 years

### Defining network range function 
* **Approach 1** | Test for 1 id 
* **Objectives** | create a time-series dataframe of daily community, developer and market metrics. 

In [2]:
def tsMaker(id, days, client=None, retry_wait=70, max_retries=None):
    '''
    Build a daily time series of community, developer and market metrics for one coin.

    One "coin history" snapshot is requested per day, starting from yesterday and
    walking backwards for `days` days. A failed request (e.g. HTTP 429 rate limiting)
    is reported, followed by a pause of `retry_wait` seconds and a retry of the same date.

    The walk stops early at the first date (other than the most recent one) whose
    snapshot has no 'community_data' section; the remaining, older dates are filled
    with NaN so the frame always has exactly `days` rows. If the most recent date
    itself has no community data, its community metrics are stored as None and the
    walk continues. Metrics missing from a snapshot are stored as None.

    Parameters
    : id          | coin id understood by the API client (e.g. 'bitcoin')
    : days        | number of daily rows to produce
    : client      | object exposing get_coin_history_by_id(id=..., date=...);
                    defaults to a new CoinGeckoAPI()
    : retry_wait  | seconds to sleep after a failed request before retrying
    : max_retries | maximum retries per date before the last error is re-raised;
                    None (default) retries indefinitely

    Returns
    > DataFrame with 20 metric columns, indexed by date strings ('%d-%m-%Y'),
      most recent date first. Count-like columns are rounded to 0 decimals,
      prices and reddit averages to 2 decimals.

    : tsMaker('bitcoin', 10)
    > df([columns], index = days range)
    '''

    # Output column -> key inside the snapshot's 'community_data' section
    community_keys = {'twitter_followers':       'twitter_followers',
                      'reddit_subs':             'reddit_subscribers',
                      'reddit_avg_posts_48h':    'reddit_average_posts_48h',
                      'reddit_avg_comments_48h': 'reddit_average_comments_48h'}

    # Output column -> key inside the snapshot's 'developer_data' section
    developer_keys = {'forks':                     'forks',
                      'stars':                     'stars',
                      'github_subs':               'subscribers',
                      'total_issues':              'total_issues',
                      'closed_issues':             'closed_issues',
                      'pull_rqst_merged':          'pull_requests_merged',
                      'pull_request_contributors': 'pull_request_contributors'}

    # (column suffix, key inside 'market_data'); each is split per currency
    market_sections = [('cp', 'current_price'),
                       ('mc', 'market_cap'),
                       ('tv', 'total_volume')]
    currencies = ['usd', 'eur', 'brl']

    # One appendable list per output column, in final column order
    column_names = (list(community_keys)
                    + list(developer_keys)
                    + [f'{currency}_{suffix}'
                       for suffix, _ in market_sections
                       for currency in currencies])
    series = {name: [] for name in column_names}

    # Date range: yesterday first, then one day further back per row (descending)
    yesterday = datetime.now() - timedelta(days = 1)
    dates = pd.Series([(yesterday - timedelta(days = n)).strftime('%d-%m-%Y') for n in range(days)])

    cg = CoinGeckoAPI() if client is None else client

    for date in dates:
        # Request the snapshot, retrying the same date after a pause on failure
        attempts = 0
        while True:
            try:
                request_dict = cg.get_coin_history_by_id(id = id, date = date)
                break
            except Exception as error:
                attempts += 1
                if max_retries is not None and attempts > max_retries:
                    raise
                print(f'{error} \nProgram will wait for a minute and try again. \n')
                time.sleep(retry_wait)

        processed = len(series['twitter_followers'])

        ########################## get community data ###########################################
        # A snapshot without community data marks the end of the retrievable history,
        # unless it is the very first (most recent) date, which is only treated as a gap.
        try:
            community = request_dict['community_data']
        except (KeyError, TypeError) as error:
            if processed == 0:
                print(f'{error} | {id} has no community data for {date}. Filling missing values with None.')
                community = None
            else:
                print(f'{error} | {id} has no community data before {date}. Breaking loop.')
                break

        # Missing or null sections behave like empty ones: every metric becomes None
        community = community or {}
        developer = request_dict.get('developer_data') or {}
        market    = request_dict.get('market_data') or {}

        for column, key in community_keys.items():
            series[column].append(community.get(key))

        ########################## get developing data ##########################################
        for column, key in developer_keys.items():
            series[column].append(developer.get(key))

        ################ get current price, market cap and total volume data ####################
        for suffix, section in market_sections:
            per_currency = market.get(section) or {}
            for currency in currencies:
                series[f'{currency}_{suffix}'].append(per_currency.get(currency))

        # Communicate
        processed += 1
        if processed % 100 == 0:
            print(f'{processed} days processed')

    # Pad every column with NaN for the dates that were never retrieved, so each
    # list is exactly as long as the date index.
    for values in series.values():
        values.extend([np.nan] * (days - len(values)))

    # Set dataframe using the lists
    extracted_df = pd.DataFrame(series, index = dates)

    # Rounding lists: counts and large monetary totals to 0 decimals, the rest to 2
    round_zero_list = ['twitter_followers','reddit_subs',
                       'forks','stars','github_subs',
                       'total_issues','closed_issues',
                       'pull_rqst_merged','pull_request_contributors',
                       'usd_mc', 'eur_mc', 'brl_mc',
                       'usd_tv', 'eur_tv', 'brl_tv']
    round_two_list = [col for col in extracted_df.columns if col not in round_zero_list]

    # Rounding
    extracted_df[round_zero_list] = extracted_df[round_zero_list].round()
    extracted_df[round_two_list] = extracted_df[round_two_list].round(2)

    # Communicate
    print(f'All done for {id}.')

    return extracted_df

# Pull all

In [6]:
def extractAll(ids, days = 1096):
    '''
    Extract the daily time series of every id and save each one as a CSV file.

    For each id, tsMaker(id, days) is called and the result is written to
    '<id>_daily_data.csv' in the current working directory.

    : ids  | iterable of coin ids; a single id passed as a plain string is accepted too
    : days | number of daily rows requested per id (default 1096, about 3 years)

    > DataFrame of the last id processed, or None when `ids` is empty
    '''

    # A bare string would otherwise be iterated character by character
    if isinstance(ids, str):
        ids = [ids]

    # Stays None when there is nothing to process
    df = None

    # loop through all ids
    for id in ids:
        # extract time series
        df = tsMaker(id, days)

        # save as csv
        df.to_csv(f'{id}_daily_data.csv')

    return df

#### For real 

In [185]:
# Run the full extraction for the watchlist; each id's series is written to '<id>_daily_data.csv'.
# NOTE(review): `sizes` receives whatever extractAll returns — as defined in this notebook that is
# the DataFrame of the last id processed, not a table of file sizes.
# NOTE(review): extractAll iterates its argument directly, and iterating a DataFrame yields its
# column labels — confirm main_ids exposes the coin ids that way.
sizes = extractAll(main_ids)

429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/electroneum/history?date=08-12-2021 
Program will wait for a minute and try again. 

100 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/electroneum/history?date=02-10-2021 
Program will wait for a minute and try again. 

200 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/electroneum/history?date=12-07-2021 
Program will wait for a minute and try again. 

429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/electroneum/history?date=08-05-2021 
Program will wait for a minute and try again. 

300 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/electroneum/history?date=26-02-2021 
Program will wait for a minute and try again. 

400 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/c

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  **kwargs,


429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/platon-network/history?date=14-12-2021 
Program will wait for a minute and try again. 

100 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/platon-network/history?date=28-09-2021 
Program will wait for a minute and try again. 

200 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/platon-network/history?date=23-07-2021 
Program will wait for a minute and try again. 

'community_data' | platon-network has no community data before 11-05-2021. Breaking loop.
All done for platon-network.
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/eos/history?date=18-02-2022 
Program will wait for a minute and try again. 

429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/eos/history?date=11-12-2021 
Program will wait for a minute and try again. 

100 d

1000 days processed
'community_data' | matic-network has no community data before 26-04-2019. Breaking loop.
All done for matic-network.
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/cardano/history?date=21-02-2022 
Program will wait for a minute and try again. 

429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/cardano/history?date=05-12-2021 
Program will wait for a minute and try again. 

100 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/cardano/history?date=04-10-2021 
Program will wait for a minute and try again. 

200 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/cardano/history?date=16-07-2021 
Program will wait for a minute and try again. 

429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/cardano/history?date=16-05-2021 
Program will wait for a minute and try again. 



500 days processed
'community_data' | aave has no community data before 02-10-2020. Breaking loop.
All done for aave.
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/avalanche-2/history?date=22-01-2022 
Program will wait for a minute and try again. 

100 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/avalanche-2/history?date=07-11-2021 
Program will wait for a minute and try again. 

429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/avalanche-2/history?date=03-09-2021 
Program will wait for a minute and try again. 

200 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/avalanche-2/history?date=17-06-2021 
Program will wait for a minute and try again. 

300 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/avalanche-2/history?date=16-04-2021 
Program will wait for a minu

200 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/fantom/history?date=25-07-2021 
Program will wait for a minute and try again. 

429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/fantom/history?date=06-05-2021 
Program will wait for a minute and try again. 

300 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/fantom/history?date=22-02-2021 
Program will wait for a minute and try again. 

400 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/fantom/history?date=09-12-2020 
Program will wait for a minute and try again. 

500 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/fantom/history?date=29-09-2020 
Program will wait for a minute and try again. 

429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/fantom/history?date=

### Only for ethereum (substituting omni) | Also, test for possible adds

In [6]:
# Run the extraction for a single id; as the cell's last expression, the return value is displayed.
extractAll(['ethereum'])

429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/ethereum/history?date=21-12-2021 
Program will wait for a minute and try again. 

100 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/ethereum/history?date=01-10-2021 
Program will wait for a minute and try again. 

200 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/ethereum/history?date=23-07-2021 
Program will wait for a minute and try again. 

429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/ethereum/history?date=14-05-2021 
Program will wait for a minute and try again. 

300 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/ethereum/history?date=27-02-2021 
Program will wait for a minute and try again. 

400 days processed
429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/ethereum/h

Unnamed: 0,bytes
ethereum,248597


**Check Results**

In [5]:
# Load the saved coin98 series, restoring the CSV's first column ('Unnamed: 0') as the index.
# NOTE(review): this reads from 'ts_CSVs/', while extractAll writes to the working directory —
# presumably the files were moved there after extraction; confirm.
c98 = pd.read_csv('ts_CSVs/coin98_daily_data.csv', index_col = 'Unnamed: 0')

# Check end of the time series (the last 50 rows, i.e. the oldest dates)
c98.tail(50)

Unnamed: 0,twitter_followers,reddit_subs,reddit_avg_posts_48h,reddit_avg_comments_48h,forks,stars,github_subs,total_issues,closed_issues,pull_rqst_merged,pull_request_contributors,usd_cp,eur_cp,brl_cp,usd_mc,eur_mc,brl_mc,usd_tv,eur_tv,brl_tv
14-04-2019,,,,,,,,,,,,,,,,,,,,
13-04-2019,,,,,,,,,,,,,,,,,,,,
12-04-2019,,,,,,,,,,,,,,,,,,,,
11-04-2019,,,,,,,,,,,,,,,,,,,,
10-04-2019,,,,,,,,,,,,,,,,,,,,
09-04-2019,,,,,,,,,,,,,,,,,,,,
08-04-2019,,,,,,,,,,,,,,,,,,,,
07-04-2019,,,,,,,,,,,,,,,,,,,,
06-04-2019,,,,,,,,,,,,,,,,,,,,
05-04-2019,,,,,,,,,,,,,,,,,,,,


* As we can see, **the API is not perfect**. 
* It fails to retrieve 'twitter_followers' data for some dates and has no access to some projects' GitHub accounts. 
* In the next step (**script** | csv_final_prep), we'll handle missing data, whilst making sure our time series stays intact. 