In [2]:
from collections import defaultdict
import numpy as np
import pandas as pd
import pandas_ta as ta
from pycoingecko import CoinGeckoAPI
import time

In [3]:
cg = CoinGeckoAPI()

In [4]:
def pull_data_pages(num_pages=1, delay_seconds=60, client=None):
    """Fetch CoinGecko market listings (priced in USD) for the first pages.

    Parameters
    ----------
    num_pages : int, default 1
        Number of result pages to request, starting at page 1. A value
        below 1 requests nothing and yields an empty list.
    delay_seconds : float, default 60
        Pause inserted between two consecutive page requests to stay under
        the API rate limit. There is no pause before the first page or
        after the last one.
    client : object, optional
        Any object exposing ``get_coins_markets(vs_currency=..., page=...)``.
        When omitted, the notebook-level ``cg`` client is used.

    Returns
    -------
    list
        The entries of every fetched page, concatenated in page order.
    """
    api = cg if client is None else client
    all_markets = []

    for page in range(1, num_pages + 1):
        # Throttle only *between* requests: sleeping after the final page
        # would stall the notebook without protecting any further call here.
        if page > 1:
            time.sleep(delay_seconds)

        all_markets += api.get_coins_markets(vs_currency="usd", page=page)

    return all_markets

# Fetch the market listing once; the cells below read it from `markets`.
markets = pull_data_pages()

In [5]:
# NOTE(review): this displays the entire list returned by the API; a slice
# such as markets[:5] would keep the saved cell output short.
markets

In [6]:
def parese_out_ids(all_markets):
    """Return the coin ids found in `all_markets`, one per distinct symbol.

    Each entry of `all_markets` must provide the keys 'symbol' and 'id'.
    Ids are returned in the order in which their symbol first appears.
    When several entries share a symbol, only the id of the last such
    entry is kept (it takes the position of the symbol's first appearance).
    """
    id_by_symbol = {}
    for entry in all_markets:
        symbol = entry['symbol']
        # A repeated symbol overwrites the id stored for it earlier.
        id_by_symbol[symbol] = entry['id']

    return list(id_by_symbol.values())

# Coin ids extracted from the market listing held in `markets`.
ids = parese_out_ids(markets)

In [7]:
#ids

In [8]:
def pull_historical_daily_prices(ids, max_coins=5, days=500, batch_size=25,
                                 pause_seconds=60, client=None):
    """Download the USD price history of the leading coins in `ids`.

    Parameters
    ----------
    ids : sequence of str
        CoinGecko coin ids. Only the first `max_coins` are requested.
    max_coins : int or None, default 5
        How many ids (from the start of `ids`) to fetch. ``None`` fetches
        all of them.
    days : int, default 500
        Length of the history window passed to the API as ``days``.
    batch_size : int, default 25
        Once more than this many requests have been issued since the last
        pause, the function sleeps for `pause_seconds` before continuing.
    pause_seconds : float, default 60
        Length of the rate-limit pause, also applied after a failed request.
    client : object, optional
        Any object exposing ``get_coin_market_chart_by_id(id=...,
        vs_currency=..., days=...)``. A new ``CoinGeckoAPI()`` is created
        when omitted.

    Returns
    -------
    dict
        ``{coin_id: {"timestamps": [...], "values": [...]}}`` built from the
        first and second element of every item in the response's "prices"
        list. Coins whose request or parsing raised are reported on stdout
        and left out of the result.
    """
    api = CoinGeckoAPI() if client is None else client
    data = {}
    requests_since_pause = 0

    for coin in ids[:max_coins]:
        # Back off once the current request window is used up.
        if requests_since_pause > batch_size:
            time.sleep(pause_seconds)
            requests_since_pause = 0

        try:
            prices = api.get_coin_market_chart_by_id(
                id=coin, vs_currency="usd", days=days
            )["prices"]

            data[coin] = {
                "timestamps": [point[0] for point in prices],
                "values": [point[1] for point in prices],
            }

        except Exception as e:
            # Best effort: report the failure, give the API time to recover,
            # and move on to the next coin (the failed coin is not retried).
            # The f-string keeps this handler from raising on a non-str id.
            print(e, f"coin: {coin}")
            time.sleep(pause_seconds)

        requests_since_pause += 1

    return data

# Download the price history; coins whose request raised are missing from the result.
raw_historical_daily_prices = pull_historical_daily_prices(ids)

In [9]:
#raw_historical_daily_prices

In [10]:
def transform_raw_historical_prices(data):
    """Reshape raw per-coin price series into one long table of daily prices.

    Parameters
    ----------
    data : dict
        Mapping of coin id to ``{"timestamps": [...], "values": [...]}``.
        Timestamps are parsed as milliseconds since the Unix epoch. Coins
        are processed in the mapping's own iteration order; the function
        does not depend on any notebook-level variable.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``currency_name`` (the coin id with every '-' replaced
        by '_'), with the columns ``date`` (``datetime.date``) and
        ``value``. Rows without a value are dropped, and when a coin has
        several rows for the same date only the first one is kept.
        An empty `data` yields an empty frame with the same columns and
        index name.
    """
    frames = [
        pd.DataFrame(series["values"], index=series["timestamps"], columns=[coin])
        for coin, series in data.items()
    ]

    # pd.concat raises on an empty list, so give "nothing downloaded" a
    # well-defined result with the usual schema.
    if not frames:
        return pd.DataFrame(
            columns=["date", "value"],
            index=pd.Index([], name="currency_name"),
        )

    # One column per coin, aligned on the union of all timestamps.
    wide = pd.concat(frames, axis=1)

    # Calendar date of every timestamp (the index holds epoch milliseconds).
    wide["date"] = pd.to_datetime(wide.index, unit="ms").date

    # Wide -> long: one row per (date, coin); melt names the price column "value".
    long_prices = wide.melt(id_vars=["date"], var_name="currency_name")

    historical_daily_prices = (
        long_prices[["date", "currency_name", "value"]]
        # Timestamps a coin has no price for appear as NaN after the concat.
        .dropna()
        .assign(
            currency_name=lambda frame: frame["currency_name"].str.replace(
                "-", "_", regex=False
            )
        )
        # Several timestamps can fall on the same day; keep the first per coin.
        .drop_duplicates(subset=["date", "currency_name"], keep="first")
        .set_index("currency_name")
    )

    return historical_daily_prices

# Reshape the raw download into a long table indexed by currency_name.
historical_daily_prices = transform_raw_historical_prices(raw_historical_daily_prices)

In [12]:
def rename_column(df):
    """Rename the 'value' column of `df` to 'close' and return `df`.

    The frame passed in is modified in place (no copy is made); the object
    returned is that same frame.
    """
    new_names = {'value': 'close'}
    df.rename(columns=new_names, inplace=True)
    return df
# Rename the price column from 'value' to 'close'.
historical_daily_prices = rename_column(historical_daily_prices)

In [13]:
historical_daily_prices

Unnamed: 0_level_0,date,close
currency_name,Unnamed: 1_level_1,Unnamed: 2_level_1
bitcoin,2022-03-11,39468.354773
bitcoin,2022-03-12,38775.175588
bitcoin,2022-03-13,38903.693548
bitcoin,2022-03-14,37852.525141
bitcoin,2022-03-15,39669.423812
...,...,...
binancecoin,2023-07-19,240.270930
binancecoin,2023-07-20,241.332773
binancecoin,2023-07-21,242.624777
binancecoin,2023-07-22,243.768842
