In [1]:
import json
import requests
import pandas as pd
import os
from datetime import datetime, timedelta

In [2]:
def loadJSON(filepath):
    """Parse the JSON document stored at ``filepath`` and return the result."""
    with open(filepath) as source:
        raw_text = source.read()
    return json.loads(raw_text)

In [3]:
# Messari API key, read from the environment (None when the variable is unset)
api_key = os.environ.get('MESSARI_ANDRE_KEY')

In [4]:
# Load the JSON file and collect the ids it contains
def obt_ids(file_path):
    """Return the ``'id'`` value of every item in the JSON array at ``file_path``.

    The result preserves the order of the items in the file.
    """
    with open(file_path, 'r') as source:
        entries = json.load(source)

    collected = []
    for entry in entries:
        collected.append(entry['id'])
    return collected

In [5]:
# Load the token ids stored in each category's complete time-series file and
# report how many were found. Each label names the list it actually measures
# (the meme and rwa counts used to be printed under the "ids_gaming" label).
ids_ia = obt_ids('../data/processed/ai_complete_time_series.json')
print("Tamaño de ids_ia: ", len(ids_ia))

ids_gaming = obt_ids('../data/processed/gaming_complete_time_series.json')
print("Tamaño de ids_gaming: ", len(ids_gaming))

ids_meme = obt_ids('../data/processed/meme_complete_time_series.json')
print("Tamaño de ids_meme: ", len(ids_meme))

ids_rwa = obt_ids('../data/processed/rwa_complete_time_series.json')
print("Tamaño de ids_rwa: ", len(ids_rwa))

Tamaño de ids_ia:  31
Tamaño de ids_gaming:  59
Tamaño de ids_gaming:  18
Tamaño de ids_gaming:  16


In [6]:
def get_from_api(endpoint: str, API_KEY: str, timeout: float = 30):
    """Send an authenticated GET request to the Messari API.

    Args:
        endpoint: Path (plus optional query string) appended to the base URL.
            A leading slash is tolerated and stripped.
        API_KEY: Value sent in the ``x-messari-api-key`` header.
        timeout: Seconds to wait for the server before ``requests`` gives up
            and raises a timeout error. Defaults to 30.

    Returns:
        The response object returned by ``requests.get``. The status code is
        not inspected here; callers decide how to handle failures.
    """
    base_url = 'https://api.messari.io/'
    # base_url already ends with '/', so drop any leading slash on the endpoint
    # to avoid building a URL with a double slash.
    url = f"{base_url}{endpoint.lstrip('/')}"
    headers = {
        'accept': 'application/json',
        'x-messari-api-key': API_KEY,
    }
    # requests never times out by default, so a stalled connection would block
    # the notebook forever; always pass an explicit timeout.
    return requests.get(url, headers=headers, timeout=timeout)

# Messari API key taken from the MESSARI_ANDRE_KEY environment variable
# (None when the variable is not set).
API_KEY = os.environ.get('MESSARI_ANDRE_KEY')

In [7]:
# Read the per-category coin listings (one CSV per category).
ai_df = pd.read_csv('../../logical-model/data/csv/coins_AI.csv')
gaming_df = pd.read_csv('../../logical-model/data/csv/coins_gaming.csv')
meme_df = pd.read_csv('../../logical-model/data/csv/coins_meme.csv')
rwa_df = pd.read_csv('../../logical-model/data/csv/coins_RWA.csv')

# Report each frame's shape and the total row count across all categories.
print(
    "Shapes of the dataframes:",
    *(frame.shape for frame in (ai_df, gaming_df, meme_df, rwa_df)),
)
print(
    "Number of tokens: ",
    sum(frame.shape[0] for frame in (ai_df, gaming_df, meme_df, rwa_df)),
)

Shapes of the dataframes: (250, 17) (400, 17) (500, 17) (100, 17)
Number of tokens:  1250


In [8]:
def remove_non_market_cap(df: pd.DataFrame) -> pd.DataFrame:
    """Keep only the rows whose ``market_cap`` is present and different from zero.

    The input frame is left untouched; a filtered frame with the original
    index labels is returned.
    """
    market_cap = df['market_cap']
    has_value = market_cap.notna()
    is_non_zero = market_cap != 0
    return df[has_value & is_non_zero]

In [9]:
# Drop the rows without a usable market cap from every category frame.
ai_df, gaming_df, meme_df, rwa_df = map(
    remove_non_market_cap, (ai_df, gaming_df, meme_df, rwa_df)
)

# Report the shapes and the total number of rows left after filtering.
print(
    "Shapes of the dataframes:",
    *(frame.shape for frame in (ai_df, gaming_df, meme_df, rwa_df)),
)
print(
    "Number of tokens: ",
    sum(frame.shape[0] for frame in (ai_df, gaming_df, meme_df, rwa_df)),
)

Shapes of the dataframes: (167, 17) (261, 17) (220, 17) (75, 17)
Number of tokens:  723


In [10]:
# Load the raw token dump and display how many entries it holds.
tokens = loadJSON(filepath='../data/raw/all-tokens.json')
len(tokens)

14731

In [11]:
def filter_by_category(category_df: pd.DataFrame, tokens: list) -> list:
    """Select the token records that also appear in a category listing.

    A record is kept when its ``'name'`` occurs in the ``name`` column and its
    ``'symbol'`` occurs in the ``symbol`` column (compared as strings). The two
    checks are independent: the name and the symbol do not have to come from
    the same row of ``category_df``.

    Args:
        category_df: Frame with at least ``name`` and ``symbol`` columns.
        tokens: Token dictionaries, each with ``'name'`` and ``'symbol'`` keys.

    Returns:
        A list with the matching records, in their original order.
    """
    # Build the lookups once instead of re-materialising the column arrays
    # (and re-running astype(str)) for every token; set membership also avoids
    # a linear scan per record.
    known_names = set(category_df['name'].dropna())
    known_symbols = set(category_df['symbol'].astype(str))
    return [
        record for record in tokens
        if record['name'] in known_names and record['symbol'] in known_symbols
    ]

In [12]:
# Narrow the full token list down to the entries listed in each category frame.
meme = filter_by_category(meme_df, tokens)
gaming = filter_by_category(gaming_df, tokens)
ai = filter_by_category(ai_df, tokens)
rwa = filter_by_category(rwa_df, tokens)

# Per-category counts followed by the grand total.
print("Number of tokens:", *(len(group) for group in (ai, gaming, meme, rwa)))
print("Number of tokens:", sum(len(group) for group in (meme, gaming, ai, rwa)))

Number of tokens: 80 117 34 38
Number of tokens: 269


In [13]:
# Keep only the tokens whose id appears in the given list of ids.
def filter_by_ids(ids: list, tokens: list) -> list:
    """Return the records of ``tokens`` whose ``'id'`` is contained in ``ids``.

    Args:
        ids: Identifiers to keep; they must be hashable (e.g. strings).
        tokens: Token dictionaries, each with an ``'id'`` key.

    Returns:
        The matching records in their original order.
    """
    # A set gives constant-time membership checks; scanning the list for every
    # token made the filter quadratic.
    wanted_ids = set(ids)
    return [record for record in tokens if record['id'] in wanted_ids]

In [14]:
# Restrict each category to the ids loaded from its complete time-series file.
meme = filter_by_ids(ids_meme, meme)
gaming = filter_by_ids(ids_gaming, gaming)
ai = filter_by_ids(ids_ia, ai)
rwa = filter_by_ids(ids_rwa, rwa)

# Per-category counts followed by the grand total.
print("Number of tokens:", *(len(group) for group in (ai, gaming, meme, rwa)))
print("Number of tokens:", sum(len(group) for group in (meme, gaming, ai, rwa)))

Number of tokens: 31 59 18 16
Number of tokens: 124


In [15]:
def getting_interval_timestamps(date: str, days:int) -> tuple[int, int]:
    """Return the Unix timestamps bounding a window of ``days`` days.

    ``date`` is parsed as ``dd/mm/YYYY``. The parsed datetime is naive, so
    ``timestamp()`` interprets it in the machine's local time zone. The result
    is ``(start, end)`` in whole seconds, with ``end`` lying ``days`` days
    after ``start``.
    """
    window_start = datetime.strptime(date, '%d/%m/%Y')
    window_end = window_start + timedelta(days=days)
    return tuple(int(moment.timestamp()) for moment in (window_start, window_end))

In [16]:
def get_test_data(date_since:str, interval:str, tokens: list, category: str)->None:
    """Download price time series for ``tokens`` and dump them to JSON files.

    Starting at ``date_since``, the span up to the current moment is walked in
    windows of at most 360 days. For every window each token is queried through
    ``get_from_api`` (using the module-level ``API_KEY``) and the collected
    records are written to
    ``../data/processed/test/{category}-{window start as dd-mm-YYYY}.json``.
    The first window is therefore stored under the ``date_since`` file name.

    Args:
        date_since: First day to fetch, formatted ``dd/mm/YYYY``.
        interval: Value of the ``interval`` query parameter (e.g. ``'1d'``).
        tokens: Token dictionaries; each must contain an ``'id'`` key.
        category: Label stored under ``'category'`` on every record and used
            in the output file name.
    """
    date_format = "%d/%m/%Y"
    max_days = 360
    year = date_since

    while True:
        print(f"Getting data since {year}")
        annual_data = []

        window_start = datetime.strptime(year, date_format)
        now = datetime.now()

        # The last window is shortened so that it ends at the current moment.
        days = max_days
        if now - timedelta(days=max_days) < window_start:
            days = int((now - window_start).days)

        start_timestamp, end_timestamp = getting_interval_timestamps(year, days)
        print(f"{start_timestamp} - {end_timestamp} ({days} days)")

        for token in tokens:
            new_token = token.copy()
            # These keys are not wanted in the output; pop() tolerates tokens
            # that do not carry them, whereas del raised KeyError.
            new_token.pop('allTimeHighData', None)
            new_token.pop('cycleLowData', None)
            new_token['category'] = category
            endpoint = f"marketdata/v1/assets/{token['id']}/price/time-series?interval={interval}&startTime={start_timestamp}&endTime={end_timestamp}"
            try:
                response = get_from_api(endpoint, API_KEY)
            except requests.RequestException:
                # A network failure on one token is recorded like any other
                # failed request instead of discarding the whole batch.
                response = None
            if response is not None and response.status_code == 200:
                result = response.json()
                new_token['market_data'] = result['data']
            else:
                new_token['market_data'] = "No content"
            annual_data.append(new_token)

        # Name the file after the start of the *current* window. Using
        # date_since here made every window overwrite the same file, so only
        # the last window survived for ranges longer than 360 days.
        file_tag = year.replace("/", "-")
        with open(f'../data/processed/test/{category}-{file_tag}.json', 'w') as json_file:
            json.dump(annual_data, json_file, indent=4)

        # Move on to the start of the next window.
        next_year = window_start + timedelta(days=days)
        year = next_year.strftime(date_format)

        # A shortened window means the current moment has been reached.
        if days < max_days:
            break

In [18]:
# Fetch daily prices for the meme tokens starting on 28/10/2024.
get_test_data(date_since='28/10/2024', interval='1d', tokens=meme, category='meme')

Getting data since 28/10/2024
1730091600 - 1730696400 (7 days)


In [19]:
# Fetch daily prices for the gaming tokens starting on 28/10/2024.
get_test_data(date_since='28/10/2024', interval='1d', tokens=gaming, category='gaming')

Getting data since 28/10/2024
1730091600 - 1730696400 (7 days)


In [20]:
# Fetch daily prices for the AI tokens starting on 28/10/2024.
get_test_data(date_since='28/10/2024', interval='1d', tokens=ai, category='ai')

Getting data since 28/10/2024
1730091600 - 1730696400 (7 days)


In [21]:
# Fetch daily prices for the RWA tokens starting on 28/10/2024.
get_test_data(date_since='28/10/2024', interval='1d', tokens=rwa, category='rwa')

Getting data since 28/10/2024
1730091600 - 1730696400 (7 days)


In [22]:
# Read back the JSON dumps written by get_test_data for each category.
meme_complete = loadJSON(filepath='../data/processed/test/meme-28-10-2024.json')
gaming_complete = loadJSON(filepath='../data/processed/test/gaming-28-10-2024.json')
ai_complete = loadJSON(filepath='../data/processed/test/ai-28-10-2024.json')
rwa_complete = loadJSON(filepath='../data/processed/test/rwa-28-10-2024.json')

# Per-category record counts.
print(
    "Number of tokens:",
    *(len(dump) for dump in (ai_complete, gaming_complete, meme_complete, rwa_complete)),
)

Number of tokens: 31 59 18 16


In [23]:
def test_json_to_dataframe(json_file: list) -> pd.DataFrame:
    # json_file has the property 'market_data' that is a list of dictionaries i want every single one of them to be a row with and put the name, symbol and id aswell
    data = []
    for token in json_file:
        for record in token['market_data']:
            new_record = record.copy()
            new_record['name'] = token['name']
            new_record['symbol'] = token['symbol']
            new_record['id'] = token['id']
            new_record['category'] = token['category']
            data.append(new_record)
    return pd.DataFrame(data)

In [24]:
# Flatten each category dump into a DataFrame. Note that these names rebind
# the category listing frames loaded earlier in the notebook.
meme_df, gaming_df, ai_df, rwa_df = (
    test_json_to_dataframe(dump)
    for dump in (meme_complete, gaming_complete, ai_complete, rwa_complete)
)

In [25]:
# Persist every flattened frame as CSV, leaving out the DataFrame index.
meme_df.to_csv(path_or_buf='../data/processed/test/meme.csv', index=False)
gaming_df.to_csv(path_or_buf='../data/processed/test/gaming.csv', index=False)
ai_df.to_csv(path_or_buf='../data/processed/test/ai.csv', index=False)
rwa_df.to_csv(path_or_buf='../data/processed/test/rwa.csv', index=False)