In [1]:
import time
import datetime
import numpy as np
import pandas as pd
from pycoingecko import CoinGeckoAPI

In [2]:
def timestamp_from_date(date_str):
    """
    Converte una stringa 'YYYY-MM-DD' in un timestamp (secondi).
    """
    dt = datetime.datetime.strptime(date_str, "%Y-%m-%d")
    return int(dt.timestamp())

In [4]:
# Parametri iniziali
start_date_str = "2020-12-20"    # t = 20 dicembre 2020
delta_t_days = 180              # es. 180 giorni dopo t
cg = CoinGeckoAPI()

# Se desideri testare anche date di fine diverse, puoi calcolarle così:
start_timestamp = timestamp_from_date(start_date_str)
end_timestamp = start_timestamp + delta_t_days * 24 * 60 * 60  # secondi totali in delta_t

# Otteniamo la data di end_date in formato stringa (eventualmente utile per debug/controllo)
end_date = datetime.datetime.utcfromtimestamp(end_timestamp).strftime('%Y-%m-%d')
print(f"Period: da {start_date_str} a {end_date} (circa {delta_t_days} giorni)")

# Passo 2a: Recuperiamo un elenco di crypto (per esempio, top 50 per market cap).
# L'endpoint get_coins_markets ci dà le monete ordinate per capitalizzazione decrescente
# Se vuoi più di 50, puoi aumentare per_page, ma occhio ai limiti di API.
coins_list = cg.get_coins_markets(vs_currency='usd', order='market_cap_desc', per_page=50, page=1)

# Estraggo solo gli id CoinGecko di queste crypto
# (es. "bitcoin", "ethereum", "tether", ecc.)
coin_ids = [coin['id'] for coin in coins_list]

# Liste dove salvare (market_cap(t), rendimento) per ciascuna crypto
mcap_list = []
returns_list = []
crypto_names = []

Period: da 2020-12-20 a 2021-06-17 (circa 180 giorni)


In [5]:
for cid in coin_ids:
    try:
        # Scarichiamo i dati storici da CoinGecko
        # Il metodo get_coin_market_chart_range_by_id ritorna un dict con chiavi: prices, market_caps, total_volumes
        data_range = cg.get_coin_market_chart_range_by_id(
            id=cid,
            vs_currency='usd',  # se non funziona con 'usdt', prova con 'usd'
            from_timestamp=start_timestamp,
            to_timestamp=end_timestamp
        )
        
        prices = data_range.get('prices', [])
        market_caps = data_range.get('market_caps', [])

        # prices e market_caps sono liste di coppie [timestamp, valore]
        # Esempio: prices[0] = [1608422400000, 23000.0] -> millisecondi, prezzo
        # Controllo che ci siano dati sufficienti (ad es. almeno 2 rilevazioni)
        if len(prices) < 2 or len(market_caps) < 2:
            continue
        
        # Ricavo p(t) e p(t+delta_t) come primi e ultimi di 'prices'
        # p(t)
        p_t = prices[0][1]
        # p(t+delta_t) (l'ultimo prezzo disponibile vicino al end_timestamp)
        p_t_plus = prices[-1][1]
        
        # Market cap a t
        mcap_t = market_caps[0][1]
        
        # Rendimento = p(t+delta_t) / p(t)
        # (in alternativa, se vuoi il “massimo” nel periodo, potresti usare max(prices[i][1]) nel range)
        ret = p_t_plus / p_t if p_t != 0 else None
        
        if ret is not None and mcap_t is not None:
            mcap_list.append(mcap_t)
            returns_list.append(ret)
            crypto_names.append(cid)
        
        # Un delay di 1 secondo tra le chiamate può aiutare a non sovraccaricare le API
        time.sleep(1)
        
    except Exception as e:
        print(f"Errore con {cid}: {e}")
        continue

Errore con bitcoin: {'error': {'status': {'timestamp': '2024-12-23T11:50:13.614+00:00', 'error_code': 10012, 'error_message': 'Your request exceeds the allowed time range. Public API users are limited to querying historical data within the past 365 days. Upgrade to a paid plan to enjoy full historical data access: https://www.coingecko.com/en/api/pricing. '}}}
Errore con ethereum: {'error': {'status': {'timestamp': '2024-12-23T11:50:13.788+00:00', 'error_code': 10012, 'error_message': 'Your request exceeds the allowed time range. Public API users are limited to querying historical data within the past 365 days. Upgrade to a paid plan to enjoy full historical data access: https://www.coingecko.com/en/api/pricing. '}}}
Errore con tether: {'error': {'status': {'timestamp': '2024-12-23T11:50:13.961+00:00', 'error_code': 10012, 'error_message': 'Your request exceeds the allowed time range. Public API users are limited to querying historical data within the past 365 days. Upgrade to a paid p

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from scipy.stats import pearsonr, spearmanr

In [2]:
file = pd.read_csv('all_crypto_currencies_with_market_cap.csv')
file.head()

Unnamed: 0,date,coin_id,cmc_rank,market_cap,price,open,high,low,close,time_high,time_low,volume_24h,percent_change_1h,percent_change_24h,percent_change_7d,circulating_supply,total_supply,max_supply,num_market_pairs
0,2013-04-28,1,1,1488567000.0,134.210022,,,,,,,0.0,0.639231,,,11091325.0,11091325.0,21000000.0,
1,2013-04-28,2,2,74637020.0,4.348405,,,,,,,0.0,0.799273,,,17164230.0,17164230.0,84000000.0,
2,2013-04-28,5,3,7250187.0,0.386525,,,,,,,0.0,-0.934763,,,18757362.0,18757362.0,,
3,2013-04-28,3,4,5995997.0,1.107233,,,,,,,0.0,-0.050503,,,5415300.0,5415300.0,,
4,2013-04-28,4,5,1503099.0,0.646892,,,,,,,0.0,0.609159,,,2323569.75,2323569.75,42000000.0,


In [3]:
file['date'].max()

'2021-07-31'

In [4]:
coins = pd.read_csv('coins.csv')
coins.head()

Unnamed: 0,id,name,slug,symbol,status,category,description,subreddit,notice,tags,...,message_board,chat,explorer,reddit,technical_doc,source_code,announcement,platform_id,date_added,date_launched
0,1,Bitcoin,bitcoin,BTC,active,coin,## **What Is Bitcoin (BTC)?**\n\nBitcoin is a ...,bitcoin,,"mineable, pow, sha-256, store-of-value, state-...",...,https://bitcointalk.org,,https://blockchain.coinmarketcap.com/chain/bit...,https://reddit.com/r/bitcoin,https://bitcoin.org/bitcoin.pdf,https://github.com/bitcoin/,,,2013-04-28T00:00:00.000Z,
1,2,Litecoin,litecoin,LTC,active,coin,## What Is Litecoin (LTC)?\n\nLitecoin (LTC) i...,litecoin,,"mineable, pow, scrypt, medium-of-exchange, bin...",...,"https://litecointalk.io/, https://litecoin-fou...",https://telegram.me/litecoin,"https://blockchair.com/litecoin, https://chain...",https://reddit.com/r/litecoin,,https://github.com/litecoin-project/litecoin,https://bitcointalk.org/index.php?topic=47417.0,,2013-04-28T00:00:00.000Z,
2,3,Namecoin,namecoin,NMC,active,coin,Namecoin (NMC) is a cryptocurrency . Users are...,namecoin,,"mineable, pow, sha-256, platform",...,,https://telegram.me/namecoin,"https://nmc.tokenview.com/, https://www.namebr...",https://reddit.com/r/namecoin,,https://github.com/namecoin,https://bitcointalk.org/?topic=6017.0,,2013-04-28T00:00:00.000Z,
3,4,Terracoin,terracoin,TRC,active,coin,Terracoin (TRC) launched in 2012 with the aim ...,terracoin,,"mineable, pow, sha-256, masternodes",...,https://medium.com/@clockuniverse,"https://mattermost.terracoin.io/, https://t.me...","https://insight.terracoin.io/, https://explore...",https://reddit.com/r/terracoin,https://wiki.terracoin.io/view/Whitepaper,https://github.com/terracoin,https://bitcointalk.org/index.php?topic=1364146.0,,2013-04-28T00:00:00.000Z,
4,5,Peercoin,peercoin,PPC,active,coin,Peercoin (PPC) is a cryptocurrency . Users are...,peercoin,,"mineable, hybrid-pow-pos, sha-256, medium-of-e...",...,https://talk.peercoin.net,"https://t.me/peercoin, https://discord.gg/m294ReV","https://chainz.cryptoid.info/ppc/, https://exp...",https://reddit.com/r/peercoin,https://docs.peercoin.net/,https://github.com/peercoin,https://bitcointalk.org/index.php?topic=101820.0,,2013-04-28T00:00:00.000Z,


In [11]:
def analyze_correlation(file_df, coins_df, start_date_str="2020-12-20", delta_days=180, alpha=0.05):
    """
    Esegue l'analisi di correlazione tra market cap(t) e rendimento p(t+delta)/p(t).
    file_df: DataFrame con colonne [coin_id, date, close, market_cap].
    coins_df: DataFrame con colonne [id, symbol] (non usata qui, ma potresti usarla per stampe).
    start_date_str: data di inizio (stringa YYYY-MM-DD).
    delta_days: quanti giorni dopo start_date considerare.
    alpha: livello di significatività (default 0.05).
    """

    print("=== Inizio analisi di correlazione ===")
    print(f"Data di inizio (t): {start_date_str}")
    print(f"Delta t (gg): {delta_days}")
    print(f"Livello di significatività alpha: {alpha}")
    
    # Converto la colonna date in datetime
    if file_df["date"].dtype == object:
        print("Converto la colonna 'date' in datetime...")
        file_df["date"] = pd.to_datetime(file_df["date"], format="%Y-%m-%d", errors="coerce")
    
    start_date = datetime.strptime(start_date_str, "%Y-%m-%d")
    end_date = start_date + timedelta(days=delta_days)
    print(f"Data di fine (t + delta_t): {end_date.strftime('%Y-%m-%d')}")

    max_available_date = file_df["date"].max()
    print(f"Data massima disponibile nel dataset: {max_available_date.strftime('%Y-%m-%d')}")

    # Lista con i risultati
    results = []
    
    # Unici coin_id presenti
    unique_coins = file_df["coin_id"].unique()
    print(f"Numero di coin_id unici trovati nel dataset: {len(unique_coins)}")

    # Crea un dizionario: {id: symbol, ...}
    id_to_symbol = dict(zip(coins_df["id"], coins_df["symbol"]))

    # Itero su ciascun coin_id
    for cid in unique_coins:
        symbol = id_to_symbol.get(cid, "Unknown")
        print(f"\n--- Inizio analisi coin_id: {cid} (symbol: {symbol}) ---")
        
        df_coin = file_df[file_df["coin_id"] == cid].copy()

        # Filtro nel range [start_date, end_date]
        df_coin_in_range = df_coin[
            (df_coin["date"] >= start_date) & (df_coin["date"] <= end_date)
        ]

        # Se non ho dati in questo range, salto la coin
        if df_coin_in_range.empty:
            print(f" -> Nessun dato per {cid} nel periodo {start_date} - {end_date}, skip.")
            continue
        
        # Ordino per data (cronologicamente)
        df_coin_in_range.sort_values(by="date", inplace=True)

        # p(t) e market cap(t) = prima riga
        first_row = df_coin_in_range.iloc[0]
        p_t = first_row["close"]
        mcap_t = first_row["market_cap"]

        # p(t+delta) = ultima riga
        last_row = df_coin_in_range.iloc[-1]
        p_t_plus = last_row["close"]

        # Check per NaN
        if pd.isna(p_t) or pd.isna(p_t_plus) or pd.isna(mcap_t):
            print(f" -> Dati NaN per {cid} (p_t={p_t}, p_t_plus={p_t_plus}, mcap_t={mcap_t}). Skip.")
            continue
        
        # Evita divisione per zero
        if p_t == 0:
            print(f" -> p(t) = 0 per {cid}, impossibile calcolare il rendimento. Skip.")
            continue
        
        ret = p_t_plus / p_t
        print(f" -> p(t) = {p_t:.2f}, p(t+delta) = {p_t_plus:.2f}, mcap(t) = {mcap_t:.2f}, return = {ret:.4f}")

        results.append({
            "coin_id": cid,
            "symbol": symbol,
            "price_t": p_t,
            "price_t_plus": p_t_plus,
            "mcap_t": mcap_t,
            "return": ret
        })
    
    # Creo un dataframe con i risultati
    df_corr = pd.DataFrame(results)
    print("\n=== Risultati finali raccolti ===")
    if df_corr.empty:
        print("Nessuna coin è stata valida (dopo filtri e NaN). Impossibile calcolare la correlazione.")
        return None
    
    display(df_corr)

    # Calcolo correlazione Pearson
    corr_value, p_value = pearsonr(df_corr["mcap_t"], df_corr["return"])

    # Correlazione Spearman (facoltativa, ma spesso utile)
    spearman_corr, spearman_p = spearmanr(df_corr["mcap_t"], df_corr["return"])

    print("\n--- CORRELAZIONI ---")
    print(f"Pearson R = {corr_value:.4f} (p-value = {p_value:.6f})")
    print(f"Spearman ρ = {spearman_corr:.4f} (p-value = {spearman_p:.6f})")

    # Test di significatività
    # Se p_value < alpha, rigettiamo l'ipotesi nulla (corr=0) a livello alpha
    if p_value < alpha:
        print(f"=> [Pearson] p-value < {alpha}, rigettiamo H0 (corr=0) e concludiamo che la correlazione ≠ 0.")
        if corr_value < 0:
            print("   Inoltre, la correlazione è negativa (inferiore a 0).")
        else:
            print("   Inoltre, la correlazione è positiva (maggiore di 0).")
    else:
        print(f"=> [Pearson] p-value >= {alpha}, NON possiamo rigettare H0 (corr=0).")

    if spearman_p < alpha:
        print(f"=> [Spearman] p-value < {alpha}, rigettiamo H0 (corr=0) e concludiamo che la correlazione ≠ 0.")
        if spearman_corr < 0:
            print("   Inoltre, la correlazione è negativa (inferiore a 0).")
        else:
            print("   Inoltre, la correlazione è positiva (maggiore di 0).")
    else:
        print(f"=> [Spearman] p-value >= {alpha}, NON possiamo rigettare H0 (corr=0).")

    # Probabilità "low market cap => high return"
    print("\n--- PROBABILITA' LOW CAP => HIGH RETURN ---")
    mcap_median = df_corr["mcap_t"].median()
    ret_median = df_corr["return"].median()
    df_corr["low_mcap"] = df_corr["mcap_t"] < mcap_median
    df_corr["high_return"] = df_corr["return"] > ret_median

    low_mcap_df = df_corr[df_corr["low_mcap"] == True]
    count_low_mcap = len(low_mcap_df)

    if count_low_mcap > 0:
        count_lowcap_highreturn = sum(low_mcap_df["high_return"])
        prob_lowcap_highreturn = count_lowcap_highreturn / count_low_mcap
        print(f"Sulle {count_low_mcap} crypto con market cap sotto la mediana, {count_lowcap_highreturn} hanno un return sopra la mediana.")
        print(f"Probabilità (low cap -> high return) = {prob_lowcap_highreturn:.2%}")
    else:
        print("Nessuna crypto con market cap < mediana. Impossibile calcolare probabilità.")

    # Ritorno l'oggetto df_corr e i valori di correlazione, se vuoi usarli dopo
    return {
        "df_corr": df_corr,
        "pearson_r": corr_value,
        "pearson_p": p_value,
        "spearman_r": spearman_corr,
        "spearman_p": spearman_p
    }

In [None]:
results = analyze_correlation(file, coins, "2020-12-20", 180, 0.05)
results

=== Inizio analisi di correlazione ===
Data di inizio (t): 2020-12-20
Delta t (gg): 180
Livello di significatività alpha: 0.05
Data di fine (t + delta_t): 2021-06-18
Data massima disponibile nel dataset: 2021-07-31
Numero di coin_id unici trovati nel dataset: 8927

--- Inizio analisi coin_id: 1 (symbol: BTC) ---


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_coin_in_range.sort_values(by="date", inplace=True)


 -> p(t) = 23477.30, p(t+delta) = 38053.50, mcap(t) = 436129331482.26, return = 1.6209

--- Inizio analisi coin_id: 2 (symbol: LTC) ---
 -> p(t) = 114.67, p(t+delta) = 167.06, mcap(t) = 7583089812.36, return = 1.4569

--- Inizio analisi coin_id: 5 (symbol: PPC) ---
 -> p(t) = 0.29, p(t+delta) = 1.12, mcap(t) = 7740115.13, return = 3.8493

--- Inizio analisi coin_id: 3 (symbol: NMC) ---
 -> p(t) = 0.53, p(t+delta) = 1.54, mcap(t) = 7796417.28, return = 2.9118

--- Inizio analisi coin_id: 4 (symbol: TRC) ---
 -> p(t) = 0.06, p(t+delta) = 0.02, mcap(t) = 1274349.84, return = 0.3801

--- Inizio analisi coin_id: 7 (symbol: DVC) ---
 -> Nessun dato per 7 nel periodo 2020-12-20 00:00:00 - 2021-06-18 00:00:00, skip.

--- Inizio analisi coin_id: 6 (symbol: NVC) ---
 -> p(t) = 0.35, p(t+delta) = 1.48, mcap(t) = 822899.57, return = 4.1872

--- Inizio analisi coin_id: 8 (symbol: FTC) ---
 -> p(t) = 0.02, p(t+delta) = 0.03, mcap(t) = 4541889.43, return = 1.7963

--- Inizio analisi coin_id: 10 (symb

Unnamed: 0,coin_id,symbol,price_t,price_t_plus,mcap_t,return
0,1,BTC,23477.295197,38053.504173,4.361293e+11,1.620864
1,2,LTC,114.665993,167.055693,7.583090e+09,1.456890
2,5,PPC,0.290234,1.117204,7.740115e+06,3.849323
3,3,NMC,0.529058,1.540498,7.796417e+06,2.911773
4,4,TRC,0.055563,0.021117,1.274350e+06,0.380067
...,...,...,...,...,...,...
5202,10139,BIOS,6.386833,6.164358,0.000000e+00,0.965167
5203,9959,BUN,33.553353,22.015337,0.000000e+00,0.656129
5204,8669,SOV,23.609578,23.609578,0.000000e+00,1.000000
5205,10477,TULIP\u20bf,0.007208,0.007208,0.000000e+00,1.000000



--- CORRELAZIONI ---
Pearson R = -0.0003 (p-value = 0.980359)
Spearman ρ = 0.2765 (p-value = 0.000000)
=> [Pearson] p-value >= 0.05, NON possiamo rigettare H0 (corr=0).
=> [Spearman] p-value < 0.05, rigettiamo H0 (corr=0) e concludiamo che la correlazione ≠ 0.
   Inoltre, la correlazione è positiva (maggiore di 0).

--- PROBABILITA' LOW CAP => HIGH RETURN ---
Nessuna crypto con market cap < mediana. Impossibile calcolare probabilità.


In [14]:
results['df_corr']

Unnamed: 0,coin_id,symbol,price_t,price_t_plus,mcap_t,return,low_mcap,high_return
0,1,BTC,23477.295197,38053.504173,4.361293e+11,1.620864,False,True
1,2,LTC,114.665993,167.055693,7.583090e+09,1.456890,False,True
2,5,PPC,0.290234,1.117204,7.740115e+06,3.849323,False,True
3,3,NMC,0.529058,1.540498,7.796417e+06,2.911773,False,True
4,4,TRC,0.055563,0.021117,1.274350e+06,0.380067,False,False
...,...,...,...,...,...,...,...,...
5202,10139,BIOS,6.386833,6.164358,0.000000e+00,0.965167,False,False
5203,9959,BUN,33.553353,22.015337,0.000000e+00,0.656129,False,False
5204,8669,SOV,23.609578,23.609578,0.000000e+00,1.000000,False,False
5205,10477,TULIP\u20bf,0.007208,0.007208,0.000000e+00,1.000000,False,False
