In [2]:
# import yfinance as yf
# import pandas as pd
# import time

# # List of crypto-related tickers
# tickers = [
#     "MSTR",   # MicroStrategy
#     "GLXY.TO",# Galaxy Digital (Toronto-listed)
#     "3350.T", # Metaplanet Inc (Tokyo Stock Exchange)
#     "COIN",   # Coinbase
#     "RIOT",   # Riot Platforms
#     "MARA",   # Marathon Digital
#     "CLSK",   # CleanSpark
#     "WULF",   # TeraWulf
#     "CORZ",   # Core Scientific
#     "BTDR"    # Bitdeer Technologies (NASDAQ) – you can swap with HIVE.V if preferred
# ]

# # Dictionary to store DataFrames
# price_data = {}

# for ticker in tickers:
#     print(f"Downloading {ticker}...")
#     ticker_obj = yf.Ticker(ticker)
#     try:
#         historyPrices = ticker_obj.history(
#             period="5y",
#             interval="1d",
#             threads=False  # Avoid multi-threaded requests which trigger rate limit
#         )
#         price_data[ticker] = historyPrices
#         print(f"{ticker} downloaded: {len(historyPrices)} rows")
#     except Exception as e:
#         print(f"Error downloading {ticker}: {e}")
#     time.sleep(5)  # Pause 5 seconds between requests to prevent rate limit

# Optionally: Combine into one Excel file with a sheet per ticker
# with pd.ExcelWriter("crypto_stocks_5y.xlsx") as writer:
#     for ticker, df in price_data.items():
#         df.to_excel(writer, sheet_name=ticker)



In [3]:
import pandas_datareader as pdr

import pandas as pd
import numpy as np

from datetime import datetime
import datetime

import matplotlib.pyplot as plt

from pathlib import Path

In [4]:
# Date window passed to the data-retrieval helpers, as 'YYYY-MM-DD' strings
start_date, end_date = '2020-01-01', '2025-08-01'

# FRED Macroeconomic Indicators

In [5]:
def get_fred_macro_data(fred_series, start_date, end_date):
    """
    Fetch FRED macroeconomic data, compute YoY and QoQ changes,
    and return a single merged DataFrame resampled to quarter-end.

    Parameters:
    - fred_series: dict of {friendly_name: FRED_series_code}
    - start_date, end_date: date strings in 'YYYY-MM-DD' format

    Returns:
    - macro_df: DataFrame with macro features per quarter
    """
    fred_data = {}

    for name, code in fred_series.items():
        try:
            df = pdr.DataReader(code, "fred", start=start_date, end=end_date)
            if df.empty:
                print(f"[FRED] No data for {name} ({code}), skipping.")
                continue
            # YoY and QoQ percentage changes
            df[name + '_yoy'] = df[code].pct_change(4)
            df[name + '_qoq'] = df[code].pct_change(1)

            # Rename column to friendly name
            df = df.rename(columns={code: name})

            fred_data[name] = df

        except Exception as e:
            print(f"[FRED] Error for {name} ({code}): {e}")
            continue

    if not fred_data:
        print("No FRED data retrieved.")
        return pd.DataFrame()

    # Merge and resample to quarter-end
    macro_df = pd.concat(fred_data.values(), axis=1).resample('QE').last()
    

    # Extract year and quarter for joining with other datasets
    macro_df = macro_df.reset_index()
    macro_df['year'] = macro_df['DATE'].dt.year
    macro_df['quarter'] = macro_df['DATE'].dt.quarter
    macro_df = macro_df.rename(columns={'DATE': 'date'})
    return macro_df

In [6]:
# Friendly column name -> FRED series code.
# Insertion order is kept, so it also fixes the column order of the frame
# that get_fred_macro_data() builds from this mapping.
fred_series = dict(
    gdp_us='GDPC1',
    cpi_us='CPIAUCSL',
    unemployment_us='UNRATE',
    interest_us='FEDFUNDS',
    gdp_de='CLVMNACSCAB1GQDE',
    cpi_de='DEUCPIALLMINMEI',
    interest_eu='IRLTLT01EZM156N',
)

In [7]:
# Download the macro series for the configured date window and
# show the five most recent quarters as a sanity check
macro_df = get_fred_macro_data(
    fred_series=fred_series,
    start_date=start_date,
    end_date=end_date,
)
macro_df.tail(5)

Unnamed: 0,date,gdp_us,gdp_us_yoy,gdp_us_qoq,cpi_us,cpi_us_yoy,cpi_us_qoq,unemployment_us,unemployment_us_yoy,unemployment_us_qoq,...,gdp_de_yoy,gdp_de_qoq,cpi_de,cpi_de_yoy,cpi_de_qoq,interest_eu,interest_eu_yoy,interest_eu_qoq,year,quarter
18,2024-09-30,23400.294,0.027188,0.007595,314.851,0.005464,0.002292,4.1,0.025,-0.02381,...,-0.006217,0.000192,126.1978,0.003353,0.0,2.834165,-0.092048,-0.02469,2024,3
19,2024-12-31,23542.349,0.025337,0.006071,317.603,0.011053,0.003647,4.1,-0.02381,-0.02381,...,-0.001648,0.001822,127.0412,0.006683,0.005004,2.812343,-0.0322,-0.039871,2024,4
20,2025-03-31,23512.717,0.019918,-0.001259,319.615,0.010005,-0.0005,4.2,0.0,0.02439,...,0.002487,0.003062,127.7792,0.010843,0.003311,3.313772,0.131316,0.108784,2025,1
21,2025-06-30,23703.782,0.020663,0.008126,321.5,0.005394,0.00287,4.1,0.0,-0.02381,...,0.002301,-0.002767,,,,3.084761,0.032157,0.004483,2025,2
22,2025-09-30,,,,322.132,0.007875,0.001966,4.3,0.02381,0.02381,...,,,,,,3.173563,-0.042311,0.028787,2025,3


In [100]:
# Rename the macro date column to `quarter_end`, the key used for the merge
# with the price features. Reassign instead of mutating in place so the cell
# does not silently alter a frame that other cells already displayed.
macro_df = macro_df.rename(columns={'date': 'quarter_end'})

In [72]:
# Load the crypto price history; `timestamp` is parsed to datetimes on read
df_crypto = pd.read_csv(
    'multi_crypto.csv',
    parse_dates=['timestamp'],
)

In [73]:
# get_price_features() expects the time column to be called `date`.
# Reassign rather than rename in place to keep the cell free of hidden mutation.
df_crypto = df_crypto.rename(columns={'timestamp': 'date'})

In [74]:
#df_crypto.drop(columns=['open', 'high', 'low'], inplace=True)

In [75]:
# Data preprocessing

In [84]:
def get_price_features(df, week=7, month=30, ma_windows=(20, 50, 100)):
    """
    Derive price/volume features for the price history of ONE ticker.

    The input is not modified; a copy with lower-cased column names is
    returned. All look-back windows are counted in ROWS, so the rows are
    expected to be in chronological order with one row per observation date.

    Parameters:
    - df: DataFrame with (case-insensitive) columns ``date``, ``close`` and
      ``volume``. ``open`` / ``high`` / ``low`` are dropped when present.
    - week: number of rows treated as one week (weekly growth and the rolling
      volatility window). The default 7 fits data with one row per calendar
      day; for trading-day data 5 is the usual choice.
    - month: number of rows treated as one month (default 30; about 21 for
      trading-day data).
    - ma_windows: iterable of window lengths; one ``ma_<n>`` column is
      created per entry.

    Returns:
    - DataFrame with the remaining input columns plus ``ln_volume``,
      ``daily_growth``, ``weekly_growth``, ``monthly_growth``, ``vol_weekly``,
      ``momentum_weekly``, the ``ma_<n>`` columns, ``year``, ``quarter`` and
      ``quarter_end``.
    """
    df = df.copy()

    # Normalise column names and drop the unused OHLC columns. errors='ignore'
    # keeps the function usable on frames where they were already removed.
    df.columns = df.columns.str.lower()
    df = df.drop(columns=['open', 'high', 'low'], errors='ignore')

    # The .dt accessors below need real datetimes (no-op if already parsed)
    df['date'] = pd.to_datetime(df['date'])

    # 1. Log of volume (avoid log(0) by replacing 0 with NaN)
    df['ln_volume'] = np.log(df['volume'].replace(0, np.nan))

    # 2. Growth rates: percentage change of the close over 1 / week / month rows
    df['daily_growth'] = df['close'].pct_change()
    df['weekly_growth'] = df['close'].pct_change(week)
    df['monthly_growth'] = df['close'].pct_change(month)

    # 3. Rolling volatility of the daily returns and volatility-scaled momentum.
    #    A zero volatility is treated as missing so the ratio becomes NaN
    #    instead of +/-inf (same idea as the log-volume guard above).
    df['vol_weekly'] = df['daily_growth'].rolling(week).std()
    df['momentum_weekly'] = df['weekly_growth'] / df['vol_weekly'].replace(0, np.nan)

    # 4. Moving averages of the close
    for window in ma_windows:
        df[f'ma_{window}'] = df['close'].rolling(window=window).mean()

    # 5. Calendar metadata. QuarterEnd(0) rolls forward to the end of the
    #    current quarter and leaves dates already on a quarter end unchanged;
    #    normalising first drops any time-of-day so the key is a pure date.
    df['year'] = df['date'].dt.year
    df['quarter'] = df['date'].dt.quarter
    df['quarter_end'] = df['date'].dt.normalize() + pd.offsets.QuarterEnd(0)

    return df

In [77]:
#df_crypto['Date'] = pd.to_datetime(df_crypto['Date'])

# Compute the features ticker by ticker. The groups are iterated explicitly
# instead of using groupby(...).apply(...): apply() operating on the grouping
# column is deprecated in recent pandas, and the `ticker` column has to stay
# in the result.
# sort_index() puts the rows back in the order of df_crypto (assumes its index
# is unique and increasing, as produced by read_csv) before renumbering.
df_crypto_clean = (
    pd.concat(
        [get_price_features(ticker_df) for _, ticker_df in df_crypto.groupby('ticker')]
    )
    .sort_index()
    .reset_index(drop=True)
)



  .apply(get_price_features)


In [78]:
# Display the crypto feature table (last expression of the cell is rendered)
df_crypto_clean

Unnamed: 0,date,close,volume,ticker,ln_volume,daily_growth,weekly_growth,monthly_growth,vol_weekly,momentum_weekly,ma_20,ma_50,ma_100,year,quarter,quarter_end
0,2021-01-01,29331.6900,5.418293e+04,BTC/USDT,10.900121,,,,,,,,,2021,1,2021-03-31
1,2021-01-01,728.9100,6.751141e+05,ETH/USDT,13.422637,,,,,,,,,2021,1,2021-03-31
2,2021-01-01,1.8421,4.421991e+06,SOL/USDT,15.302101,,,,,,,,,2021,1,2021-03-31
3,2021-01-02,32178.3300,1.299939e+05,BTC/USDT,11.775243,0.097050,,,,,,,,2021,1,2021-03-31
4,2021-01-02,774.5600,1.352619e+06,ETH/USDT,14.117553,0.062628,,,,,,,,2021,1,2021-03-31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5110,2025-08-31,4391.8300,3.565862e+05,ETH/USDT,12.784331,0.004145,-0.081236,0.259053,0.041732,-1.946622,4484.0775,3997.6288,3279.1015,2025,3,2025-09-30
5111,2025-08-31,200.6200,3.303224e+06,SOL/USDT,15.010410,-0.010261,-0.026022,0.232991,0.053323,-0.488011,194.9555,183.8930,168.6992,2025,3,2025-09-30
5112,2025-09-01,109257.3400,1.288992e+04,BTC/USDT,9.464201,0.009340,-0.007762,-0.029224,0.017758,-0.437069,113793.4815,115925.1828,111669.6387,2025,3,2025-09-30
5113,2025-09-01,4358.6800,3.774613e+05,ETH/USDT,12.841223,-0.007548,-0.003999,0.284254,0.026622,-0.150211,4472.4855,4025.3618,3297.3843,2025,3,2025-09-30


In [79]:
# Load the equity OHLCV history; `Date` is parsed to datetimes on read
df_stock = pd.read_csv(
    'CryptoEquities_OHLCV.csv',
    parse_dates=['Date'],
)

In [80]:
# Keep only the selected equities. Series.isin is the vectorised equivalent of
# testing every ticker with a Python lambda.
df_stock = (
    df_stock[df_stock['Ticker'].isin(['MSTR', 'COIN', 'RIOT', 'MARA', 'CLSK', 'WULF'])]
    .reset_index(drop=True)
)

In [85]:
#df_stock['Date'] = pd.to_datetime(df_stock['Date'])

# Compute the features ticker by ticker. The groups are iterated explicitly
# instead of using groupby(...).apply(...): apply() operating on the grouping
# column is deprecated in recent pandas, and the ticker column has to stay
# in the result.
# sort_index() puts the rows back in the order of df_stock (its index was
# reset to 0..n-1 after filtering) before renumbering.
df_stock_clean = (
    pd.concat(
        [get_price_features(ticker_df) for _, ticker_df in df_stock.groupby('Ticker')]
    )
    .sort_index()
    .reset_index(drop=True)
)

  .apply(get_price_features)


In [86]:
# Display the equity feature table (last expression of the cell is rendered)
df_stock_clean

Unnamed: 0,date,close,volume,ticker,ln_volume,daily_growth,weekly_growth,monthly_growth,vol_weekly,momentum_weekly,ma_20,ma_50,ma_100,year,quarter,quarter_end
0,2021-01-04,42.522,14679020,MSTR,16.501930,,,,,,,,,2021,1,2021-03-31
1,2021-01-05,42.863,11006240,MSTR,16.213973,0.008019,,,,,,,,2021,1,2021-03-31
2,2021-01-06,48.055,15360310,MSTR,16.547297,0.121130,,,,,,,,2021,1,2021-03-31
3,2021-01-07,53.575,22844050,MSTR,16.944201,0.114868,,,,,,,,2021,1,2021-03-31
4,2021-01-08,53.164,16453870,MSTR,16.616071,-0.007671,,,,,,,,2021,1,2021-03-31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6945,2025-08-25,8.930,39878723,WULF,17.501353,-0.025109,0.025258,0.747554,0.042657,0.592132,6.6860,5.4774,4.3867,2025,3,2025-09-30
6946,2025-08-26,9.240,36027100,WULF,17.399782,0.034714,0.030100,0.897331,0.043178,0.697129,6.8870,5.5818,4.4493,2025,3,2025-09-30
6947,2025-08-27,9.130,29942098,WULF,17.214776,-0.011905,-0.026652,0.739048,0.039467,-0.675314,7.0930,5.6778,4.5141,2025,3,2025-09-30
6948,2025-08-28,9.440,41332648,WULF,17.537163,0.033954,0.075171,0.761194,0.030715,2.447343,7.3070,5.7876,4.5825,2025,3,2025-09-30


In [105]:
# Inspect the macro columns before merging; the join key is `quarter_end`
macro_df.columns

Index(['quarter_end', 'gdp_us', 'gdp_us_yoy', 'gdp_us_qoq', 'cpi_us',
       'cpi_us_yoy', 'cpi_us_qoq', 'unemployment_us', 'unemployment_us_yoy',
       'unemployment_us_qoq', 'interest_us', 'interest_us_yoy',
       'interest_us_qoq', 'gdp_de', 'gdp_de_yoy', 'gdp_de_qoq', 'cpi_de',
       'cpi_de_yoy', 'cpi_de_qoq', 'interest_eu', 'interest_eu_yoy',
       'interest_eu_qoq', 'year', 'quarter'],
      dtype='object')

In [106]:
# Attach the quarterly macro features to every price row of the same quarter.
# Left join: price rows are kept even when no macro row exists for the quarter.
# NOTE(review): `final_df` is not defined in the cells shown here - confirm it
# is created earlier in the notebook so the cell survives Restart & Run All.
all_data = final_df.merge(macro_df, how='left', on='quarter_end')

In [108]:
# Persist the merged dataset; index=False leaves the row index out of the file
all_data.to_csv('all_data_raw.csv', index=False)

In [107]:
# Display the merged frame (the trailing comment keeps an optional .info() call handy)
all_data#.info()

Unnamed: 0,date,close_crypto,volume_crypto,ticker_crypto,ln_volume_crypto,daily_growth_crypto,weekly_growth_crypto,monthly_growth_crypto,vol_weekly_crypto,momentum_weekly_crypto,...,gdp_de_yoy,gdp_de_qoq,cpi_de,cpi_de_yoy,cpi_de_qoq,interest_eu,interest_eu_yoy,interest_eu_qoq,year,quarter
0,2021-01-01,29331.69,5.418293e+04,BTC/USDT,10.900121,,,,,,...,-0.006255,-0.006158,107.6424,0.024072,0.004922,0.147856,-3.686229,1.339151,2021,1
1,2021-01-02,32178.33,1.299939e+05,BTC/USDT,11.775243,0.097050,,,,,...,-0.006255,-0.006158,107.6424,0.024072,0.004922,0.147856,-3.686229,1.339151,2021,1
2,2021-01-03,33000.05,1.209576e+05,BTC/USDT,11.703195,0.025536,,,,,...,-0.006255,-0.006158,107.6424,0.024072,0.004922,0.147856,-3.686229,1.339151,2021,1
3,2021-01-04,31988.71,1.408999e+05,BTC/USDT,11.855805,-0.030647,,,,,...,-0.006255,-0.006158,107.6424,0.024072,0.004922,0.147856,-3.686229,1.339151,2021,1
4,2021-01-04,31988.71,1.408999e+05,BTC/USDT,11.855805,-0.030647,,,,,...,-0.006255,-0.006158,107.6424,0.024072,0.004922,0.147856,-3.686229,1.339151,2021,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22450,2025-08-29,205.08,8.607487e+06,SOL/USDT,15.968143,-0.043604,0.021875,0.153625,0.053398,0.409647,...,,,,,,3.173563,-0.042311,0.028787,2025,3
22451,2025-08-29,205.08,8.607487e+06,SOL/USDT,15.968143,-0.043604,0.021875,0.153625,0.053398,0.409647,...,,,,,,3.173563,-0.042311,0.028787,2025,3
22452,2025-08-30,202.70,3.894576e+06,SOL/USDT,15.175095,-0.011605,-0.007054,0.176983,0.053354,-0.132212,...,,,,,,3.173563,-0.042311,0.028787,2025,3
22453,2025-08-31,200.62,3.303224e+06,SOL/USDT,15.010410,-0.010261,-0.026022,0.232991,0.053323,-0.488011,...,,,,,,3.173563,-0.042311,0.028787,2025,3
