In [6]:
import pandas as pd

# Lift pandas' display limits so frames are rendered in full in this notebook.
pd.options.display.max_rows = None       # no cap on the number of rows shown
pd.options.display.max_columns = None    # no cap on the number of columns shown
pd.options.display.width = None          # no fixed display width (pandas auto-detects it when run in a terminal)
pd.options.display.max_colwidth = None   # never truncate cell contents (matters mostly for long strings)

def load_and_sort_csv(path, date_col='Date'):
    """Read a CSV and return it indexed by date string, sorted ascending.

    Parameters
    ----------
    path : str, path object or file-like
        Passed straight to ``pd.read_csv`` as its first argument.
    date_col : str, default 'Date'
        Name of the column that is parsed as dates and used as the index.

    Returns
    -------
    pandas.DataFrame
        Frame whose index holds ``'%Y-%m-%d'`` strings, sorted in ascending
        index order.
    """
    df = pd.read_csv(path, parse_dates=[date_col], index_col=date_col)
    # Replace the datetime index with 'YYYY-MM-DD' strings; zero-padded ISO
    # strings sort lexicographically in chronological order.
    df.index = df.index.strftime('%Y-%m-%d')
    return df.sort_index(ascending=True)

In [7]:
# Load each source table through load_and_sort_csv.
# Only the CBBI frame has the "_cbbi" suffix appended to every column name.
cbbi = load_and_sort_csv('data/Bitcoin Bull Run Index (CBBI).csv').add_suffix('_cbbi')
cscsi20 = load_and_sort_csv('data/cscsi20_CompassSESAMmCryptoSentimentIndex.csv')
dataset = load_and_sort_csv('data/dataset.csv')
fear_greed = load_and_sort_csv('data/fear_greed_index.csv')
sentiment = load_and_sort_csv('data/sentiment_grouped.csv')

In [9]:
# Align all sources column-wise on their shared index; join='inner' keeps only
# the index labels that are present in every frame.
source_frames = [cbbi, cscsi20, dataset, fear_greed, sentiment]
merged = pd.concat(source_frames, axis=1, join='inner')

# Move the 'btc_price' column to the last position.
btc_price = merged['btc_price']
other_columns = merged.drop(columns=['btc_price'])
df = pd.concat([other_columns, btc_price], axis=1)

# Write the combined frame to data/final_dataset.csv.
df.to_csv('data/final_dataset.csv')

In [11]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.model_selection import train_test_split # For a basic split, but time series needs careful handling
from statsmodels.tsa.stattools import adfuller
import matplotlib.pyplot as plt
import seaborn as sns

# --- 2. Handle Missing Values (NaNs) ---
# Strategy: forward fill first (carry the previous row's value forward), then
# backward fill to cover any NaNs still left in the first row(s).
# Alternatives worth considering: interpolation or more complex imputation.
# NOTE(review): backward fill copies later values into earlier rows; if this
# frame feeds a forecasting model that may leak future information — confirm
# this is acceptable.
null_counts = df.isnull().sum()  # per-column NaN count, computed once
print(f"\nMissing values before handling:\n{null_counts[null_counts > 0]}")

# Chain the two fills and rebind `df` instead of mutating it with inplace=True,
# so re-running this cell always starts from a well-defined frame.
df = df.ffill().bfill()
print(f"\nMissing values after ffill and bfill:\n{df.isnull().sum().sum()}")

df.head(3)


Missing values before handling:
Series([], dtype: int64)

Missing values after ffill and bfill:
0


Unnamed: 0_level_0,Price_cbbi,PiCycle_cbbi,RUPL_cbbi,RHODL_cbbi,Puell_cbbi,2YMA_cbbi,Trolololo_cbbi,MVRV_cbbi,ReserveRisk_cbbi,Woobull_cbbi,Confidence_cbbi,CSCSI20,active_addresses_blockchain,hash_rate_blockchain,miner_revenue_blockchain,difficulty_blockchain,estimated_transaction_volume_usd_blockchain,Gold_Price,Gold_Share,Gold_Volatility,Oil_Crude_Price,Oil_Brent_Price,Oil_Volatility,DJI,GSPC,IXIC,NYFANG,CBOE_Volatility,EM_ETF,DXY,EURUSD,btc_sma_14,btc_ema_14,btc_rsi_14,btc_macd,btc_macd_signal,btc_macd_diff,btc_bb_high,btc_bb_low,btc_bb_mid,btc_bb_width,btc_atr_14,btc_trading_volume,btc_volatility_index,Fear Greed,positive_sentiment,negative_sentiment,bullish_sentiment,bearish_sentiment,risk_uncertainty_sentiment,problem_malicious_sentiment,active_trading_sentiment,long_term_investment_sentiment,market_narrative_sentiment,core_technology_sentiment,development_ecosystem_sentiment,news_events_sentiment,regulations_sentiment,community_social_sentiment,price_sentiment,volume_sentiment,marketcap_sentiment,btc_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1
2017-03-01,1179.77,0.6505,0.8657,0.4903,0.4827,0.5251,0.0458,0.6642,0.3038,0.454,0.498,100.0,531643.0,3155225.0,2448422.0,440779900000.0,283137400.0,1248.900024,119.059998,14.41,53.830002,55.599998,24.67,21115.550781,2395.959961,5904.029785,1564.329956,12.54,32.264683,101.779999,1.056915,1122.8957,1132.690334,77.52711,55.658198,45.465383,10.192815,1237.442187,934.109498,1085.775842,27.936953,35.208378,229056992,42.810059,81.0,284.0,204.714286,120.0,47.0,62.8,69.714286,104.5,305.75,21.4,157.0,188.545455,95.875,128.0,45.6,1012.0,52.0,55.0,1222.5
2017-03-02,1194.63,0.6518,0.874,0.4958,0.5234,0.5285,0.0504,0.6869,0.3014,0.4577,0.5078,106.98,565742.0,3440072.0,2775953.0,440779900000.0,431801400.0,1231.900024,117.580002,13.89,52.610001,55.080002,25.940001,21002.970703,2381.919922,5861.220215,1549.859985,11.81,31.696007,102.199997,1.053585,1138.86499,1148.466291,79.815674,60.179446,48.408195,11.77125,1259.793827,937.991458,1098.892642,29.284241,36.015638,368275008,46.51001,78.0,343.6,244.428571,185.0,63.0,81.2,79.571429,133.5,350.875,29.0,156.875,201.363636,94.375,142.8,50.6,1356.0,63.0,84.0,1251.01001
2017-03-03,1225.1999,0.6532,0.8802,0.5027,0.5163,0.5361,0.0605,0.6834,0.3131,0.4651,0.5123,110.47,583123.0,3458650.0,2741613.0,445545600000.0,422240200.0,1225.5,117.510002,13.66,53.330002,55.900002,25.59,21005.710938,2383.120117,5870.75,1550.199951,10.96,31.938534,101.540001,1.050972,1155.206421,1165.336118,81.520321,64.948862,51.716329,13.232533,1284.394771,940.444511,1112.419641,30.919111,35.557385,315739008,29.600098,77.0,313.6,222.0,151.0,63.0,76.4,70.714286,124.0,336.125,22.4,151.875,192.818182,94.75,137.4,42.6,1235.0,45.0,74.0,1274.98999
