In [1]:
import pandas as pd
import numpy as np

# Load the input table (a merged 4-hour BTC / sentiment dataset, per its
# filename); the "date" column is parsed into datetimes while reading.
df = pd.read_csv(
    "../data/nettoyé/merge_btc_sentiment_4h.csv",
    parse_dates=["date"],
)


In [2]:
# Price / volume feature engineering: adds per-bar return, range and
# order-flow columns to `df`.

# Work on an independent copy so the frame previously bound to `df` is not
# mutated by the column assignments below.
df = df.copy()

# Returns (bar-over-bar, relative to the previous row's close).
# fill_method=None: do NOT forward-fill missing closes before differencing.
# The implicit default (fill_method="pad") is deprecated in pandas >= 2.1 and
# would make return_4h disagree with log_return around a NaN close, because
# log_return is built from an unfilled shift(1).
df["return_4h"] = df["close"].pct_change(fill_method=None)
df["log_return"] = np.log(df["close"] / df["close"].shift(1))

# Volatility / structure
# Bar range (high - low) and body (close - open), expressed as fractions of
# the close and open price respectively.
df["high_low_pct"] = (df["high"] - df["low"]) / df["close"]
df["close_open_pct"] = (df["close"] - df["open"]) / df["open"]

# Order flow
# Share of the bar's volume that was taker-buy volume.
# NOTE(review): a bar with volume == 0 yields NaN/inf here — confirm whether
# such bars can occur in the input.
df["buy_pressure"] = df["taker_buy_base_vol"] / df["volume"]


In [3]:
# News impact: mean sentiment multiplied by the matching article count, so
# rows with more news carry proportionally more weight.
df["sentiment_weighted"] = df["news_sentiment_mean"].mul(df["news_count"])
# Same weighting applied to the "news21_24" columns
# (presumably a 21h-24h news window — TODO confirm against the data source).
df["sentiment_weighted_21_24"] = df["news21_24_sentiment_mean"].mul(
    df["news21_24_count"]
)

# Emotional momentum: change versus the previous row (first row is NaN).
df["sentiment_momentum"] = df["news_sentiment_mean"].diff(periods=1)
df["fear_greed_delta"] = df["fear_greed_index"].diff(periods=1)


In [8]:
# Normalise "date" to timezone-aware UTC datetimes; values that cannot be
# parsed become NaT instead of raising (errors="coerce").
df["date"] = pd.to_datetime(df["date"], errors="coerce", utc=True)

# Hour of day, plus a sine/cosine encoding on a 24-hour circle so that
# hour 23 and hour 0 end up close to each other.
df["hour"] = df["date"].dt.hour
df["hour_sin"] = np.sin(df["hour"].mul(2 * np.pi).div(24))
df["hour_cos"] = np.cos(df["hour"].mul(2 * np.pi).div(24))

# Day of week as an integer, Monday = 0 ... Sunday = 6.
df["day_of_week"] = df["date"].dt.weekday


In [9]:
# Discard every row that holds a missing value in any column, then renumber
# the index from 0 so it is contiguous again.
df = (
    df
    .dropna(axis="index", how="any")
    .reset_index(drop=True)
)

# Print column dtypes / non-null counts, then display the first rows.
df.info()
df.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17467 entries, 0 to 17466
Data columns (total 30 columns):
 #   Column                    Non-Null Count  Dtype              
---  ------                    --------------  -----              
 0   date                      17467 non-null  datetime64[ns, UTC]
 1   open                      17467 non-null  float64            
 2   high                      17467 non-null  float64            
 3   low                       17467 non-null  float64            
 4   close                     17467 non-null  float64            
 5   volume                    17467 non-null  float64            
 6   n_trades                  17467 non-null  int64              
 7   taker_buy_base_vol        17467 non-null  float64            
 8   taker_buy_quote_vol       17467 non-null  float64            
 9   fear_greed_index          17467 non-null  float64            
 10  news_sentiment_mean       17467 non-null  float64            
 11  news_sentiment_

Unnamed: 0,date,open,high,low,close,volume,n_trades,taker_buy_base_vol,taker_buy_quote_vol,fear_greed_index,...,close_open_pct,buy_pressure,sentiment_weighted,sentiment_weighted_21_24,sentiment_momentum,fear_greed_delta,hour,hour_sin,hour_cos,day_of_week
0,2018-01-01 04:00:00+00:00,13434.98,13818.55,13322.15,13570.35,1302.214836,18061,588.516222,7998796.0,0.0,...,0.010076,0.451935,0.0,0.0,0.0,0.0,4,0.8660254,0.5,0
1,2018-01-01 08:00:00+00:00,13569.98,13735.24,13001.13,13220.56,1319.755931,17200,582.668293,7792109.0,0.0,...,-0.025749,0.441497,0.0,0.0,0.0,0.0,8,0.8660254,-0.5,0
2,2018-01-01 12:00:00+00:00,13220.56,13330.0,12750.0,13247.0,1831.933153,21357,830.314747,10833040.0,0.0,...,0.002,0.453245,0.0,0.0,0.0,0.0,12,1.224647e-16,-1.0,0
3,2018-01-01 16:00:00+00:00,13247.0,13290.65,12940.0,13240.37,1092.337234,13178,557.453779,7324320.0,0.0,...,-0.0005,0.510331,0.0,0.0,0.0,0.0,16,-0.8660254,-0.5,0
4,2018-01-01 20:00:00+00:00,13222.03,13599.7,13222.03,13380.0,1387.469883,16361,663.467239,8923949.0,0.0,...,0.011947,0.478185,0.0,0.0,0.0,0.0,20,-0.8660254,0.5,0
