In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [2]:
# Load the BTC OHLCV history from CSV (one row per date, judging by the
# timestamps shown below) and display it. `timestamp` is left as read here;
# it is converted with pd.to_datetime in a later cell.
df= pd.read_csv("btc_data.csv")
df

Unnamed: 0,timestamp,open,high,low,close,volume
0,2019-09-08,10000.00,10412.65,10000.00,10391.63,3096.291
1,2019-09-09,10316.62,10475.54,10077.22,10307.00,14824.373
2,2019-09-10,10307.00,10382.97,9940.87,10102.02,9068.955
3,2019-09-11,10094.27,10293.11,9884.31,10159.55,10897.922
4,2019-09-12,10163.06,10450.13,10042.12,10415.13,15609.634
...,...,...,...,...,...,...
1551,2023-12-07,43775.10,44066.70,42815.00,43296.00,346421.958
1552,2023-12-08,43296.00,44779.30,43100.00,44209.50,281880.593
1553,2023-12-09,44209.60,44389.20,43555.00,43734.30,189339.656
1554,2023-12-10,43734.20,44099.20,43552.50,43810.00,144548.259


In [3]:
#df= data.drop("hour",axis=1)

In [4]:
# Show the frame again; no code has modified it since it was loaded
# (the previous cell contains only a commented-out statement).
df

Unnamed: 0,timestamp,open,high,low,close,volume
0,2019-09-08,10000.00,10412.65,10000.00,10391.63,3096.291
1,2019-09-09,10316.62,10475.54,10077.22,10307.00,14824.373
2,2019-09-10,10307.00,10382.97,9940.87,10102.02,9068.955
3,2019-09-11,10094.27,10293.11,9884.31,10159.55,10897.922
4,2019-09-12,10163.06,10450.13,10042.12,10415.13,15609.634
...,...,...,...,...,...,...
1551,2023-12-07,43775.10,44066.70,42815.00,43296.00,346421.958
1552,2023-12-08,43296.00,44779.30,43100.00,44209.50,281880.593
1553,2023-12-09,44209.60,44389.20,43555.00,43734.30,189339.656
1554,2023-12-10,43734.20,44099.20,43552.50,43810.00,144548.259


In [5]:
# Calculate indicators

# Simple (SMA) and exponential (EMA) moving averages of the close for the
# 50-, 100- and 200-day windows. Columns are created in the order
# SMA_50, EMA_50, SMA_100, EMA_100, SMA_200, EMA_200.
for window_days, sma_column, ema_column in (
    (50, 'SMA_50', 'EMA_50'),
    (100, 'SMA_100', 'EMA_100'),
    (200, 'SMA_200', 'EMA_200'),
):
    # SMA: plain rolling mean; NaN until a full window of closes exists.
    df[sma_column] = df['close'].rolling(window=window_days).mean()
    # EMA: recursive form (adjust=False), seeded with the first close, so it
    # has a value on every row.
    df[ema_column] = df['close'].ewm(span=window_days, adjust=False).mean()

# Moving Average Convergence Divergence (MACD)
# MACD line  = 12-period EMA of the close minus the 26-period EMA.
# Signal line = 9-period EMA of the MACD line.
fast_ema = df['close'].ewm(span=12, adjust=False).mean()
slow_ema = df['close'].ewm(span=26, adjust=False).mean()
df['MACD'] = fast_ema - slow_ema
df['MACD_signal'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Relative Strength Index (RSI) - 14 days, using simple rolling means of the
# daily gains and losses.
delta = df['close'].diff()
# The first diff is NaN; both comparisons are False for it, so row 0 counts as
# a zero gain and a zero loss.
gain = np.where(delta > 0, delta, 0)
loss = np.where(delta < 0, -delta, 0)
# np.where returns bare ndarrays. Wrap them on df's own index: a Series built
# without `index=` gets a fresh 0..n-1 index and would be misaligned (or all
# NaN) when assigned back if df is ever indexed by anything else, e.g. the
# timestamp or a filtered subset.
average_gain = pd.Series(gain, index=df.index).rolling(window=14).mean()
average_loss = pd.Series(loss, index=df.index).rolling(window=14).mean()
# A window without any loss gives rs = inf, which maps to RSI = 100 below;
# a window with neither gains nor losses gives 0/0 = NaN.
rs = average_gain / average_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Bollinger Bands: 20-day rolling mean of the close, plus/minus 1.96 rolling
# standard deviations.
# NOTE(review): the multiplier is 1.96 rather than the 2 usually quoted for
# Bollinger Bands - confirm this is intentional.
close_window = df['close'].rolling(window=20)
band_half_width = 1.96 * close_window.std()
df['middle_band'] = close_window.mean()
df['upper_band'] = df['middle_band'] + band_half_width
df['lower_band'] = df['middle_band'] - band_half_width

# Average True Range (ATR) - 14 days
# The three candidate ranges are stored as scratch columns; they are removed
# again at the end of this cell.
previous_close = df['close'].shift()
df['high_low'] = df['high'] - df['low']
df['high_close'] = (df['high'] - previous_close).abs()
df['low_close'] = (df['low'] - previous_close).abs()
# True range = the largest of the three. max() skips NaN, so on the first row
# (no previous close) it falls back to high - low.
true_range_parts = ['high_low', 'high_close', 'low_close']
df['tr'] = df[true_range_parts].max(axis=1)
df['ATR'] = df['tr'].rolling(window=14).mean()

# Stochastic Oscillator - %K with a 14-day period:
# where the close sits between the 14-day lowest low (L14) and the 14-day
# highest high (H14), as a percentage.
# L14 / H14 stay on the frame for now because Williams %R reuses them.
df['L14'] = df['low'].rolling(window=14).min()
df['H14'] = df['high'].rolling(window=14).max()
range_14 = df['H14'] - df['L14']
close_above_low = df['close'] - df['L14']
df['%K'] = 100 * (close_above_low / range_14)

# Volume Weighted Average Price (VWAP)
# Cumulative from the first row of the frame (never reset): running sum of
# volume * typical price, divided by the running sum of volume.
high_low_close_sum = df['high'] + df['low'] + df['close']
volume_times_typical = df['volume'] * high_low_close_sum / 3
cumulative_volume = df['volume'].cumsum()
df['VWAP'] = volume_times_typical.cumsum() / cumulative_volume

# On-Balance Volume (OBV)
# Running total of signed volume: +volume when the close rose versus the
# previous row, -volume when it fell, 0 when unchanged. The first row has no
# previous close (both comparisons are False), so it contributes 0.
prior_close = df['close'].shift()
signed_volume = np.select(
    [df['close'] > prior_close, df['close'] < prior_close],
    [df['volume'], -df['volume']],
    default=0,
)
df['OBV'] = signed_volume.cumsum()

# Williams %R - 14 days
# Distance of the close below the 14-day high, as a share of the 14-day
# high-low range, scaled by -100. Reuses the H14 / L14 scratch columns.
gap_to_high = df['H14'] - df['close']
df['%R'] = -100 * (gap_to_high / (df['H14'] - df['L14']))

# Remove the scratch columns that only existed to derive ATR, %K and %R.
scratch_columns = ['high_low', 'high_close', 'low_close', 'tr', 'L14', 'H14']
df.drop(labels=scratch_columns, axis='columns', inplace=True)

# Preview the first rows with the new indicator columns.
df.head()

Unnamed: 0,timestamp,open,high,low,close,volume,SMA_50,EMA_50,SMA_100,EMA_100,...,MACD_signal,RSI,middle_band,upper_band,lower_band,ATR,%K,VWAP,OBV,%R
0,2019-09-08,10000.0,10412.65,10000.0,10391.63,3096.291,,10391.63,,10391.63,...,0.0,,,,,,,10268.093333,0.0,
1,2019-09-09,10316.62,10475.54,10077.22,10307.0,14824.373,,10388.311176,,10389.954158,...,-1.350222,,,,,,,10283.391431,-14824.373,
2,2019-09-10,10307.0,10382.97,9940.87,10102.02,9068.955,,10377.084072,,10384.252492,...,-6.743215,,,,,,,10235.865909,-23893.328,
3,2019-09-11,10094.27,10293.11,9884.31,10159.55,10897.922,,10368.553324,,10379.802938,...,-13.454203,,,,,,,10200.330286,-12995.406,
4,2019-09-12,10163.06,10450.13,10042.12,10415.13,15609.634,,10370.37986,,10380.502483,...,-16.531188,,,,,,,10230.13013,2614.228,


In [6]:
# Count missing values per column. The rolling-window indicators are NaN until
# their first full window; the EMA-based, VWAP and OBV columns have no gaps.
df.isna().sum()

timestamp        0
open             0
high             0
low              0
close            0
volume           0
SMA_50          49
EMA_50           0
SMA_100         99
EMA_100          0
SMA_200        199
EMA_200          0
MACD             0
MACD_signal      0
RSI             13
middle_band     19
upper_band      19
lower_band      19
ATR             13
%K              13
VWAP             0
OBV              0
%R              13
dtype: int64

In [7]:
# Persist the frame with the indicator columns; index=False keeps the row
# index out of the file. The rolling-window columns still carry their leading
# NaNs here (see the isna() counts above).
df.to_csv("btc_indicators.csv",index=False)

In [8]:
# Parse the timestamp column into datetimes so the .dt accessor is available.
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Seasonal (calendar) features derived from the timestamp.
timestamp_parts = df['timestamp'].dt
df['week_of_year'] = timestamp_parts.isocalendar().week   # ISO week number
df['day_of_week'] = timestamp_parts.dayofweek             # Monday=0, Sunday=6
df['month_of_year'] = timestamp_parts.month               # January=1

In [9]:
# Trailing traded volume: rolling sum of 'volume' over the last 7 and the last
# 30 rows (days). NaN until a full window is available.
for lookback_days, volume_column in ((7, 'volume_7_days'), (30, 'volume_30_days')):
    df[volume_column] = df['volume'].rolling(window=lookback_days).sum()


In [10]:
# Inspect the frame after adding the calendar and rolling-volume columns.
df

Unnamed: 0,timestamp,open,high,low,close,volume,SMA_50,EMA_50,SMA_100,EMA_100,...,ATR,%K,VWAP,OBV,%R,week_of_year,day_of_week,month_of_year,volume_7_days,volume_30_days
0,2019-09-08,10000.00,10412.65,10000.00,10391.63,3096.291,,10391.630000,,10391.630000,...,,,10268.093333,0.000,,36,6,9,,
1,2019-09-09,10316.62,10475.54,10077.22,10307.00,14824.373,,10388.311176,,10389.954158,...,,,10283.391431,-14824.373,,37,0,9,,
2,2019-09-10,10307.00,10382.97,9940.87,10102.02,9068.955,,10377.084072,,10384.252492,...,,,10235.865909,-23893.328,,37,1,9,,
3,2019-09-11,10094.27,10293.11,9884.31,10159.55,10897.922,,10368.553324,,10379.802938,...,,,10200.330286,-12995.406,,37,2,9,,
4,2019-09-12,10163.06,10450.13,10042.12,10415.13,15609.634,,10370.379860,,10380.502483,...,,,10230.130130,2614.228,,37,3,9,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1551,2023-12-07,43775.10,44066.70,42815.00,43296.00,346421.958,36287.766,36400.776386,31528.112,33517.180425,...,1275.735714,84.260549,27999.142108,-3128257.440,-15.739451,49,3,12,2685400.806,9415076.997
1552,2023-12-08,43296.00,44779.30,43100.00,44209.50,281880.593,36598.018,36707.000841,31697.303,33728.909526,...,1312.278571,92.947409,28006.931340,-2846376.847,-7.052591,49,4,12,2624919.405,9362178.141
1553,2023-12-09,44209.60,44389.20,43555.00,43734.30,189339.656,36879.644,36982.581200,31875.366,33927.036070,...,1349.321429,87.065711,28012.114509,-3035716.503,-12.934289,49,5,12,2601048.750,8772639.860
1554,2023-12-10,43734.20,44099.20,43552.50,43810.00,144548.259,37157.936,37250.323114,32055.510,34122.738326,...,1339.685714,88.002673,28016.051241,-2891168.244,-11.997327,49,6,12,2523679.104,8617814.227


In [11]:
# Price Volume Trend (PVT)
# Running total of volume multiplied by the fractional change of the close
# versus the previous row. The first row has no previous close, so it is NaN.
previous_close = df['close'].shift(1)
close_return = (df['close'] - previous_close) / previous_close
df['PVT'] = (df['volume'] * close_return).cumsum()

In [14]:
# Inspect the frame after adding PVT.
# NOTE(review): the saved output of this cell already shows the NaN-filled
# values (e.g. SMA_50 on row 0), and its execution count (14) is higher than
# the fill cell's (13) that follows it. The cells were run out of order; move
# this display below the fill cell so a top-to-bottom run reproduces it.
df

Unnamed: 0,timestamp,open,high,low,close,volume,SMA_50,EMA_50,SMA_100,EMA_100,...,%K,VWAP,OBV,%R,week_of_year,day_of_week,month_of_year,volume_7_days,volume_30_days,PVT
0,2019-09-08,10000.00,10412.65,10000.00,10391.63,3096.291,8867.1508,10391.630000,8466.4587,10391.630000,...,47.942931,10268.093333,0.000,-52.057069,36,6,9,93406.520,875066.779,-120.730500
1,2019-09-09,10316.62,10475.54,10077.22,10307.00,14824.373,8867.1508,10388.311176,8466.4587,10389.954158,...,47.942931,10283.391431,-14824.373,-52.057069,37,0,9,93406.520,875066.779,-120.730500
2,2019-09-10,10307.00,10382.97,9940.87,10102.02,9068.955,8867.1508,10377.084072,8466.4587,10384.252492,...,47.942931,10235.865909,-23893.328,-52.057069,37,1,9,93406.520,875066.779,-301.088936
3,2019-09-11,10094.27,10293.11,9884.31,10159.55,10897.922,8867.1508,10368.553324,8466.4587,10379.802938,...,47.942931,10200.330286,-12995.406,-52.057069,37,2,9,93406.520,875066.779,-239.026353
4,2019-09-12,10163.06,10450.13,10042.12,10415.13,15609.634,8867.1508,10370.379860,8466.4587,10380.502483,...,47.942931,10230.130130,2614.228,-52.057069,37,3,9,93406.520,875066.779,153.659372
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1551,2023-12-07,43775.10,44066.70,42815.00,43296.00,346421.958,36287.7660,36400.776386,31528.1120,33517.180425,...,84.260549,27999.142108,-3128257.440,-15.739451,49,3,12,2685400.806,9415076.997,407907.380379
1552,2023-12-08,43296.00,44779.30,43100.00,44209.50,281880.593,36598.0180,36707.000841,31697.3030,33728.909526,...,92.947409,28006.931340,-2846376.847,-7.052591,49,4,12,2624919.405,9362178.141,413854.764011
1553,2023-12-09,44209.60,44389.20,43555.00,43734.30,189339.656,36879.6440,36982.581200,31875.3660,33927.036070,...,87.065711,28012.114509,-3035716.503,-12.934289,49,5,12,2601048.750,8772639.860,411819.585949
1554,2023-12-10,43734.20,44099.20,43552.50,43810.00,144548.259,37157.9360,37250.323114,32055.5100,34122.738326,...,88.002673,28016.051241,-2891168.244,-11.997327,49,6,12,2523679.104,8617814.227,412069.785522


In [13]:
# Fill NaNs with the first non-NaN value in the column.
# NOTE: for the rolling-window columns this copies the first computed value
# backwards into the warm-up rows, i.e. those rows receive a value derived
# from later data.
for column in ["PVT",'SMA_50','SMA_100','SMA_200', 'RSI', 'middle_band', 'upper_band', 'lower_band', 'ATR', '%K', '%R',"volume_7_days","volume_30_days"]:
    non_missing = df[column].dropna()
    if non_missing.empty:
        # Entirely-NaN column: there is nothing to fill with. (Indexing
        # .iloc[0] on the empty result would raise IndexError.)
        continue
    first_non_nan = non_missing.iloc[0]
    # Assign the filled Series back to the frame. Calling
    # df[column].fillna(..., inplace=True) is chained in-place assignment on an
    # intermediate Series: it warns on recent pandas 2.x and leaves df
    # untouched once Copy-on-Write is enabled.
    df[column] = df[column].fillna(first_non_nan)

In [15]:
# Persist the final frame (indicators, calendar features, rolling volume sums
# and PVT); index=False keeps the row index out of the file.
df.to_csv("btc_indicators_seasonal.csv",index=False)