In [19]:
import pandas as pd
from ta import add_all_ta_features
from ta.trend import SMAIndicator, EMAIndicator, MACD
from ta.momentum import RSIIndicator
from ta.volatility import BollingerBands, AverageTrueRange

In [None]:
import yfinance as yf
from pathlib import Path

# Initialize the Ticker for Bitcoin in USD
btc = yf.Ticker("BTC-USD")

# Retrieve general information
info = btc.info
print("Info:", info)

# Retrieve historical market data for the past 10 years
hist = btc.history(period="10y")
print("Historical Market Data:", hist)

# An empty frame here means the download produced no rows; stop before the
# raw CSV on disk is overwritten with nothing.
if hist.empty:
    raise RuntimeError("No historical data returned for BTC-USD; raw CSV left untouched.")

# Save historical data to CSV, creating the target directory on a fresh checkout
raw_csv_path = Path("../data/raw/historical_data.csv")
raw_csv_path.parent.mkdir(parents=True, exist_ok=True)
hist.to_csv(raw_csv_path)

In [20]:
# Reload the raw price history from disk and index it by its parsed 'Date' column.
df = (
    pd.read_csv("../data/raw/historical_data.csv", parse_dates=['Date'])
    .set_index('Date')
)

In [21]:
# Empty feature table; the following cells add one column per indicator to it.
data = pd.DataFrame()

In [None]:
# Carry the closing price over unchanged as the base series of the feature table.
data = data.assign(btc_close=df['Close'])

In [None]:
# Simple moving average of the close over a 14-row window.
sma_14 = SMAIndicator(close=df['Close'], window=14)
data['btc_sma_14'] = sma_14.sma_indicator()

In [None]:
# Exponential moving average of the close over a 14-row window.
ema_14 = EMAIndicator(close=df['Close'], window=14)
data['btc_ema_14'] = ema_14.ema_indicator()

In [None]:
# Relative Strength Index of the close over a 14-row window.
rsi_14 = RSIIndicator(close=df['Close'], window=14)
data['btc_rsi_14'] = rsi_14.rsi()

In [None]:
# MACD on the close, using the library's default window settings.
macd = MACD(close=df['Close'])

# Store the MACD line, its signal line and their difference, in that order.
for column, series in (
    ('btc_macd', macd.macd()),
    ('btc_macd_signal', macd.macd_signal()),
    ('btc_macd_diff', macd.macd_diff()),
):
    data[column] = series

In [None]:
# Bollinger Bands on the close: 20-row window, 2 deviations.
bb = BollingerBands(close=df['Close'], window=20, window_dev=2)

# Append the upper band, lower band, moving average and band width, in that order.
data = data.assign(
    btc_bb_high=bb.bollinger_hband(),
    btc_bb_low=bb.bollinger_lband(),
    btc_bb_mid=bb.bollinger_mavg(),
    btc_bb_width=bb.bollinger_wband(),
)

In [None]:
# Average True Range over a 14-row window, computed from high, low and close.
atr_14 = AverageTrueRange(
    high=df['High'],
    low=df['Low'],
    close=df['Close'],
    window=14,
)
data['btc_atr_14'] = atr_14.average_true_range()

In [None]:
# Copy the raw traded volume into the feature table unchanged.
data = data.assign(btc_trading_volume=df['Volume'])

In [None]:
# Per-row high-low range (High minus Low), stored as a simple volatility proxy.
data['btc_volatility_index'] = df['High'].sub(df['Low'])

In [31]:
# Persist the feature table (index included) for the clean-up step.
# NOTE(review): this path is relative to the working directory ('data/...'),
# whereas the raw and processed files use '../data/...' - confirm it is intended.
indicators_csv = 'data/btc_usd_with_indicators.csv'
data.to_csv(indicators_csv)

In [None]:
import pandas as pd
from pathlib import Path

# Load the indicator table written by the feature-engineering step
df = pd.read_csv('data/btc_usd_with_indicators.csv')

# Indicator columns to clean. The names carry the 'btc_' prefix used when the
# columns are created in this notebook; un-prefixed names such as 'SMA_14' do
# not exist in a freshly generated file and would raise KeyError.
columns = ['btc_sma_14', 'btc_ema_14', 'btc_rsi_14', 'btc_macd', 'btc_macd_signal', 'btc_macd_diff',
           'btc_bb_high', 'btc_bb_low', 'btc_bb_mid', 'btc_bb_width', 'btc_atr_14']

# Fail early with a readable message if the input file has an unexpected schema
missing = [col for col in columns if col not in df.columns]
if missing:
    raise KeyError(f"Indicator columns missing from input CSV: {missing}")

# Replace the initial run of NaN or zero values in each column with that
# column's first valid (non-null, non-zero) value.
for col in columns:
    is_valid = df[col].notna() & df[col].ne(0)

    # Fallback of 0 in case the column is completely null or zero
    first_valid = df.loc[is_valid, col].iloc[0] if is_valid.any() else 0

    # cummin() keeps the mask True only until the first valid row, so NaN/zero
    # values that occur later in the series are left untouched.
    warmup = (~is_valid).cummin()
    df[col] = df[col].mask(warmup, first_valid)

# Save the updated dataset to a new CSV file, creating the directory if needed
output_path = Path('../data/processed/btc_usd_with_indicators_updated.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)

print(f"Initial NaN or zero values replaced and updated file saved as '{output_path}'.")

Initial NaN or zero values replaced and updated file saved as 'updated_file.csv'.
