In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
import plotly.graph_objects as go

import warnings
warnings.simplefilter("ignore", category=FutureWarning)

# Step 1: Data Collection

Obtain daily historical Bitcoin (BTC-USD) price data from Yahoo Finance for 2019 through 2024 (roughly six years).

In [None]:
# Ticker and date window for the Yahoo Finance request.
# NOTE(review): yfinance documents `end` as exclusive, so the last row returned
# should be the day before end_date — confirm against the installed version.
crypto_symbol = 'BTC-USD'
start_date, end_date = '2019-01-01', '2024-12-31'

# Fetch the price history for the ticker over the requested window
df_btc = yf.download(tickers=crypto_symbol, start=start_date, end=end_date)
df_btc.head()

# Step 2: Data Preprocessing
Prepare the data through cleaning and preprocessing. Ensure the date column is in date-time format, remove duplicates, handle any missing values and check for outliers.

In [None]:
# Some yfinance releases return two-level columns even for a single ticker.
# Keep only the first level so that df_btc['Close'] is a Series.
# NOTE(review): assumes level 0 holds the field names (Open/High/...) — confirm
# against the installed yfinance version.
if isinstance(df_btc.columns, pd.MultiIndex):
    df_btc.columns = df_btc.columns.get_level_values(0).rename(None)

# Move the date index into a regular 'Date' column. Guarded so that re-running
# this cell does not add a stray 'index' column.
if 'Date' not in df_btc.columns:
    df_btc.reset_index(inplace=True)

# Make sure the date column really is datetime (no-op when it already is)
df_btc['Date'] = pd.to_datetime(df_btc['Date'])

# Check for missing values and remove them if any
df_btc.dropna(inplace=True)

# Check for duplicate entries and remove them if any
df_btc.drop_duplicates(inplace=True)

# Drop the 'Adj Close' column, treated by this notebook as a duplicate of 'Close'.
# errors='ignore' keeps the cell working when the column is absent (cell re-run,
# or a yfinance version that does not return it).
df_btc.drop(columns=['Adj Close'], errors='ignore', inplace=True)

df_btc.head()

In [None]:
# Inspect each column's dtype and non-null count
df_btc.info()

In [None]:
# Summary statistics (count, mean, spread, quantiles) for the price and volume columns
df_btc.describe()

In [None]:
# Candlestick view of Bitcoin's open/high/low/close prices over time
ohlc_series = {name.lower(): df_btc[name] for name in ('Open', 'High', 'Low', 'Close')}
candlestick = go.Candlestick(x=df_btc['Date'], **ohlc_series)

fig = go.Figure()
fig.add_trace(candlestick)

# Title and axis labels; the range slider under the x-axis is switched off
fig.update_layout(
    title='Bitcoin Candlestick Chart',
    xaxis_title='Date',
    yaxis_title='Price',
    xaxis_rangeslider_visible=False,
)

fig.show()


## Step 3: Feature Engineering
Generate meaningful features to capture Bitcoin price patterns. Common features include rolling averages, technical indicators, and sentiment scores from news or social media.

### 1. Return Metrics
- **Log Return:** The logarithmic (continuously compounded) return is the difference between the logs of consecutive closing prices, $\ln(P_t / P_{t-1})$. A positive value indicates growth from the previous day, while a negative value indicates a decline.

In [None]:
# Daily log return: ln(close_t / close_{t-1}); the first row has no previous close, so it is NaN
df_btc['Log_Return'] = df_btc['Close'].div(df_btc['Close'].shift(periods=1)).pipe(np.log)

### 2. Volatility Measures
- **Historical Volatility:** The rolling standard deviation of daily log returns, scaled to an annual figure, representing how widely returns vary. Lower is often preferred for less risk, while higher can indicate more risk.
- **Average True Range (ATR):** Represents market volatility and reflects the degree of price variation. Lower values suggest less volatility, while higher values can indicate increased risk or trading opportunity.

In [None]:
# Historical volatility: 21-day rolling standard deviation of the daily log
# returns, annualized. The BTC-USD series has one row per calendar day
# (weekends included), so the daily figure is scaled by sqrt(365); the equity
# convention of sqrt(252) would understate the annualized value for this data.
df_btc['Hist_Volatility'] = df_btc['Log_Return'].rolling(window=21).std() * np.sqrt(365)

# True Range: the largest of (high - low), |high - previous close| and
# |low - previous close|. np.maximum propagates NaN, so the first row (which has
# no previous close) is NaN.
df_btc['True_Range'] = np.maximum(df_btc['High'] - df_btc['Low'],
                                  np.maximum(np.abs(df_btc['High'] - df_btc['Close'].shift(1)),
                                             np.abs(df_btc['Low'] - df_btc['Close'].shift(1))))

# Average True Range (ATR): simple 14-day mean of the True Range
df_btc['ATR'] = df_btc['True_Range'].rolling(window=14).mean()

### 3. Trend Indicators
- **Weighted Close Price:** A weighted average of the high, low, and close prices in which the close counts twice: (High + Low + 2 × Close) / 4. How it is interpreted depends on context and strategy.
- **Momentum:** Indicates the strength and direction of a price movement, computed here as the change in closing price relative to four days earlier. Positive values (upward trend) are generally good, while negative values indicate a potentially unfavorable downward trend.
- **Moving Averages (MA7, MA30, EMA12, EMA26):** Averages prices over a specified period, smoothing price data. Crossings of different MAs (short-term vs. long-term) are used to identify potential trend reversals.
- **Moving Average Convergence Divergence (MACD):** Indicates the relationship between two moving averages of a security's price. Values above the signal line or above zero are often seen as positive.

In [None]:
# Weighted close: (High + Low + 2 * Close) / 4
df_btc['Weighted_Close'] = (
    df_btc['High'].add(df_btc['Low']).add(df_btc['Close'].mul(2)).div(4)
)

# Momentum: absolute change in the close versus four rows earlier
df_btc['Momentum'] = df_btc['Close'].diff(periods=4)

# Simple moving averages of the close: short term (7) and long term (30)
for ma_window in (7, 30):
    df_btc[f'MA{ma_window}'] = df_btc['Close'].rolling(window=ma_window).mean()

# Exponential moving averages of the close: short term (12) and long term (26)
for ema_span in (12, 26):
    df_btc[f'EMA{ema_span}'] = df_btc['Close'].ewm(span=ema_span, adjust=False).mean()

# MACD line: short-term EMA minus long-term EMA
df_btc['MACD'] = df_btc['EMA12'].sub(df_btc['EMA26'])

# Signal line: 9-period EMA of the MACD line
df_btc['Signal_Line'] = df_btc['MACD'].ewm(span=9, adjust=False).mean()

# MACD histogram: gap between the MACD line and its signal line
df_btc['MACD_Histogram'] = df_btc['MACD'].sub(df_btc['Signal_Line'])



### 4. Oscillators
- **Relative Strength Index (RSI):** Measures the speed and change of price movements. Values above 70 might indicate an overbought condition (bad), and below 30, an oversold condition (good).
- **Money Flow Index (MFI):** Utilizes price and volume to predict the reliability of a trend. Above 80 can suggest overbought conditions (bad), while below 20 can indicate oversold conditions (good).

In [None]:
def compute_rsi(data: pd.Series, window: int) -> pd.Series:
    """Compute the Relative Strength Index using simple rolling averages.

    Parameters
    ----------
    data : pd.Series
        Price series (e.g. daily closes), ordered oldest to newest.
    window : int
        Number of consecutive price changes averaged for each RSI value.

    Returns
    -------
    pd.Series
        RSI values in [0, 100], aligned with ``data``. The first ``window``
        entries are NaN because a full window of price changes is not yet
        available. A window with no losses yields 100; a window with no price
        change at all yields NaN (0 / 0).
    """
    delta = data.diff(1)

    # Split each change into its gain and loss magnitude. clip() keeps the NaN
    # of the first row (which has no previous price) instead of turning it into
    # a zero, so that row is never counted as a real "no change" observation.
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)

    # rolling().mean() needs `window` non-NaN values, so the first RSI value is
    # produced only once `window` genuine price changes are available.
    avg_gain = gain.rolling(window=window).mean()
    avg_loss = loss.rolling(window=window).mean()

    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))

    return rsi

In [None]:
# Relative Strength Index (RSI) of the close over a 14-row window
df_btc['RSI'] = compute_rsi(df_btc['Close'], window=14)

# Money Flow Index (MFI)
# Typical price is the mean of high, low and close; raw money flow weights it by volume.
df_btc['Typical_Price'] = (df_btc['High'] + df_btc['Low'] + df_btc['Close']) / 3
df_btc['Money_Flow'] = df_btc['Typical_Price'] * df_btc['Volume']

# A row's flow counts as positive when the typical price rose versus the previous
# row and as negative when it fell. The first row has no previous price, so it is
# left as NaN rather than 0: otherwise the first 14-row sums would silently
# include a row that was never classified.
prev_typical_price = df_btc['Typical_Price'].shift(1)
has_previous = prev_typical_price.notna()
df_btc['Money_Flow_Positive'] = (
    df_btc['Money_Flow']
    .where(df_btc['Typical_Price'] > prev_typical_price, 0.0)
    .where(has_previous)
)
df_btc['Money_Flow_Negative'] = (
    df_btc['Money_Flow']
    .where(df_btc['Typical_Price'] < prev_typical_price, 0.0)
    .where(has_previous)
)

# 14-row totals; NaN until 14 classified rows are available
df_btc['Money_Flow_Pos_14'] = df_btc['Money_Flow_Positive'].rolling(window=14).sum()
df_btc['Money_Flow_Neg_14'] = df_btc['Money_Flow_Negative'].rolling(window=14).sum()

# MFI = 100 * positive flow / (positive flow + negative flow)
df_btc['Money_Flow_Index'] = 100 * (df_btc['Money_Flow_Pos_14'] /
                                    (df_btc['Money_Flow_Pos_14'] + df_btc['Money_Flow_Neg_14']))

### 5. Other Metrics
- **Bollinger Bands:** Identify the volatility and overbought/oversold levels. Prices near the upper band might suggest a sell signal (bad), and near the lower band a buy signal (good).
- **Percentage Change in Close Price and Volume:** Indicates the rate of change from one period to another for volume and price. Positive changes might indicate increased interest (good), while negative changes suggest the opposite.

In [None]:
# Bollinger Bands: 20-row moving average of the close, plus/minus two rolling standard deviations
close_rolling_20 = df_btc['Close'].rolling(window=20)
band_offset = close_rolling_20.std() * 2
df_btc['Rolling_Mean'] = close_rolling_20.mean()
df_btc['Bollinger_High'] = df_btc['Rolling_Mean'] + band_offset
df_btc['Bollinger_Low'] = df_btc['Rolling_Mean'] - band_offset

# Row-over-row percentage change of the close price and of the traded volume
for source_col in ('Close', 'Volume'):
    df_btc[f'Pct_Change_{source_col}'] = df_btc[source_col].pct_change() * 100

In [None]:
# The rolling/shifted features leave NaNs in their warm-up rows; discard every row that has one
df_btc.dropna(axis=0, how='any', inplace=True)

# Persist the prices plus engineered indicators (row index not written)
df_btc.to_csv('BTC_Price_and_Indicators_Data.csv', index=False)

df_btc

In [None]:
import plotly.express as px

# Plot the closing price over time
fig_1 = px.line(df_btc, x='Date', y='Close', title='Bitcoin Closing Price Over Time')
fig_1.update_xaxes(title_text='Date')
fig_1.update_yaxes(title_text='Close Price (USD)')
# Render explicitly, as the other figure cells do, instead of relying on the
# notebook echoing the cell's last expression.
fig_1.show()

In [None]:
# Line chart of traded volume against date
fig_2 = px.line(
    data_frame=df_btc,
    x='Date',
    y='Volume',
    title='Bitcoin Trading Volume Over Time',
)
fig_2.update_layout(xaxis_title='Date', yaxis_title='Volume')
fig_2.show()

In [None]:
# Overlay the close with its 7- and 30-row moving averages
price_lines = ['Close', 'MA7', 'MA30']
fig_3 = px.line(
    data_frame=df_btc,
    x='Date',
    y=price_lines,
    title='Bitcoin Moving Averages and Close Price',
)
fig_3.update_layout(xaxis_title='Date', yaxis_title='Price (USD)')
fig_3.show()

In [None]:
# Line chart of the rolling historical volatility against date
fig_4 = px.line(
    data_frame=df_btc,
    x='Date',
    y='Hist_Volatility',
    title='Bitcoin Historical Volatility Over Time',
)
fig_4.update_layout(xaxis_title='Date', yaxis_title='Historical Volatility')
fig_4.show()