In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly import express as px_colors
import seaborn as sns
import yfinance as yf
import ta  # Technical Analysis library for financial indicators
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
import plotly.express as px

In [2]:
# Data Collection
data = yf.download('BTC-USD', start='2022-01-01', end='2023-11-15')
data = data[['Open', 'High', 'Low', 'Close', 'Volume']]  # Using multiple features


[*********************100%***********************]  1 of 1 completed


In [3]:
# Save the DataFrame to a CSV file
data.to_csv('bitcoin_trading_data.csv', index=True)

In [4]:
# Read the DataFrame from the CSV file
data = pd.read_csv('bitcoin_trading_data.csv', index_col=0)

In [5]:
# Preprocessing
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)

In [6]:
def create_dataset(dataset, time_step=1):
    X, Y = [], []
    for i in range(len(dataset) - time_step - 1):
        X.append(dataset[i:(i + time_step), :])
        Y.append(dataset[i + time_step, 3])  # Target is still the 'Close' price
    return np.array(X), np.array(Y)

time_step = 60
X, y = create_dataset(scaled_data, time_step)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)




In [7]:
# LSTM Model
model = Sequential()
model.add(LSTM(100, return_sequences=True, input_shape=(time_step, X_train.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(100, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(1))


In [8]:
# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

In [9]:
# Define early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.001)

# Training with potentially adjusted batch size and epochs
model.fit(X_train, y_train, epochs=200, batch_size=32, validation_split=0.1, callbacks=[early_stopping, reduce_lr])

# Prediction and Evaluation
y_pred = model.predict(X_test)
y_test_scaled_back = scaler.inverse_transform(np.concatenate((np.zeros_like(y_test).reshape(-1,1), y_test.reshape(-1,1), np.zeros((len(y_test), scaled_data.shape[1] - 2))), axis=1))[:, 1]
dummy_test = np.zeros((len(y_pred), scaled_data.shape[1]))
dummy_test[:, 3] = y_pred.flatten()
y_pred_transformed = scaler.inverse_transform(dummy_test)[:, 3]

r_squared = r2_score(y_test_scaled_back, y_pred_transformed)
mse = mean_squared_error(y_test_scaled_back, y_pred_transformed)
mae = mean_absolute_error(y_test_scaled_back, y_pred_transformed)

print("R-Squared score:", r_squared)
print("MSE:", mse)
print("MAE:", mae)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
R-Squared score: 0.9695075859293983
MSE: 1387363.674619973
MAE: 877.3238522116025


In [10]:
# Predict the next day's closing price
last_sequence = scaled_data[-time_step:]
last_sequence = last_sequence.reshape(1, time_step, scaled_data.shape[1])
next_day_prediction = model.predict(last_sequence)
next_day_dummy = np.zeros((1, scaled_data.shape[1]))
next_day_dummy[:, 3] = next_day_prediction.flatten()
predicted_close_price = scaler.inverse_transform(next_day_dummy)[:, 3]

print("Predicted Next Day Price:", predicted_close_price)

Predicted Next Day Price: [36505.19352215]


In [11]:
# Predict the next day
last_sequence = scaled_data[-time_step:]
last_sequence = last_sequence.reshape(1, time_step, 5)
next_day_prediction = model.predict(last_sequence)
predicted_close_price = scaler.inverse_transform(np.concatenate((next_day_prediction, np.zeros((1, 4))), axis=1))[:, 0]



In [12]:
# Predict the next day's closing price
last_sequence = scaled_data[-time_step:]
last_sequence = last_sequence.reshape(1, time_step, 5)
next_day_prediction = model.predict(last_sequence)

# Adjust the shape of next_day_prediction for inverse transformation
next_day_dummy = np.zeros((1, scaled_data.shape[1]))
next_day_dummy[:, 3] = next_day_prediction.flatten()
predicted_close_price = scaler.inverse_transform(next_day_dummy)[:, 3]

# Print the predicted next day price
print("Predicted Next Day Price:", predicted_close_price)
print("Actual values:", y_test[:10])
print("Predicted values:", y_pred_transformed[:10])
print("X_test shape:", X_test.shape)


Predicted Next Day Price: [36505.19352215]
Actual values: [0.1611059  0.38842895 0.17996516 0.03325575 0.15085655 0.14727457
 0.1091179  0.10283383 0.33608436 0.25256181]
Predicted values: [20184.81578492 28023.00861537 21453.06001748 17277.95295759
 19770.24395175 20348.49388983 19104.33180837 19134.24852047
 26764.43374937 21814.47053655]
X_test shape: (125, 60, 5)


In [13]:
# Add the predicted price to the DataFrame
# Convert the index to datetime if it's not already
data.index = pd.to_datetime(data.index)

# Now, adding a Timedelta should work
next_day = data.index[-1] + pd.Timedelta(days=1)
data.loc[next_day] = [np.nan, np.nan, np.nan, predicted_close_price, np.nan]

In [14]:
# Convert next_day to a format that matches your data index
next_day_formatted = pd.to_datetime(next_day)

# Ensure data index is in datetime format for proper plotting
data.index = pd.to_datetime(data.index)

fig = make_subplots(specs=[[{"secondary_y": True}]])

# Historical data traces
fig.add_trace(go.Scatter(x=data.index[-time_step:], y=data['Close'].dropna()[-time_step:], name='Historical Daily Close Price'), secondary_y=False)
fig.add_trace(go.Scatter(x=data.index[-time_step:], y=data['Volume'].dropna()[-time_step:], name='Trading Volume', marker=dict(color='orange')), secondary_y=True)

# Prediction trace
# Check if next_day_formatted is within the range of your data index
if next_day_formatted in data.index:
    fig.add_trace(go.Scatter(x=[next_day_formatted], y=[predicted_close_price], mode='markers', name='Predicted Next Day Price', marker=dict(color='red', size=10)), secondary_y=False)
else:
    print("Next day date is out of range")

# Figure layout settings
fig.update_layout(title_text="BTC Price and Volume Trend")
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Close Price (USD)", secondary_y=False)
fig.update_yaxes(title_text="Volume", secondary_y=True)

fig.show()

In [15]:
# Initialize a DataFrame to store predictions
predicted_prices = pd.DataFrame(index=data.index, columns=['Predicted_Close'])

# Iterate over the dataset to generate predictions
for i in range(len(data) - time_step):
    input_sequence = scaled_data[i:(i + time_step)]
    input_sequence = input_sequence.reshape(1, time_step, scaled_data.shape[1])
    predicted_close = model.predict(input_sequence)
    
    # Inverse transform the predicted value
    next_day_dummy = np.zeros((1, scaled_data.shape[1]))
    next_day_dummy[:, 3] = predicted_close.flatten()
    predicted_close_price = scaler.inverse_transform(next_day_dummy)[:, 3]
    
    # Store the prediction
    predicted_prices.iloc[i + time_step] = predicted_close_price

# Plot the BTC Price and Volume Trend with Predictions
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Historical data traces
fig.add_trace(go.Scatter(x=data.index, y=data['Close'], name='Historical Daily Close Price'), secondary_y=False)
fig.add_trace(go.Scatter(x=data.index, y=data['Volume'], name='Trading Volume', marker=dict(color='orange')), secondary_y=True)

# Prediction trace
fig.add_trace(go.Scatter(x=predicted_prices.index, y=predicted_prices['Predicted_Close'], name='Predicted Close Price', marker=dict(color='green')), secondary_y=False)

# Figure layout settings
fig.update_layout(title_text="BTC Price and Volume Trend with Predictions")
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Close Price (USD)", secondary_y=False)
fig.update_yaxes(title_text="Volume", secondary_y=True)

fig.show()



In [35]:
# Initialize a DataFrame to store predictions
predicted_prices = pd.DataFrame(index=data.index, columns=['Predicted_Close'])

# Iterate over the dataset to generate predictions
for i in range(len(data) - time_step):
    input_sequence = scaled_data[i:(i + time_step)]
    input_sequence = input_sequence.reshape(1, time_step, scaled_data.shape[1])
    predicted_close = model.predict(input_sequence)
    
    # Inverse transform the predicted value
    next_day_dummy = np.zeros((1, scaled_data.shape[1]))
    next_day_dummy[:, 3] = predicted_close.flatten()
    predicted_close_price = scaler.inverse_transform(next_day_dummy)[:, 3]
    
    # Store the prediction
    predicted_prices.iloc[i + time_step] = predicted_close_price

# Predict the next day's closing price
last_sequence = scaled_data[-time_step:]
last_sequence = last_sequence.reshape(1, time_step, scaled_data.shape[1])
next_day_prediction = model.predict(last_sequence)

# Inverse transform the predicted value for the next day
next_day_dummy = np.zeros((1, scaled_data.shape[1]))
next_day_dummy[:, 3] = next_day_prediction.flatten()
next_day_predicted_close = scaler.inverse_transform(next_day_dummy)[:, 3]

# Add the next day's prediction to the predicted_prices DataFrame
next_day_index = data.index[-1] + pd.Timedelta(days=1)
predicted_prices.loc[next_day_index] = next_day_predicted_close

# Plot the BTC Price and Volume Trend with Predictions
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Historical data traces
fig.add_trace(go.Scatter(x=data.index, y=data['Close'], name='Historical Daily Close Price'), secondary_y=False)
fig.add_trace(go.Scatter(x=data.index, y=data['Volume'], name='Trading Volume', marker=dict(color='orange')), secondary_y=True)

# Prediction trace for historical data
fig.add_trace(go.Scatter(x=predicted_prices.index[:-1], y=predicted_prices['Predicted_Close'][:-1], name='Predicted Close Price', marker=dict(color='green')), secondary_y=False)

# Future prediction as a red dot
fig.add_trace(go.Scatter(x=[next_day_index], y=[next_day_predicted_close[0]], mode='markers', name='Future Prediction', marker=dict(color='red', size=10)), secondary_y=False)

# Figure layout settings
fig.update_layout(title_text="BTC Price and Volume Trend with Predictions")
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Close Price (USD)", secondary_y=False)
fig.update_yaxes(title_text="Volume", secondary_y=True)

fig.show()




In [16]:
# EDA: Volatility Analysis
data.index = pd.to_datetime(data.index)

# Calculate the 30-day rolling standard deviation
data['30d_Rolling_Std'] = data['Close'].dropna().rolling(window=30).std()

# Create an interactive plot
fig = px.line(data, x=data.index, y='30d_Rolling_Std', title='Bitcoin 30-Day Rolling Volatility')
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Standard Deviation of Close Prices')
fig.show()

In [17]:
# Create an interactive box plot
fig = px.box(data, y='Volume', title='Box Plot of Bitcoin Trading Volume')
fig.update_xaxes(title_text='Volume')
fig.update_yaxes(title_text='Value')
fig.show()

In [18]:
# # EDA: Time Series Decomposition Plots
# decomposition = seasonal_decompose(data['Close'].dropna(), model='additive', period=30)  # Drop NaN values
# fig = decomposition.plot()
# fig.set_size_inches(10, 8)
# fig.suptitle('Time Series Decomposition of Bitcoin Closing Price')
# plt.show()

In [19]:
# EDA: Correlation Heatmaps
corr_matrix = data.dropna().corr()

# Create an interactive heatmap
fig = px.imshow(corr_matrix, text_auto=True, color_continuous_scale='RdBu', aspect="auto")
fig.update_layout(title='Correlation Heatmap', xaxis_title='Features', yaxis_title='Features')
fig.show()





In [20]:
# Model Selection and Development
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split, TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [21]:
# Preparing data for additional models
X = scaled_data[:, :-1]  # All features except the target
y = scaled_data[:, 3]   # 'Close' price as the target

In [22]:
# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)


In [23]:
# Linear Regression
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
lr_predictions = lr_model.predict(X_test)


In [24]:
# Random Forest
rf_model = RandomForestRegressor()
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)


In [25]:
# Neural Network
nn_model = MLPRegressor(hidden_layer_sizes=(50, 50), max_iter=500)
nn_model.fit(X_train, y_train)
nn_predictions = nn_model.predict(X_test)

In [26]:
# Model Evaluation
print("Linear Regression MSE:", mean_squared_error(y_test, lr_predictions))
print("Random Forest MSE:", mean_squared_error(y_test, rf_predictions))
print("Neural Network MSE:", mean_squared_error(y_test, nn_predictions))

Linear Regression MSE: 1.5632365679989826e-32
Random Forest MSE: 6.350809934964085e-05
Neural Network MSE: 7.930046330356763e-05


In [27]:
# Model Evaluation and Refinement: Cross-Validation, Hyperparameter Tuning (Example for Random Forest)
tscv = TimeSeriesSplit(n_splits=5)
param_grid = {'n_estimators': [100, 200], 'max_features': ['auto', 'sqrt'], 'max_depth': [10, 20]}
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=tscv, n_jobs=-1)
grid_search.fit(X_train, y_train)
print("Best Parameters for Random Forest:", grid_search.best_params_)


`max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features=1.0` or remove this parameter as it is also the default value for RandomForestRegressors and ExtraTreesRegressors.



Best Parameters for Random Forest: {'max_depth': 20, 'max_features': 'auto', 'n_estimators': 100}


In [28]:
# Calculate the 10-day Simple Moving Average
data['SMA_10'] = data['Close'].rolling(window=10).mean()


In [29]:
# Define trading signals
data['strategy_signal'] = 0  # Default to no position
data['strategy_signal'][data['Close'] > data['SMA_10']] = 1  # Buy signal
data['strategy_signal'][data['Close'] < data['SMA_10']] = -1  # Sell signal




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [30]:
# Calculate daily returns
data['strategy_return'] = data['strategy_signal'].shift(1) * data['Close'].pct_change()

In [31]:
# Calculate cumulative returns
data['cumulative_strategy_return'] = (1 + data['strategy_return']).cumprod()
data['cumulative_market_return'] = (1 + data['Close'].pct_change()).cumprod()

In [32]:
# plt.figure(figsize=(12, 6))

# # Plotting the market and strategy cumulative returns
# plt.plot(data['cumulative_market_return'], label='Market Returns (Buy and Hold)')
# plt.plot(data['cumulative_strategy_return'], label='Strategy Returns')

# # Setting up a secondary y-axis for SMA
# ax2 = plt.gca().twinx()
# ax2.plot(data['Close'], color='lightgray', linestyle='--', label='Close Price')
# ax2.plot(data['SMA_10'], color='orange', linestyle='-', label='SMA 10')
# ax2.set_ylabel('Close Price and SMA 10')

# # Marking trade points
# # Buy signals: Close < SMA_10, Sell signals: Close > SMA_10
# buy_signals = data['Close'] > data['SMA_10']
# sell_signals = data['Close'] < data['SMA_10']

# ax2.plot(data[buy_signals].index, data['Close'][buy_signals], 'g^', label='Buy Signal', markersize=8)  # Green triangles for buy
# ax2.plot(data[sell_signals].index, data['Close'][sell_signals], 'rv', label='Sell Signal', markersize=8)  # Red triangles for sell

# # Adding legends
# lines, labels = plt.gca().get_legend_handles_labels()
# lines2, labels2 = ax2.get_legend_handles_labels()
# ax2.legend(lines + lines2, labels + labels2, loc='upper left')

# # Setting titles and labels
# plt.title('Backtesting Trading Strategy with SMA')
# plt.xlabel('Date')
# plt.ylabel('Cumulative Returns')

# plt.show()


In [33]:
# # Performance Metrics
# total_strategy_return = data['cumulative_strategy_return'].iloc[-1] - 1
# total_market_return = data['cumulative_market_return'].iloc[-1] - 1
# print(f"Total Strategy Return: {total_strategy_return:.2f}") 
# print(f"Total Market Return: {total_market_return:.2f}")
