<a href="https://colab.research.google.com/github/Bane-24/Crypto-Price-Prediction/blob/main/Daily_Forecast.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
# Installing required libraries
!pip install cryptocmd
!pip install yfinance
!pip install keras-tuner

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from cryptocmd import CmcScraper
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
import yfinance as yf
import tensorflow as tf
from tensorflow import keras
import keras_tuner
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, Bidirectional, Conv1D
from kerastuner.tuners import BayesianOptimization

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Scrape Bitcoin historical data (dates are given as dd-mm-yyyy)
scraper = CmcScraper("BTC", "01-01-2014", "31-03-2023")
bitcoin_df = scraper.get_dataframe()

# Scrape stock market data and keep only the adjusted close as `stock_price`
stock_data = yf.download("SPY", start="2014-01-01", end="2023-03-31")
stock_df = (
    stock_data["Adj Close"]
    .to_frame()
    .reset_index()
    .rename(columns={"Adj Close": "stock_price"})
)

# Merge data: the inner join keeps only the dates present in both sources
merged_df = bitcoin_df.merge(stock_df, on="Date", how="inner").set_index("Date")
merged_df.index = pd.to_datetime(merged_df.index)

# Sort chronologically BEFORE interpolating: time-based interpolation weights
# neighbours by their distance on the index, so it should not be run on a frame
# whose row order is unspecified (the original only sorted in a later cell).
merged_df = merged_df.sort_index()

# Handle NA using time-weighted interpolation (non in-place so the cell is re-runnable)
merged_df = merged_df.interpolate(method="time")

# Normalize features separately so each one can be inverse-transformed on its own.
# NOTE(review): every scaler is fit on the full date range, including the period
# that is later held out for testing; fit on the training rows only if a strictly
# leak-free evaluation is required.
scaler_btc = MinMaxScaler()
merged_df["Close"] = scaler_btc.fit_transform(merged_df[["Close"]])
scaler_stock = MinMaxScaler()
merged_df["stock_price"] = scaler_stock.fit_transform(merged_df[["stock_price"]])
scaler_volume = MinMaxScaler()
merged_df["Volume"] = scaler_volume.fit_transform(merged_df[["Volume"]])




In [None]:
# Put the rows in ascending date order, then preview the most recent ones
merged_df = merged_df.sort_values("Date", ascending=True)
merged_df.tail()

In [None]:
# Prepare data
def prepare_data(df, feature_columns, target_column, n_past, n_future):
    """Slice a frame into sliding (input window, target window) pairs.

    For every row position ``i`` from ``n_past`` up to ``len(df) - n_future``
    one sample is produced: the input is rows ``i - n_past .. i - 1`` of
    ``feature_columns`` and the target is rows ``i .. i + n_future - 1`` of
    ``target_column``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; rows are used in their existing order.
    feature_columns : list of str
        Columns that make up each input window.
    target_column : str
        Column the targets are read from.
    n_past : int
        Number of rows in each input window.
    n_future : int
        Number of rows in each target window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x_data, y_data)`` with shapes ``(n_samples, n_past, n_features)``
        and ``(n_samples, n_future)``. When ``df`` is too short to yield a
        single sample, both arrays are empty but keep those trailing
        dimensions (``n_samples == 0``).
    """
    # Convert once up front instead of re-selecting the columns on every iteration.
    features = df[feature_columns].to_numpy()
    targets = df[target_column].to_numpy()

    window_ends = range(n_past, len(df) - n_future + 1)
    if len(window_ends) == 0:
        # Keep the rank downstream code expects even when there are no samples.
        empty_x = np.empty((0, n_past) + features.shape[1:], dtype=features.dtype)
        empty_y = np.empty((0, n_future) + targets.shape[1:], dtype=targets.dtype)
        return empty_x, empty_y

    x_data = np.stack([features[i - n_past:i] for i in window_ends])
    y_data = np.stack([targets[i:i + n_future] for i in window_ends])
    return x_data, y_data

# Windowing configuration
n_past = 30    # rows of history fed to the model per sample
n_future = 1   # rows predicted per sample
feature_columns = ["Close", "stock_price", "Volume"]
target_column = "Close"
x_data, y_data = prepare_data(merged_df, feature_columns, target_column, n_past, n_future)

# Split into train and test sets
# Set the cutoff date
train_date = "2022-03-01"
# Position of the row nearest to the cutoff date.
# Index.get_loc(..., method="nearest") was deprecated in pandas 1.4 and removed in
# pandas 2.0; Index.get_indexer is the supported way to do a nearest-match lookup.
train_date_index = int(
    merged_df.index.get_indexer([pd.Timestamp(train_date)], method="nearest")[0]
)

# Sample k of x_data has its target at row k + n_past of merged_df, so the number
# of samples whose target falls on or before the cutoff is the number of rows up
# to the cutoff minus n_past. Clamp at 0 so a very early cutoff cannot produce a
# negative slice bound.
train_size = max(len(merged_df.loc[:train_date]) - n_past, 0)

x_train, x_test = x_data[:train_size], x_data[train_size:]
y_train, y_test = y_data[:train_size], y_data[train_size:]

In [None]:
# Creating a time-series split; `tscv` is the cross-validator handed to the
# hyperparameter search.
tscv = TimeSeriesSplit(n_splits=5)

# Report the fold sizes for inspection only. The loop deliberately does NOT assign
# to x_train / x_test / y_train / y_test: doing so would silently replace the
# date-based hold-out split with whatever the last fold happens to be.
for fold_number, (train_index, test_index) in enumerate(tscv.split(x_data), start=1):
    print(f"Fold {fold_number}: {len(train_index)} train samples, {len(test_index)} validation samples")


In [None]:
# Report the dimensions of each split in one pass
for caption, split in (
    ("x_train shape:", x_train),
    ("y_train shape:", y_train),
    ("x_test shape:", x_test),
    ("y_test shape:", y_test),
):
    print(caption, split.shape)


In [None]:
# LSTM & Hyperparameter tuning
from sklearn.model_selection import RandomizedSearchCV
from keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Conv1D


def create_model(learning_rate=0.001, dropout_rate=0.2, neurons=50, l1_reg=0.001, l2_reg=0.001):
    """Build and compile the Conv1D + (Bi)LSTM regression network.

    The input shape is taken from the notebook-level ``n_past`` and
    ``feature_columns``; the output width is ``n_future``.

    Parameters
    ----------
    learning_rate : float
        Step size passed to the Adam optimizer.
    dropout_rate : float
        Rate used by the Dropout layer that follows each recurrent layer.
    neurons : int
        Units in every LSTM layer and in the hidden Dense layer.
    l1_reg, l2_reg : float
        L1 / L2 penalties applied to the kernel weights of each LSTM layer.

    Returns
    -------
    A compiled ``Sequential`` model using mean squared error as the loss.
    """
    model = Sequential()
    model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(n_past, len(feature_columns))))
    model.add(Bidirectional(LSTM(neurons, activation="tanh", return_sequences=True,
                                 kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg))))
    model.add(Dropout(dropout_rate))
    model.add(LSTM(neurons, activation="tanh", return_sequences=True,
                   kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
    model.add(Dropout(dropout_rate))
    # Last recurrent layer returns only its final state, which feeds the dense head.
    model.add(LSTM(neurons, activation="tanh", return_sequences=False,
                   kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
    model.add(Dropout(dropout_rate))
    model.add(Dense(neurons, activation='relu'))
    model.add(Dense(n_future))
    # `lr` is a deprecated alias that current Keras optimizers reject;
    # `learning_rate` is the supported argument name.
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss="mse")
    return model


# Wrap the Keras builder so scikit-learn can clone and fit it during the search
model = KerasRegressor(build_fn=create_model, verbose=0)

# Candidate values sampled by the randomized search. `batch_size` and `epochs`
# are passed to fitting; the remaining keys match create_model's parameters.
param_dist = {
    'batch_size': [16, 32, 64],
    'epochs': [10,15,20],
    'learning_rate': [0.01, 0.001,0.0001],
    'dropout_rate': [0.2, 0.4],
    'neurons': [12, 25, 50,70],
    'l1_reg': [0.001, 0.01],
    'l2_reg': [0.001, 0.01]
}
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_dist, n_iter=10, cv=tscv, verbose=2, random_state=42, n_jobs=-1)

# Search on the training windows only. Fitting on x_data / y_data would let the
# held-out test windows influence both the hyperparameter choice and the refit
# model, making the later test metrics optimistic.
random_search.fit(x_train, y_train)

print("Best parameters: ", random_search.best_params_)

# With the default refit=True, best_estimator_ has already been retrained on all
# of x_train / y_train using the best parameters (including epochs and
# batch_size), so no additional fit call is needed here.
best_model = random_search.best_estimator_.model



In [None]:
# Show the hyperparameter combination selected by the randomized search
best_params = random_search.best_params_
print("Best parameters: ", best_params)


In [None]:


# Run the tuned model over the test windows; outputs are still in scaled units
y_pred = best_model.predict(x=x_test)


In [None]:
# Map predictions and targets back from scaled units to prices
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
y_pred_actual = scaler_btc.inverse_transform(y_pred)
y_test_actual = scaler_btc.inverse_transform(y_test)

# Score the model on the price scale
mse = mean_squared_error(y_test_actual, y_pred_actual)
mae = mean_absolute_error(y_test_actual, y_pred_actual)
r2 = r2_score(y_test_actual, y_pred_actual)

for template, metric in (
    ("Mean Squared Error: {:.2f}", mse),
    ("Mean Absolute Error: {:.2f}", mae),
    ("R2 Score: {:.2f}", r2),
):
    print(template.format(metric))

# Plot predicted vs. actual prices against the trailing dates of the frame.
# Both arrays have one row per test sample, so a single date slice serves both.
test_dates = merged_df.index[-len(y_test_actual):]
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test_dates, y_pred_actual, label="Predicted Price", color='r')
ax.plot(test_dates, y_test_actual, label="Actual Price", color='b')
ax.set_xlabel("Date")
ax.set_ylabel("Price")
ax.legend(loc="best")
ax.set_title("BTC Price Prediction")
plt.show()


In [None]:
# Forecast for the next day: the model input is the most recent n_past rows
x_forecast = merged_df[feature_columns].values[-n_past:]
x_forecast = x_forecast.reshape((1, n_past, len(feature_columns)))

# Predict the future price
y_forecast = best_model.predict(x_forecast)

# Invert the scaling for the forecasted price
forecasted_price_actual = scaler_btc.inverse_transform(y_forecast)

print(forecasted_price_actual)

# Build date-indexed frames for the test-period predictions and actuals.
# They are created here from the arrays (nothing earlier in the notebook defines
# them) and rebuilt on every run, so re-executing the cell never appends the
# forecast row twice.
test_dates = merged_df.index[-len(y_pred_actual):]
next_day = merged_df.index[-1] + pd.Timedelta(days=1)

# Predictions: test period plus the one-step-ahead forecast
y_pred_actual_df = pd.concat([
    pd.DataFrame(y_pred_actual, index=test_dates, columns=["Predicted Price"]),
    pd.DataFrame(forecasted_price_actual, index=[next_day], columns=["Predicted Price"]),
])

# Actuals: test period plus a NaN placeholder for the not-yet-known next day
# (NaN rather than None keeps the column numeric so it plots as a gap).
y_test_actual_df = pd.concat([
    pd.DataFrame(y_test_actual, index=test_dates, columns=["Actual Price"]),
    pd.DataFrame({"Actual Price": [np.nan]}, index=[next_day]),
])

# Plot the actual and predicted prices
plt.figure(figsize=(14,7))
plt.plot(y_test_actual_df.index, y_test_actual_df['Actual Price'], color='blue', label='Actual Price')
plt.plot(y_pred_actual_df.index, y_pred_actual_df['Predicted Price'], color='red', label='Predicted Price')
plt.title('Bitcoin Price Prediction')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
# Predicted vs. actual prices with one x-axis tick per month
import matplotlib.dates as mdates

fig, ax = plt.subplots(figsize=(12, 6))

# Plot the data. The "Actual Price" column is plotted directly and the frame is
# left untouched: dropping that column (as this cell used to) left nothing to
# draw for the actual series and made the cell fail when run a second time.
ax.plot(y_pred_actual_df.index, y_pred_actual_df["Predicted Price"], label="Predicted Price", color='r')
ax.plot(y_test_actual_df.index, y_test_actual_df["Actual Price"], label="Actual Price", color='b')

# Set the date format
ax.xaxis.set_major_locator(mdates.MonthLocator())  # to get a tick every month
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))  # year-month tick labels

ax.set_xlabel("Date")
ax.set_ylabel("Price")
plt.legend(loc="best")
plt.title("BTC Price Prediction")
plt.xticks(rotation=45)  # Rotates X-Axis Ticks
plt.tight_layout()
plt.show()
