In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Load the dataset
data = pd.read_csv("BTC-USD.csv")
# Convert 'Date' column to datetime
data['Date'] = pd.to_datetime(data['Date'])
# Sort by date
data.sort_values('Date', inplace=True)
# Set 'Date' column as index
data.set_index('Date', inplace=True)

# Function to perform LSTM-based stock price prediction
def predict_stock_price(data, time_step=100, epochs=100):
    # Initialize lists to store predicted and actual prices
    predicted_prices = []
    actual_prices = []
    # Iterate over years
    years = data.index.year.unique()
    for year in years:
        # Select data for the current year
        year_data = data[data.index.year == year]
        # Normalize the data
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_data = scaler.fit_transform(year_data['Close'].values.reshape(-1,1))
        # Prepare the data for training
        X, y = [], []
        for i in range(len(scaled_data) - time_step - 1):
            X.append(scaled_data[i:(i+time_step), 0])
            y.append(scaled_data[i + time_step, 0])
        X, y = np.array(X), np.array(y)
        X = np.reshape(X, (X.shape[0], X.shape[1], 1))
        # Split the data into training and testing sets (80-20 split)
        train_size = int(len(X) * 0.8)
        X_train, X_test = X[:train_size], X[train_size:]
        y_train, y_test = y[:train_size], y[train_size:]
        # Build the LSTM model
        model = Sequential()
        model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
        model.add(Dropout(0.2))
        model.add(LSTM(units=50, return_sequences=True))
        model.add(Dropout(0.2))
        model.add(LSTM(units=50))
        model.add(Dropout(0.2))
        model.add(Dense(units=1))
        # Compile the model
        model.compile(optimizer='adam', loss='mean_squared_error')
        # Train the model
        model.fit(X_train, y_train, epochs=epochs, batch_size=32, verbose=0)
        # Make predictions
        predicted_stock_price = model.predict(X_test)
        predicted_stock_price = scaler.inverse_transform(predicted_stock_price)
        actual_stock_price = scaler.inverse_transform(y_test.reshape(-1,1))
        # Store the predicted and actual prices
        predicted_prices.extend(predicted_stock_price)
        actual_prices.extend(actual_stock_price)
        # Print RMSE for the current year
        rmse = np.sqrt(mean_squared_error(actual_stock_price, predicted_stock_price))
        print(f"RMSE for year {year}: {rmse}")
    # Plot actual vs predicted prices
    plt.figure(figsize=(12, 6))
    plt.plot(actual_prices, color='blue', label='Actual Price')
    plt.plot(predicted_prices, color='red', label='Predicted Price')
    plt.title('Actual vs Predicted Stock Prices')
    plt.xlabel('Time')
    plt.ylabel('Stock Price')
    plt.legend()
    plt.show()

# Predict stock prices and compare year-wise
predict_stock_price(data)

RMSE for year 2014: 2.453552734375023
RMSE for year 2015: 129.05907026880303
RMSE for year 2016: 91.80573306175889
RMSE for year 2017: 2988.3205824494557




RMSE for year 2018: 687.8520558166077




RMSE for year 2019: 664.9971683120153
RMSE for year 2020: 1836.6272508457012
RMSE for year 2021: 2609.787110378389


IndexError: tuple index out of range