In [None]:
import math
import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import yfinance as yf
from keras.layers import Dense, LSTM
from keras.models import Sequential
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

# Polygon.io API key.
# Prefer supplying the key through the POLYGON_API_KEY environment variable so
# the credential does not have to live in the notebook.
# NOTE(security): the literal below is kept only as a backward-compatible
# fallback; it is exposed to anyone who can read this file and should be
# rotated and removed.
API_KEY = os.environ.get('POLYGON_API_KEY') or '27DzQ9ENavxTqAh6IaO4kFVZZQnc3uDk'

# Function to fetch stock data from Polygon.io
def get_polygon_data(symbol):
    """Fetch daily aggregate bars for ``symbol`` from Polygon.io.

    The Yahoo-style crypto symbols ``BTC-USD`` and ``ETH-USD`` are translated
    to Polygon's ``X:BTCUSD`` / ``X:ETHUSD`` tickers; any other symbol is sent
    unchanged. One request is issued for the range 2004-01-01 through today.

    Args:
        symbol: Ticker symbol, e.g. ``"AAPL"`` or ``"BTC-USD"``.

    Returns:
        A DataFrame indexed by ``date`` (parsed from the millisecond ``t``
        field of each bar) with a single ``adj_close`` column.

    Raises:
        ValueError: If the response is not JSON, or carries no (or an empty)
            ``results`` list.
    """
    # Correct symbols for cryptocurrencies
    crypto_tickers = {"BTC-USD": "X:BTCUSD", "ETH-USD": "X:ETHUSD"}
    ticker = crypto_tickers.get(symbol, symbol)

    url = (
        f'https://api.polygon.io/v2/aggs/ticker/{ticker}/range/1/day/'
        f'2004-01-01/{datetime.now().date()}'
    )
    # Let requests build and encode the query string instead of hand-formatting it.
    # NOTE(review): only one page is requested; with limit=5000 a long history
    # may be truncated - confirm against Polygon's pagination behaviour.
    params = {
        'adjusted': 'true',
        'sort': 'asc',
        'limit': 5000,
        'apiKey': API_KEY,
    }
    # Without a timeout a stalled connection would hang the notebook forever.
    response = requests.get(url, params=params, timeout=30)

    try:
        data = response.json()
    except ValueError as exc:
        raise ValueError(
            f"Polygon.io returned a non-JSON response for {ticker} "
            f"(HTTP {response.status_code})."
        ) from exc

    # A missing *or empty* 'results' list both mean there is nothing to use;
    # an empty list would otherwise fail below with a KeyError on 't'.
    results = data.get('results') if isinstance(data, dict) else None
    if not results:
        raise ValueError(f"Failed to fetch data for {ticker}. Check the symbol or API response.")

    df = pd.DataFrame(results)
    df['date'] = pd.to_datetime(df['t'], unit='ms')
    df.set_index('date', inplace=True)
    df['adj_close'] = df['c']  # 'c' is assumed to be adjusted close price
    return df[['adj_close']]

# Function to fetch stock data from Yahoo Finance
def get_yfinance_data(symbol):
    """Download price history for ``symbol`` from Yahoo Finance.

    Args:
        symbol: Ticker symbol understood by yfinance, e.g. ``"TSLA"``.

    Returns:
        A DataFrame holding only the ``Close`` column, starting 2004-01-01.

    Raises:
        ValueError: If the download produced no rows.
    """
    df = yf.download(symbol, start="2004-01-01")
    # Fail here with a clear message rather than handing an empty frame to
    # the caller, where it would only surface later as an obscure error.
    if df is None or df.empty:
        raise ValueError(f"No data returned from yfinance for {symbol}.")
    return df[['Close']]

# Get stock symbol from user input (Jupyter-specific)
# Strip surrounding whitespace before upper-casing: a stray space typed at the
# prompt would otherwise become part of the ticker sent to both data sources.
stock = input("Enter the Stock Symbol (e.g., AAPL, TSLA, BTC-USD, ETH-USD): ").strip().upper()

# Results of the two fetches; each stays None if its source fails, which the
# later processing step uses to decide whether to proceed.
stock_data_polygon = None
yfinance_data = None

# Fetch data from Polygon.io (best effort: report the error and carry on)
try:
    stock_data_polygon = get_polygon_data(stock)
    print(f"Successfully fetched data from Polygon.io for {stock}")
    display(stock_data_polygon.head())
except Exception as e:
    print(f"Error fetching data from Polygon.io: {e}")

# Fetch data from Yahoo Finance (best effort: report the error and carry on)
try:
    yfinance_data = get_yfinance_data(stock)
    print(f"Successfully fetched data from yfinance for {stock}")
    display(yfinance_data.head())
except Exception as e:
    print(f"Error fetching data from yfinance: {e}")

# Number of consecutive past observations fed to the LSTM for each prediction.
LOOKBACK = 60


def _as_daily(frame):
    """Return a sorted copy of ``frame`` indexed by tz-naive midnight dates.

    The Polygon.io bars shown in this notebook are stamped 04:00:00 while the
    yfinance bars are stamped at midnight, so the two indexes never match
    unless the time-of-day component is removed first.
    """
    daily = frame.copy()
    idx = pd.DatetimeIndex(daily.index)
    if idx.tz is not None:
        idx = idx.tz_localize(None)
    daily.index = idx.normalize()
    # Keep a single row per calendar day so the merge is one-to-one.
    daily = daily[~daily.index.duplicated(keep='last')]
    return daily.sort_index()


def _make_windows(series, lookback):
    """Build LSTM samples from a scaled ``(n, 1)`` array.

    Returns ``(x, y)`` where ``x`` has shape ``(n - lookback, lookback, 1)``
    and ``y[i]`` is the value that immediately follows window ``x[i]``.
    """
    windows = [series[i - lookback:i, 0] for i in range(lookback, len(series))]
    x = np.array(windows).reshape(-1, lookback, 1)
    y = series[lookback:, 0]
    return x, y


# Only proceed if both stock_data and yfinance_data were fetched successfully
if stock_data_polygon is not None and yfinance_data is not None:
    polygon_daily = _as_daily(stock_data_polygon)
    yfinance_daily = _as_daily(yfinance_data)
    # NOTE(review): some yfinance releases return (field, ticker) MultiIndex
    # columns even for one ticker; flatten so the merge sees a plain 'Close'.
    if isinstance(yfinance_daily.columns, pd.MultiIndex):
        yfinance_daily.columns = yfinance_daily.columns.get_level_values(0)

    # Align yfinance data to the Polygon.io date range
    start_date = polygon_daily.index.min()
    end_date = polygon_daily.index.max()
    yfinance_data_filtered = yfinance_daily.loc[start_date:end_date]

    # Merge data on the (now comparable) date index
    merged_data = pd.merge(polygon_daily, yfinance_data_filtered, left_index=True, right_index=True, how='outer')
    merged_data.ffill(inplace=True)  # Forward fill missing values

    # Preprocessing
    merged_data = merged_data.dropna()
    prices = merged_data['adj_close'].values.reshape(-1, 1)

    # Train/Test split (80% train, 20% test)
    train_len = int(len(prices) * 0.8)
    if train_len <= LOOKBACK or train_len >= len(prices):
        raise ValueError(
            f"Not enough data for {stock}: {len(prices)} rows cannot be split "
            f"into train/test sets with a {LOOKBACK}-day window."
        )

    # Fit the scaler on the training portion only, so the test range does not
    # leak into the scaling parameters; then apply it to the whole series.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(prices[:train_len])
    scaled_data = scaler.transform(prices)

    train_data = scaled_data[:train_len]
    # Keep the last LOOKBACK training rows so the first test sample has a
    # full input window.
    test_data = scaled_data[train_len - LOOKBACK:]

    # Create training and testing data, already shaped for the LSTM model
    x_train, y_train = _make_windows(train_data, LOOKBACK)
    x_test, y_test = _make_windows(test_data, LOOKBACK)

    # Build LSTM model
    model = Sequential()
    model.add(LSTM(units=100, return_sequences=True, input_shape=(x_train.shape[1], 1)))
    model.add(LSTM(units=50, return_sequences=False))
    model.add(Dense(25))
    model.add(Dense(1))

    # Compile model
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Train model
    model.fit(x_train, y_train, batch_size=2, epochs=50)

    # Predict on test data and map the predictions back to price units
    predictions = model.predict(x_test)
    predictions = scaler.inverse_transform(predictions)

    # Plot the results
    plt.figure(figsize=(14,5))
    plt.plot(merged_data.index[train_len:], merged_data['adj_close'][train_len:], label='Actual Price')
    plt.plot(merged_data.index[train_len:], predictions, label='Predicted Price')
    plt.xlabel('Date')
    plt.ylabel('Stock Price')
    plt.legend()
    plt.show()

    # Calculate RMSE in price units: compare the predictions with the actual
    # prices, not with the scaled targets (y_test is in scaler units).
    actual_prices = prices[train_len:]
    rmse = math.sqrt(mean_squared_error(actual_prices, predictions))
    print(f"Model RMSE: {rmse}")

    # Save the model
    model.save(f'{stock}_lstm_model.h5')
    print(f"Model saved as {stock}_lstm_model.h5")
else:
    print(f"Stock data for {stock} could not be fetched. Please try again.")


Enter the Stock Symbol (e.g., AAPL, TSLA, BTC-USD, ETH-USD):  TSLA


Successfully fetched data from Polygon.io for TSLA


Unnamed: 0_level_0,adj_close
date,Unnamed: 1_level_1
2022-10-17 04:00:00,219.35
2022-10-18 04:00:00,220.19
2022-10-19 04:00:00,222.04
2022-10-20 04:00:00,207.28
2022-10-21 04:00:00,214.44


[*********************100%***********************]  1 of 1 completed

Successfully fetched data from yfinance for TSLA





Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2010-06-29,1.592667
2010-06-30,1.588667
2010-07-01,1.464
2010-07-02,1.28
2010-07-06,1.074


  super().__init__(**kwargs)


Epoch 1/50
[1m369/369[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 38ms/step - loss: 0.0152
Epoch 2/50
[1m369/369[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 37ms/step - loss: 0.0038
Epoch 3/50
[1m369/369[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 32ms/step - loss: 0.0021
Epoch 4/50
[1m369/369[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 34ms/step - loss: 0.0018
Epoch 5/50
[1m369/369[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 39ms/step - loss: 0.0017
Epoch 6/50
[1m369/369[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 35ms/step - loss: 0.0014
Epoch 7/50
[1m369/369[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 35ms/step - loss: 0.0011
Epoch 8/50
[1m369/369[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 35ms/step - loss: 0.0012
Epoch 9/50
[1m369/369[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 40ms/step - loss: 0.0011
Epoch 10/50
[1m369/369[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14