In [80]:
import datetime as dt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.models import Sequential
from ta.momentum import StochasticOscillator
from ta.volatility import BollingerBands
from ta.volume import OnBalanceVolumeIndicator
from ta.utils import dropna
from ta.trend import SMAIndicator, MACD,  WMAIndicator, EMAIndicator
from ta.momentum import WilliamsRIndicator, RSIIndicator


def get_data(company, start, end):
    data = yf.download(company, start=start, end=end)
    return data

def add_indicators(data):
    # Create a new DataFrame with the added indicators
    df = data.copy()
    
    # Relative Strength Index (RSI)
    rsi_indicator = RSIIndicator(close=df["Close"])
    df["RSI"] = rsi_indicator.rsi()
    
    # Moving Average Convergence Divergence (MACD)
    macd_indicator = MACD(close=df["Close"])
    df["MACD"] = macd_indicator.macd()
    
    # Stochastic Oscillator
    stochastic_indicator = StochasticOscillator(high=df["High"], low=df["Low"], close=df["Close"])
    df["%K"] = stochastic_indicator.stoch()
    df["%D"] = stochastic_indicator.stoch_signal()
    
    # Bollinger Bands
    bb_indicator = BollingerBands(close=df["Close"])
    df["BB_High"] = bb_indicator.bollinger_hband()
    df["BB_Low"] = bb_indicator.bollinger_lband()
    
    # On-Balance Volume (OBV)
    obv_indicator = OnBalanceVolumeIndicator(close=df["Close"], volume=df["Volume"])
    df["OBV"] = obv_indicator.on_balance_volume()
    
    # Simple Moving Average (SMA)
    sma_indicator = SMAIndicator(close=df["Close"], window=20)
    df["SMA"] = sma_indicator.sma_indicator()
    
    # Exponential Moving Average (EMA)
    ema_indicator = EMAIndicator(close=df["Close"], window=20)
    df["EMA"] = ema_indicator.ema_indicator()
    
    # Weighted Moving Average (WMA)
    wma_indicator = WMAIndicator(close=df["Close"], window=20)
    df["WMA"] = wma_indicator.wma()
    
    # Williams Alligator
    williamsr_indicator = WilliamsRIndicator(high=df["High"], low=df["Low"], close=df["Close"])
    df["WilliamsR"] = williamsr_indicator.williams_r()
    
    # Drop any rows with NaN values
    df = dropna(df)
    
    return df

def train_model(company_data):
    # Prepare the data
    scaler = MinMaxScaler(feature_range=(0,1))
    scaled_data = scaler.fit_transform(company_data['Close'].values.reshape(-1, 1))

    prediction_days = 20

    x_train = []
    y_train = []

    for x in range(prediction_days, len(scaled_data)):
        x_train.append(scaled_data[x-prediction_days:x,0])
        y_train.append(scaled_data[x,0])
    x_train, y_train = np.array(x_train), np.array(y_train)
    x_train = np.reshape(x_train,(x_train.shape[0], x_train.shape[1],1))

    # Build and train the model
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True,input_shape=(x_train.shape[1],1)))
    model.add(Dropout(0.2))

    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.2))

    model.add(LSTM(units=50))
    model.add(Dropout(0.2))

    model.add(Dense(units=1))

    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(x_train, y_train, epochs=2, batch_size=32)

    return model, scaler, prediction_days

# Define a list of companies and date range to download data
companies = ["AAPL", "MSFT", "GOOG"]
start = "2010-03-03"
end = "2021-01-01"

# Download data for each company and add indicators
data = {}
models = {}
scalers = {}
prediction_days = {}
for company in companies:
    company_data = get_data(company, start, end)
    company_data = add_indicators(company_data)
    data[company] = company_data

    # Train the model for this company
    model, scaler, pred_days = train_model(company_data)
    models[company] = model
    scalers[company] = scaler
    prediction_days[company] = pred_days
    
    # Get actual prices for this company
    actual_data = yf.download(company, start=start, end=end)
    actual_prices = actual_data["Close"].values

    # Make predictions for this company
    inputs = actual_data["Close"][-prediction_days:].values
    inputs = inputs.reshape(-1,1)
    inputs = scaler.transform(inputs)
    
    X_test = []
    for x in range(prediction_days, len(inputs)):
        X_test.append(inputs[x-prediction_days:x,0])
    X_test = np.array(X_test)
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
    predicted_prices = model.predict(X_test)
    predicted_prices = scaler.inverse_transform(predicted_prices)
    
    # Plot actual and predicted prices for this company
    plt.plot(actual_prices,color="black",label=f"Actual {company} prices")
    plt.plot(predicted_prices,color='green',label=f"predicted {company} prices")
    plt.title(f"{company} share price")
    plt.legend()
    plt.show()

[*********************100%***********************]  1 of 1 completed
Epoch 1/2
Epoch 2/2
[*********************100%***********************]  1 of 1 completed


  print(actual_data["Close"][prediction_days].values)


KeyError: "None of [Index(['AAPL'], dtype='object', name='Date')] are in the [index]"

In [67]:
import datetime as dt
test_start = dt.datetime(2021,1,2)
test_end = dt.datetime(2023,1,1)
print(test_start)
print(test_end)

2021-01-02 00:00:00
2023-01-01 00:00:00


In [68]:
import yfinance as yf
import numpy as np
from sklearn.linear_model import LinearRegression
def add_indicators(data):
    # Create a new DataFrame with the added indicators
    df = data.copy()
    
    # Relative Strength Index (RSI)
    rsi_indicator = RSIIndicator(close=df["Close"])
    df["RSI"] = rsi_indicator.rsi()
    
    # Moving Average Convergence Divergence (MACD)
    macd_indicator = MACD(close=df["Close"])
    df["MACD"] = macd_indicator.macd()
    
    # Stochastic Oscillator
    stochastic_indicator = StochasticOscillator(high=df["High"], low=df["Low"], close=df["Close"])
    df["%K"] = stochastic_indicator.stoch()
    df["%D"] = stochastic_indicator.stoch_signal()
    
    # Bollinger Bands
    bb_indicator = BollingerBands(close=df["Close"])
    df["BB_High"] = bb_indicator.bollinger_hband()
    df["BB_Low"] = bb_indicator.bollinger_lband()
    
    # On-Balance Volume (OBV)
    obv_indicator = OnBalanceVolumeIndicator(close=df["Close"], volume=df["Volume"])
    df["OBV"] = obv_indicator.on_balance_volume()
    
    # Simple Moving Average (SMA)
    sma_indicator = SMAIndicator(close=df["Close"], window=20)
    df["SMA"] = sma_indicator.sma_indicator()
    
    # Exponential Moving Average (EMA)
    ema_indicator = EMAIndicator(close=df["Close"], window=20)
    df["EMA"] = ema_indicator.ema_indicator()
    
    # Weighted Moving Average (WMA)
    wma_indicator = WMAIndicator(close=df["Close"], window=20)
    df["WMA"] = wma_indicator.wma()
    
    # Williams Alligator
    williamsr_indicator = WilliamsRIndicator(high=df["High"], low=df["Low"], close=df["Close"])
    df["WilliamsR"] = williamsr_indicator.williams_r()
    
    # Drop any rows with NaN values
    df = dropna(df)
    
    return df

# Define a list of stock tickers
stocks = ["AAPL", "MSFT", "GOOGL"]

# Loop over the stock tickers and train a model for each one
for stock in stocks:
    # Load historical stock prices from Yahoo Finance
    data = yf.download(stock, start="2010-01-01", end="2022-01-01")
    data = add_indicators(data)
    # Compute the log returns of the stock prices
    data["log_return"] = np.log(data["Close"]).diff()

    # Drop any missing values
    data = data.dropna()

    # Split the data into training and testing sets
    train_data = data[:len(data)//2]
    test_data = data[len(data)//2:]

    # Train a linear regression model on the training data
    X_train = train_data[["log_return"]]
    y_train = train_data["log_return"].shift(-1).fillna(0)
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Evaluate the model on the testing data
    X_test = test_data[["log_return"]]
    y_test = test_data["log_return"].shift(-1).fillna(0)
    score = model.score(X_test, y_test)

    print(f"Stock: {stock}, R-squared score: {score:.2f}")


[*********************100%***********************]  1 of 1 completed
Stock: AAPL, R-squared score: -0.00
[*********************100%***********************]  1 of 1 completed
Stock: MSFT, R-squared score: -0.01
[*********************100%***********************]  1 of 1 completed
Stock: GOOGL, R-squared score: -0.00


In [70]:
import yfinance as yf
import numpy as np
from sklearn.linear_model import LinearRegression
def add_indicators(data):
    # Create a new DataFrame with the added indicators
    df = data.copy()
    
    # Relative Strength Index (RSI)
    rsi_indicator = RSIIndicator(close=df["Close"])
    df["RSI"] = rsi_indicator.rsi()
    
    # Moving Average Convergence Divergence (MACD)
    macd_indicator = MACD(close=df["Close"])
    df["MACD"] = macd_indicator.macd()
    
    # Stochastic Oscillator
    stochastic_indicator = StochasticOscillator(high=df["High"], low=df["Low"], close=df["Close"])
    df["%K"] = stochastic_indicator.stoch()
    df["%D"] = stochastic_indicator.stoch_signal()
    
    # Bollinger Bands
    bb_indicator = BollingerBands(close=df["Close"])
    df["BB_High"] = bb_indicator.bollinger_hband()
    df["BB_Low"] = bb_indicator.bollinger_lband()
    
    # On-Balance Volume (OBV)
    obv_indicator = OnBalanceVolumeIndicator(close=df["Close"], volume=df["Volume"])
    df["OBV"] = obv_indicator.on_balance_volume()
    
    # Simple Moving Average (SMA)
    sma_indicator = SMAIndicator(close=df["Close"], window=20)
    df["SMA"] = sma_indicator.sma_indicator()
    
    # Exponential Moving Average (EMA)
    ema_indicator = EMAIndicator(close=df["Close"], window=20)
    df["EMA"] = ema_indicator.ema_indicator()
    
    # Weighted Moving Average (WMA)
    wma_indicator = WMAIndicator(close=df["Close"], window=20)
    df["WMA"] = wma_indicator.wma()
    
    # Williams Alligator
    williamsr_indicator = WilliamsRIndicator(high=df["High"], low=df["Low"], close=df["Close"])
    df["WilliamsR"] = williamsr_indicator.williams_r()
    
    # Drop any rows with NaN values
    df = dropna(df)
    
    return df

# Define a list of stock tickers
stocks = ["AAPL", "MSFT", "GOOGL"]

# Loop over the stock tickers and train a model for each one
for stock in stocks:
    # Load historical stock prices from Yahoo Finance
    data = yf.download(stock, start="2010-01-01", end="2022-01-01")
    #data = add_indicators(data)
    # Compute the log returns of the stock prices
    data["log_return"] = np.log(data["Close"]).diff()

    # Drop any missing values
    data = data.dropna()

    # Split the data into training and testing sets
    train_data = data[:len(data)//2]
    test_data = data[len(data)//2:]

    # Train a linear regression model on the training data
    X_train = train_data[["log_return"]]
    y_train = train_data["log_return"].shift(-1).fillna(0)
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Evaluate the model on the testing data
    X_test = test_data[["log_return"]]
    y_test = test_data["log_return"].shift(-1).fillna(0)
    score = model.score(X_test, y_test)

    print(f"Stock: {stock}, R-squared score: {score:.2f}")


[*********************100%***********************]  1 of 1 completed
Stock: AAPL, R-squared score: -0.00
[*********************100%***********************]  1 of 1 completed
Stock: MSFT, R-squared score: -0.01
[*********************100%***********************]  1 of 1 completed
Stock: GOOGL, R-squared score: -0.01
