In [5]:
ticker = 'AAPL'

In [6]:
import os
from random import random
import pandas as pd
import numpy as np


def gen_int_random_size(size):
    return int((random() * 100) % size)

In [7]:
def load_random_stock(ticker):
    current_directory = os.getcwd()
    preprocessed_directory = os.path.join(current_directory + "/stocknet-dataset/price/preprocessed")
    files = os.listdir(preprocessed_directory)
    # print(files)
    
# gen_int_random_size(len(directories))
    random_file = os.path.join(preprocessed_directory, ticker + ".txt")
    print(random_file)
    return random_file

def load_into_pandas(filename):    
    try:
        columns = ['date', 'open_price', 'high_price', 'low_price', 'close_price', 'adj_close_price', 'volume']
        df = pd.read_csv(filename, sep='\t', header=None, names=columns, parse_dates=['date'])
        return df
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

In [18]:
stock_file = load_random_stock(ticker)
stock_df = load_into_pandas(stock_file)
stock_df['date'] = pd.to_datetime(stock_df['date'])
stock_df_sorted = stock_df.sort_values(by='date')
stock_df_sorted = stock_df_sorted.reset_index(drop=True)
stock_df_sorted.set_index('date', inplace=True)
stock_df_sorted

/Users/darkosegvic/PersonalDevelopment/stocknet/semantic-sentiment-analyst/stocknet-dataset/price/preprocessed/AAPL.txt


Unnamed: 0_level_0,open_price,high_price,low_price,close_price,adj_close_price,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2012-09-05,-0.007022,0.107768,0.109047,0.097979,-0.611802,84093800.0
2012-09-06,0.009012,0.111639,0.120094,0.107725,0.779618,97799100.0
2012-09-07,0.006166,0.109697,0.116947,0.105966,0.538215,82416600.0
2012-09-10,-0.026013,0.106800,0.111420,0.076953,-2.284607,121999500.0
2012-09-11,-0.003244,0.110742,0.119075,0.096363,-0.277496,125995800.0
...,...,...,...,...,...,...
2017-08-28,0.010071,0.001752,0.013387,0.000438,1.610000,25966000.0
2017-08-29,0.008918,-0.008485,0.010219,-0.009104,1.440003,29516900.0
2017-08-30,0.002701,0.005463,0.006016,-0.001842,0.440002,27269600.0
2017-08-31,0.003979,0.001775,0.007163,0.000796,0.649994,26785100.0


In [9]:
import requests
def get_earnings(ticker):
    url = f'https://www.alphavantage.co/query?function=EARNINGS&symbol={ticker}&apikey=B0RLXVWXQMHH0LQD'
    r = requests.get(url)
    data = r.json()
    print(data)
    return data['quarterlyEarnings']

# earnings = get_earnings("AAPL")


In [21]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from utils import load_json_to_dictionary

earnings = load_json_to_dictionary("AAPL_earnings.json")

eps_df = pd.DataFrame(earnings)
eps_df['fiscalDateEnding'] = pd.to_datetime(eps_df['fiscalDateEnding'])

# Set the fiscalDateEnding as the index to align with stock_df_sorted
eps_df.set_index('fiscalDateEnding', inplace=True)

# print(eps_df.index)
# print(stock_df_sorted['date'])
# Reindex eps_df to match stock_df_sorted index, filling NaNs with forward fill method
eps_df = eps_df.reindex(stock_df_sorted.index, method='ffill')

# Concatenate the EPS data with the stock_df_sorted DataFrame
stock_df_combined = pd.merge(left= stock_df_sorted, right=eps_df, left_index=True, right_index=True)

# Calculate P/E Ratio
stock_df_combined['PE_ratio'] = stock_df_combined['adj_close_price'] / stock_df_combined['reportedEPS'].astype(float)

# Select the relevant columns and display the result
stock_df_combined = stock_df_combined[['adj_close_price', 'reportedEPS', 'PE_ratio']]

stock_df_combined
# eps_df

Unnamed: 0_level_0,adj_close_price,reportedEPS,PE_ratio
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2012-09-05,-0.611802,0.31,-1.973555
2012-09-06,0.779618,0.31,2.514897
2012-09-07,0.538215,0.31,1.736177
2012-09-10,-2.284607,0.31,-7.369700
2012-09-11,-0.277496,0.31,-0.895148
...,...,...,...
2017-08-28,1.610000,0.5175,3.111111
2017-08-29,1.440003,0.5175,2.782614
2017-08-30,0.440002,0.5175,0.850245
2017-08-31,0.649994,0.5175,1.256027


In [29]:

# Select relevant features
df = stock_df_combined[['adj_close_price', 'PE_ratio']]
# df = stock_df_combined[['adj_close_price']]

# Scale the features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df)

# Create training and test data
train_size = int(len(scaled_data) * 0.8)
test_size = len(scaled_data) - train_size
train_data, test_data = scaled_data[0:train_size, :], scaled_data[train_size - 30:, :]

def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - time_step - 1):
        a = dataset[i:(i + time_step)]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])  
    return np.array(dataX), np.array(dataY)

# Reshape into X=t, t+1, t+2, ... t+n and Y=t+n+1
time_step = 60
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

# Reshape input to be [samples, time steps, features]
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], len(df.columns))
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], len(df.columns))


In [28]:
from models.models import simpleLSTM, stackedLSTM, bidirectionalLSTM, LSTMAttentionMechanism, encoderDecoderLSTM

model = simpleLSTM(time_step, len(df.columns), 'adam', 'mean_squared_error')
# model = encoderDecoderLSTM(time_step, 'adam', 'mean_squared_error')

# model.fit(X_train, y_train,epochs = 100)
model.fit(X_train, y_train,epochs = 100, validation_split=0.5)


Epoch 1/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 0.2162 - val_loss: 0.0175
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0145 - val_loss: 0.0143
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0071 - val_loss: 0.0128
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0055 - val_loss: 0.0128
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0051 - val_loss: 0.0128
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0068 - val_loss: 0.0128
Epoch 7/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0043 - val_loss: 0.0128
Epoch 8/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0072 - val_loss: 0.0128
Epoch 9/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x348da5880>

simpleLSTM  - 0.0054
stackedLSTM - 0.0066
bidirectionalLSTM - 0.0060
attentionLSTM - 0.0062
encoderDecoderLSTM - 


In [None]:
# Summary
model.summary()

In [None]:
import matplotlib.pyplot as plt

# Make predictions
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Invert predictions back to original scale
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)
y_train_inv = scaler.inverse_transform([y_train])
y_test_inv = scaler.inverse_transform([y_test])

# Calculate RMSE performance metrics
import math
from sklearn.metrics import mean_squared_error
train_rmse = math.sqrt(mean_squared_error(y_train_inv[0], train_predict[:,0]))
test_rmse = math.sqrt(mean_squared_error(y_test_inv[0], test_predict[:,0]))
print(f"Train RMSE: {train_rmse}, Test RMSE: {test_rmse}")

# Plotting
plt.figure(figsize=(12,6))
plt.plot(y_train_inv[0], label='Actual Train')
plt.plot(train_predict[:,0], label='Predicted Train')
plt.plot(range(len(y_train_inv[0]), len(y_train_inv[0]) + len(y_test_inv[0])), y_test_inv[0], label='Actual Test')
plt.plot(range(len(train_predict[:,0]), len(train_predict[:,0]) + len(test_predict[:,0])), test_predict[:,0], label='Predicted Test')
plt.title('Stock Price Prediction')
plt.xlabel('Days')
plt.ylabel('Price')
plt.legend()
plt.show()


In [None]:
# Prepare the last 30 days data
last_30_days = scaled_data[-30:]
X_input = last_30_days.reshape(1, -1)

# Convert to the format accepted by the model
temp_input = list(X_input)
temp_input = temp_input[0].tolist()

# Demonstrate prediction for next 30 days
lst_output = []
n_steps = 30
i = 0
while(i < 30):
    
    if(len(temp_input) > 30):
        # Reshape and predict the next value
        X_input = np.array(temp_input[1:])
        X_input = X_input.reshape(1, -1)
        X_input = X_input.reshape((1, n_steps, 1))
        yhat = model.predict(X_input, verbose=0)
        temp_input.extend(yhat[0].tolist())
        temp_input = temp_input[1:]
        lst_output.extend(yhat.tolist())
        i += 1
    else:
        X_input = X_input.reshape((1, n_steps, 1))
        yhat = model.predict(X_input, verbose=0)
        temp_input.extend(yhat[0].tolist())
        lst_output.extend(yhat.tolist())
        i += 1

# Transform the prediction back to the original scale
final_output = scaler.inverse_transform(lst_output)

# Visualize the prediction
days = range(len(scaled_data), len(scaled_data) + 30)
plt.figure(figsize=(12,6))
plt.plot(days, final_output, label='Predicted Next 30 Days')
plt.title('Future Stock Price Prediction')
plt.xlabel('Days')
plt.ylabel('Price')
plt.legend()
plt.show()
