In [182]:
# Step 1: Connecting to a stock market API to retrieve historical stock
#         data for a specific company. 

In [183]:
# Connecting to yahoo finance API 

import yfinance as yf 

# Getting desired company
ticker = input("Which company would you liked to analyze? ").upper()

# Testing if the ticker is valid. If it is, its information is retrieved
end = False

while not end:
  stock_info = yf.Ticker(ticker).info
  if stock_info:
    stock_history = yf.Ticker(ticker).history(period='max')
    end = True
  else:
    ticker = input('Invalid ticker. Try again: ').upper()

print(stock_history)

Which company would you liked to analyze? AAPL
                                 Open        High         Low       Close  \
Date                                                                        
1980-12-12 00:00:00-05:00    0.099874    0.100308    0.099874    0.099874   
1980-12-15 00:00:00-05:00    0.095098    0.095098    0.094663    0.094663   
1980-12-16 00:00:00-05:00    0.088149    0.088149    0.087715    0.087715   
1980-12-17 00:00:00-05:00    0.089886    0.090321    0.089886    0.089886   
1980-12-18 00:00:00-05:00    0.092492    0.092927    0.092492    0.092492   
...                               ...         ...         ...         ...   
2022-12-23 00:00:00-05:00  130.919998  132.419998  129.639999  131.860001   
2022-12-27 00:00:00-05:00  131.380005  131.410004  128.720001  130.029999   
2022-12-28 00:00:00-05:00  129.669998  131.029999  125.870003  126.040001   
2022-12-29 00:00:00-05:00  127.989998  130.479996  127.730003  129.610001   
2022-12-30 00:00:00-05:00  12

In [184]:
# Step 2: Preprocessing the data and splitting it into training and test sets.

In [200]:
import numpy as np
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split

# Converting the historical data into a numpy array 
stock_prices = np.array(stock_history['Close'])
max_price = max(stock_prices)
min_price = min(stock_prices)
print(max_price, min_price)


# Normalizing the stock prices 
scaler = StandardScaler()
norm_stock_prices = scaler.fit_transform(stock_prices.reshape(-1, 1))

X_train, X_test, y_train, y_test = train_test_split(norm_stock_prices, norm_stock_prices, test_size=0.2)

180.95973205566406 0.03821254149079323


In [186]:
# Building the deep learning models
# Building a RNN, LSTM network

In [187]:
# RNN

In [188]:
from keras.layers import SimpleRNN, Dense
from keras.models import Sequential 

RNN_model = Sequential()
RNN_model.add(SimpleRNN(units=32, input_shape=(10, 1)))
RNN_model.add(Dense(1))

RNN_model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])
RNN_model.fit(X_train, y_train, batch_size=64, epochs=10, validation_data=(X_test, y_test))


Epoch 1/10








Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fdf30c32f40>

In [189]:
# LSTM

In [190]:
from keras.layers import LSTM
from keras.models import Sequential

LSTM_model = Sequential()
LSTM_model.add(LSTM(units=32, input_shape=(10, 1)))
LSTM_model.add(Dense(1))

LSTM_model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])
LSTM_model.fit(X_train, y_train, batch_size=64, epochs=10, validation_data=(X_test, y_test))

Epoch 1/10








Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fdf309b6130>

In [191]:
# Building support vector machines and gradient boosting models

In [192]:
# SVP

In [193]:
from sklearn.svm import SVR

SVM_model = SVR(kernel='linear')
SVM_model.fit(X_train, y_train)

predictions = SVM_model.predict(X_test)
accuracy = SVM_model.score(X_test, y_test)

print(accuracy)


0.9916383799764377


  y = column_or_1d(y, warn=True)


In [194]:
# gradient boost

In [195]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

GB_model = GradientBoostingRegressor(random_state=0)
GB_model.fit(X_train, y_train)

r2_score = GB_model.score(X_test, y_test)
# print(f'Test R^2 score: {r2_score:.2f}')


# Get the model's MAE and MSE scores on the test data
mae = mean_absolute_error(y_test, GB_model.predict(X_test))
mse = mean_squared_error(y_test, GB_model.predict(X_test))
# print(f'Test MAE: {mae:.2f}')
# print(f'Test MSE: {mse:.2f}')


  y = column_or_1d(y, warn=True)


In [196]:
# Combining the models 

In [197]:
RNN_pred = RNN_model.predict(X_test).sum(axis=1)
LSTM_pred = LSTM_model.predict(X_test).sum(axis=1)
SVM_pred = SVM_model.predict(X_test)
GB_pred = GB_model.predict(X_test)


combined_predictions = (RNN_pred + LSTM_pred + SVM_pred + GB_pred) / 4
final_predictions = combined_predictions
final_predictions.flatten()

y_test_reshaped = y_test.reshape(-1, 1)
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_test, final_predictions)
print('Mean absolute error: ', mae)

print(len(y_test))








Mean absolute error:  0.0249198756332726
2121


In [202]:
print(final_predictions)
denormalized_prices = final_predictions * (max_price - min_price) + min_price

[ 0.27396934 -0.41983676 -0.35753069 ...  0.37767868 -0.41526481
  0.059342  ]
[ 49.60516267 -75.91929271 -64.64678275 ...  68.36841383 -75.09212841
  10.77445769]
