In [7]:
import predictor
import settings
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import numpy as np

# dataset example
# dataset = predictor.predict(url="", example=True, example_url=f"https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol=IBM&interval=5min&apikey={settings.settings['api_key']}")

# Alpha Vantage request for the full daily-adjusted AAPL price history.
# The API key is read from the project settings (as the example above already
# does) instead of being hardcoded, so the credential is not committed with
# the notebook. The key that used to be embedded here should be rotated.
url = (
    "https://www.alphavantage.co/query"
    "?function=TIME_SERIES_DAILY_ADJUSTED"
    "&symbol=AAPL"
    "&outputsize=full"
    f"&apikey={settings.settings['api_key']}"
)

# real dataset
# NOTE(review): example=True is passed together with a real URL; confirm
# against predictor.predict that this combination is intended.
dataset = predictor.predict(url, example=True)
dataset

Unnamed: 0,open,high,low,close,adjusted close,volume,dividend amount,split coefficient,date
0,148.130,150.42,146.925,150.18,150.180000,51804132.0,0.0,1.0,19991101
1,150.160,150.37,147.715,148.01,148.010000,58724070.0,0.0,1.0,19991101
2,152.305,152.70,149.970,151.29,151.290000,74829573.0,0.0,1.0,19991101
3,146.430,151.48,146.150,150.72,150.720000,80389400.0,0.0,1.0,19991101
4,149.130,149.87,147.290,148.79,148.790000,64218266.0,0.0,1.0,19991101
...,...,...,...,...,...,...,...,...,...
5799,84.620,88.37,84.000,88.31,0.672034,3721500.0,0.0,1.0,19991101
5800,82.060,85.37,80.620,83.62,0.636344,3384700.0,0.0,1.0,19991101
5801,81.620,83.25,81.000,81.50,0.620210,2932700.0,0.0,1.0,19991101
5802,78.000,81.69,77.310,80.25,0.610698,3564600.0,0.0,1.0,19991101


In [8]:
# Regression targets, selected by column name rather than by position so the
# cell keeps working if the column order of `dataset` ever changes.
close_data = dataset["close"]
open_data = dataset["open"]

# Feature frames: each one drops the column its model has to predict.
#   dataset_with_closed -> keeps "close", used as features to predict the open price
#   dataset_with_opened -> keeps "open", used as features to predict the close price
# NOTE(review): both frames still contain the other same-day columns shown in
# the dataset preview (e.g. "high", "low", "adjusted close"); confirm that
# using them as features is intended and does not leak the target.
dataset_with_closed = dataset.drop(columns="open")
dataset_with_opened = dataset.drop(columns="close")

In [9]:
# Hold out 15% of the rows as a test set for the open-price model.
# The seed is written as the integer 0: the previous `random_state=False`
# only worked because the boolean is coerced to that same integer, which made
# it read like an on/off switch rather than a seed.
x_train_open, x_test_open, y_train_open, y_test_open = train_test_split(
    dataset_with_closed,
    open_data,
    test_size=0.15,
    random_state=0,
)

In [10]:
# Hold out 15% of the rows as a test set for the close-price model.
# The seed is written as the integer 0: the previous `random_state=False`
# only worked because the boolean is coerced to that same integer, which made
# it read like an on/off switch rather than a seed.
x_train_close, x_test_close, y_train_close, y_test_close = train_test_split(
    dataset_with_opened,
    close_data,
    test_size=0.15,
    random_state=0,
)

In [11]:
# Fit an ordinary least-squares model for the close price.
# fit() returns the estimator itself, so construction and training are chained;
# the trailing expression keeps the estimator repr as the cell output.
regressor_close = LinearRegression().fit(x_train_close, y_train_close)
regressor_close

LinearRegression()

In [12]:
# regressor_close.predict()

In [13]:
# Predict close prices for the held-out rows and show them next to the
# actual values (the table is indexed like y_test_close).
close_predict = regressor_close.predict(x_test_close)
df = pd.DataFrame(
    data={
        "Actual close price": y_test_close,
        "Predicted close price": close_predict,
    }
)
df

Unnamed: 0,Actual close price,Predicted close price
1519,110.88,109.591855
3457,88.37,89.677369
895,202.86,203.306004
5424,24.96,25.261651
2033,105.11,105.125139
...,...,...
352,137.27,136.524897
3957,88.00,87.732159
3738,161.36,163.462381
2714,493.42,494.579631


In [14]:
# Test-set error of the close-price model: compute both metrics first,
# then report them (MSE, then MAE).
mse_close = metrics.mean_squared_error(y_test_close, close_predict)
mae_close = metrics.mean_absolute_error(y_test_close, close_predict)

print(f"Mean squared error for close price: {mse_close}")

print(f"Mean absolute error for close price: {mae_close}")

Mean squared error for close price: 2.3887200955436616
Mean absolute error for close price: 0.9030307069541926


In [15]:
# Fit an ordinary least-squares model for the open price.
# fit() returns the estimator itself, so construction and training are chained;
# the trailing expression keeps the estimator repr as the cell output.
regressor_open = LinearRegression().fit(x_train_open, y_train_open)
regressor_open

LinearRegression()

In [16]:
# Predict open prices for the held-out rows and show them next to the
# actual values (the table is indexed like y_test_open).
open_predict = regressor_open.predict(x_test_open)
df = pd.DataFrame(
    data={
        "Actual open price": y_test_open,
        "Predicted open price": open_predict,
    }
)
df

Unnamed: 0,Actual open price,Predicted open price
1519,109.88,109.074011
3457,88.83,89.999232
895,205.88,205.219280
5424,25.97,25.835989
2033,104.85,104.929126
...,...,...
352,136.60,135.847441
3957,88.59,88.077675
3738,161.71,163.902741
2714,490.96,493.147009


In [17]:
# Test-set error of the open-price model.
# The messages name the open price: they previously said "close price",
# copied from the close-price metrics cell, which mislabeled these numbers.

# MSE for predicted open price
print(f"Mean squared error for open price: {metrics.mean_squared_error(y_test_open, open_predict)}")

# MAE for predicted open price
print(f"Mean absolute error for open price: {metrics.mean_absolute_error(y_test_open, open_predict)}")

Mean squared error for close price: 2.323623115717956
Mean absolute error for close price: 0.909965043705712
