# Web Scraping

In [16]:
#Importing the required libraries for web scraping
from multiprocessing.dummy import Pool as ThreadPool

import bs4
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from fastnumbers import fast_float
from fastnumbers import isfloat

In [17]:
#Data cleaning functions for web scraping

def ffloat(string): #Cleans the commas and % from the string given
    """Convert a scraped value to a number.

    Parameters
    ----------
    string : str, int, float, numpy number or None
        The raw value. For strings, only the part before the first space is
        used, and ',' and '%' characters are removed before conversion.

    Returns
    -------
    The input unchanged when it is already numeric; ``np.nan`` when it is
    ``None`` or when the cleaned text cannot be converted; otherwise the
    result of ``fast_float`` on the cleaned text.
    """
    if string is None:
        return np.nan
    # Already-numeric values (Python or numpy, any width) pass through untouched
    if isinstance(string, (int, float, np.integer, np.floating)):
        return string
    cleaned = string.split(" ")[0].replace(',', '').replace('%', '')
    # default=np.nan makes unparsable text come back as NaN instead of as the text
    return fast_float(cleaned, default=np.nan)

def ffloat_list(string_list): #Cleans every string of a whole list
    """Run ``ffloat`` over each element of ``string_list`` and return the results as a list."""
    return [ffloat(entry) for entry in string_list]

def remove_multiple_spaces(string): #Collapses repeated whitespace in a string
    """Replace every run of whitespace in ``string`` with a single space.

    Leading and trailing whitespace is removed as well. A value whose type is
    not exactly ``str`` is returned unchanged.
    """
    if type(string) is not str:
        return string
    words = string.split()
    return ' '.join(words)

def get_children(html_content): #Gets the child content from the html tag, usually for tables where <td> is a child of <tr>
    """Return the direct children of ``html_content`` that carry content.

    A child is kept when its type is ``bs4.element.Tag``, or when its string
    form is non-empty once newlines and surrounding whitespace are removed.
    """
    kept = []
    for child in html_content.children:
        if type(child)==bs4.element.Tag:
            kept.append(child)
        elif len(str(child).replace("\n","").strip())>0:
            kept.append(child)
    return kept

In [18]:
#Request the Yahoo Finance "gainers" page; timeout=240 is the number of seconds requests waits before giving up
response = requests.get("https://finance.yahoo.com/gainers", timeout=240) #Fetches the page over HTTP
response.status_code #HTTP status code of the response; 200 means the request succeeded

200

In [19]:
html = BeautifulSoup(response.content, "html.parser") #Parses the downloaded page bytes with the "html.parser" backend

In [20]:
def get_table_simple(table,is_table_tag=True): #Changes the table data in the form of a list
    """Extract the text of a table-like element as a list of rows.

    Parameters
    ----------
    table : parsed HTML element
        Searched for ``<tr>`` rows when ``is_table_tag`` is true; otherwise
        its direct children (via ``get_children``) are used as the rows.
    is_table_tag : bool
        Selects how the rows are located (see above).

    Returns
    -------
    list of list of str
        One inner list per row, holding the cleaned, non-empty text of each
        child of that row. A row without any text yields an empty list.
    """
    if is_table_tag:
        rows = table.find_all('tr')
    else:
        rows = get_children(table)

    def _cell_texts(row):
        # Strip, drop newlines and collapse whitespace for every child of the row
        cleaned = (remove_multiple_spaces(cell.text.strip().replace("\n",""))
                   for cell in get_children(row))
        return [text for text in cleaned if len(text)!=0]

    return [_cell_texts(row) for row in rows]

In [21]:
#Extract every <tr> row found on the page; row 0 holds the column headings
init_list = get_table_simple(html)
#Keep the heading row plus the first 5 gainers (rows 0-5). A slice, unlike indexing
#0-5 in a loop, does not raise IndexError when the page returns fewer rows.
top5list = init_list[:6]
dftop5gainers = pd.DataFrame.from_records(top5list) #Change the list into a dataframe
print("Top 5 gainers currently are: ")
dftop5gainers

Top 5 gainers currently are: 


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,Symbol,Name,Price (Intraday),Change,% Change,Volume,Avg Vol (3 month),Market Cap,PE Ratio (TTM),52 Week Range
1,OXY,Occidental Petroleum Corporation,20.79,+5.24,+33.70%,119.073M,49.28M,19.06B,,
2,SJMHY,SJM Holdings Limited,4.8300,+1.0300,+27.11%,279980,247,8.652B,,
3,ENBL,"Enable Midstream Partners, LP",6.43,+1.29,+25.10%,3.852M,3.152M,2.8B,8.53,
4,APA,Apache Corporation,16.07,+3.07,+23.62%,36.337M,24.892M,6.065B,,
5,RCL,Royal Caribbean Cruises Ltd.,69.44,+11.75,+20.37%,61.286M,24.512M,14.54B,79.00,


## Predict using LSTM (Hartalega)

In [None]:
import numpy as np
import math
import datetime
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,confusion_matrix, classification_report

import keras
from keras.preprocessing.sequence import TimeseriesGenerator
import tensorflow as tf

import pandas as pd
import pandas_datareader as web

In [None]:
#Start date and end date of the price history to download
start = datetime.datetime(2019,3,9)
end = datetime.datetime(2020,3,12)

#load the dataset: ticker '5168.KL' from the 'yahoo' data source
df = web.DataReader('5168.KL','yahoo',start,end)
#write the frame to CSV and read it back; later cells access df['Date'] as a regular column
df.to_csv('stock1_Har.csv')
df = pd.read_csv('stock1_Har.csv')

#preview the first rows
df.head()

In [None]:
#Find the row with max close price for documentation
#max close price is at 12 March 2020 which is 6.57
#idxmax returns an index label, so the row is looked up with label-based .loc (not positional .iloc)
df.loc[df['Close'].idxmax()]

In [None]:
#Find the row with minimum close price for documentation
#recorded result: min close price is 4.55, noted as 26 March 2020
#NOTE(review): 26 March 2020 lies after the download end date (12 March 2020) - the year is presumably 2019; confirm
#idxmin returns an index label, so the row is looked up with label-based .loc (not positional .iloc)
df.loc[df['Close'].idxmin()]

In [None]:
#convert the Date column to datetime, use it as the index, and drop the Open/High/Low/Volume columns
#(any other column of the frame, e.g. Date and Close, is kept)
df['Date'] = pd.to_datetime(df['Date'])
#set_axis(..., inplace=True) is not accepted by pandas >= 2.0; assigning the index directly is equivalent
df.index = df['Date']
#errors='ignore' lets this cell be re-run after the columns are already gone
df = df.drop(columns=['Open', 'High', 'Low', 'Volume'], errors='ignore')

In [None]:
from pandas.plotting import register_matplotlib_converters
#register pandas' datetime converters with matplotlib before plotting against the date index
register_matplotlib_converters()
#plot the closing price; the x-axis is the frame's index
plt.figure(figsize=(12,8))
plt.plot(df['Close'])
plt.title("Hartalega Holdings Berhad")
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()

In [None]:
#get the values of close column and reshape it into a column array of shape (n, 1)
close_data = df['Close'].values
close_data = close_data.reshape((-1,1))

#set split percentage to 80%
split_percent = 0.80
#position of the first test row; int() truncates the product
split = int(split_percent*len(close_data))

#split close column in order: rows before `split` for training, the rest for testing
close_train = close_data[:split]
close_test = close_data[split:]

#split date column at the same position so the dates line up with the closes
date_train = df['Date'][:split]
date_test = df['Date'][split:]

#show the sizes of the two partitions
print(len(close_train))
print(len(close_test))

In [None]:
#number of previous days' data to use, to predict the value for the next day
look_back = 15

#TimeseriesGenerator takes in data points and generates batches of temporal data for training or validation.
#data and targets are the same series: each sample is look_back consecutive closes, its target is the close that follows
train_generator = TimeseriesGenerator(close_train, close_train, length=look_back, batch_size=20)
test_generator = TimeseriesGenerator(close_test, close_test, length=look_back, batch_size=1)


In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

#one LSTM layer (10 units) reading windows of look_back closes, followed by a single output unit
model = Sequential()
model.add(
    LSTM(10,
        activation='relu',
        input_shape=(look_back,1))
)
model.add(Dense(1))
#use adam optimiser and mean squared loss function for 25 epochs
model.compile(optimizer='adam', loss='mse')

num_epochs = 25
#Model.fit consumes the generator directly, just as model.evaluate does elsewhere in this notebook;
#fit_generator is deprecated and has been removed from current Keras releases
model.fit(train_generator, epochs=num_epochs, verbose=1)

In [None]:
#Model.predict consumes the generator directly (predict_generator is deprecated)
prediction = model.predict(test_generator)

#reshape it back
close_train = close_train.reshape((-1))
close_test = close_test.reshape((-1))
prediction = prediction.reshape((-1))


#plot the graph with the predicted value
plt.figure(figsize=(12,8))
plt.plot(df['Close'], color='blue')
#the generator's first target is the close at position look_back, so prediction[i] belongs to
#date_test[look_back + i]; starting at date_test[0] would draw the curve look_back days too early
plt.plot(date_test[look_back:], prediction, color='red')
plt.legend(['actual','predicted'])
plt.title("Hartalega Holdings Berhad")
plt.xlabel('Date')
plt.ylabel('Price')


plt.show()


In [None]:
#Evaluate the model on the test generator; the model was compiled with loss='mse', so this is the mean squared error
model.evaluate(test_generator)

In [None]:
close_data = close_data.reshape((-1)) #flatten the closes to a 1-D array for the forecasting helper below

#forecasting: feed the model the past n values (look_back) to get the next value, then slide the
#window forward so the following step is fed the past n-1 values along with the predicted value
def predict(num_prediction, model, history=None, window=None):
    """Forecast ``num_prediction`` future values one step at a time.

    Parameters
    ----------
    num_prediction : int
        Number of steps to forecast.
    model : object
        Anything with a ``predict(x)`` method that accepts an array of shape
        ``(1, window, 1)`` and returns an array indexable as ``[0][0]``.
    history : array-like, optional
        Past values to start from. Defaults to the notebook-level
        ``close_data``.
    window : int, optional
        Number of most recent values fed to the model at each step. Defaults
        to the notebook-level ``look_back``.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``num_prediction + 1``: the last value of
        ``history`` followed by the forecasts.
    """
    if history is None:
        history = close_data
    if window is None:
        window = look_back

    # Flatten so both (n,) and (n, 1) inputs work, then seed with the last window
    prediction_list = np.asarray(history).reshape(-1)[-window:]

    for _ in range(num_prediction):
        x = prediction_list[-window:].reshape((1, window, 1))
        out = model.predict(x)[0][0]
        prediction_list = np.append(prediction_list, out)

    # Drop the seed values except the last one, which anchors the forecast to the data
    return prediction_list[window-1:]
    
def predict_dates(num_prediction):
    """Return ``num_prediction + 1`` consecutive dates, starting at the last entry of ``df['Date']``.

    The dates come from ``pd.date_range`` with its default frequency and are
    returned as a plain list.
    """
    start_day = df['Date'].values[-1]
    return pd.date_range(start_day, periods=num_prediction+1).tolist()

#predict 30 days ahead and build the matching list of dates
num_prediction = 30
forecast = predict(num_prediction, model)
forecast_dates = predict_dates(num_prediction)

In [None]:
#plot the closing prices together with the 30-day forecast
plt.figure(figsize=(12,8))
plt.plot(df['Close'], color='blue')
#slice the dates to the forecast length so both sequences have the same number of points
plt.plot(forecast_dates[:len(forecast)],forecast,color='orange')
plt.legend(['actual','forecasted'])
plt.title("Hartalega Holdings Berhad")
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()

In [None]:
#get the prediction for 1 year
#A single 365-step forecast: calling predict(30, model) twelve times in a loop only recomputed
#the same 30-day forecast, because every call starts again from the last observed closes
num_prediction = 365
forecast = predict(num_prediction, model)
forecast_dates = predict_dates(num_prediction)

forecast[-1]

## Predict using LSTM (IHH)

In [None]:
import numpy as np
import math
import datetime
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,confusion_matrix, classification_report

import keras
from keras.preprocessing.sequence import TimeseriesGenerator
import tensorflow as tf

import pandas as pd
import pandas_datareader as web

In [None]:
#Start date and end date of the price history to download
start = datetime.datetime(2019,3,9)
end = datetime.datetime(2020,3,12)

#load the dataset: ticker '5225.KL' from the 'yahoo' data source
df = web.DataReader('5225.KL','yahoo',start,end)
#write the frame to CSV and read it back; later cells access df['Date'] as a regular column
df.to_csv('stock2_IHH.csv')
df = pd.read_csv('stock2_IHH.csv')

#preview the last rows
df.tail()

In [None]:
#Find the row with max close price for documentation
#idxmax returns an index label, so the row is looked up with label-based .loc (not positional .iloc)
df.loc[df['Close'].idxmax()]

In [None]:
#Find the row with minimum close price for documentation
#idxmin returns an index label, so the row is looked up with label-based .loc (not positional .iloc)
df.loc[df['Close'].idxmin()]

In [None]:
#convert the Date column to datetime, use it as the index, and drop the Open/High/Low/Volume columns
#(any other column of the frame, e.g. Date and Close, is kept)
df['Date'] = pd.to_datetime(df['Date'])
#set_axis(..., inplace=True) is not accepted by pandas >= 2.0; assigning the index directly is equivalent
df.index = df['Date']
#errors='ignore' lets this cell be re-run after the columns are already gone
df = df.drop(columns=['Open', 'High', 'Low', 'Volume'], errors='ignore')

In [None]:
from pandas.plotting import register_matplotlib_converters
#register pandas' datetime converters with matplotlib before plotting against the date index
register_matplotlib_converters()
#plot the closing price; the x-axis is the frame's index
plt.figure(figsize=(12,8))
plt.plot(df['Close'])
plt.title("IHH Healthcare Berhad")
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()

In [None]:
#get the values of close column and reshape it into a column array of shape (n, 1)
close_data = df['Close'].values
close_data = close_data.reshape((-1,1))
#set split percentage to 80%
split_percent = 0.80
#position of the first test row; int() truncates the product
split = int(split_percent*len(close_data))
#split close column in order: rows before `split` for training, the rest for testing
close_train = close_data[:split]
close_test = close_data[split:]
#split date column at the same position so the dates line up with the closes
date_train = df['Date'][:split]
date_test = df['Date'][split:]

#show the sizes of the two partitions
print(len(close_train))
print(len(close_test))

In [None]:
#number of previous days' data to use, to predict the value for the next day
look_back = 15

#TimeseriesGenerator takes in data points and generates batches of temporal data for training or validation.
#data and targets are the same series: each sample is look_back consecutive closes, its target is the close that follows
train_generator = TimeseriesGenerator(close_train, close_train, length=look_back, batch_size=20)
test_generator = TimeseriesGenerator(close_test, close_test, length=look_back, batch_size=1)


In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

#one LSTM layer (10 units) reading windows of look_back closes, followed by a single output unit
model = Sequential()
model.add(
    LSTM(10,
        activation='relu',
        input_shape=(look_back,1))
)
model.add(Dense(1))
#use adam optimiser and mean squared loss function for 25 epochs
model.compile(optimizer='adam', loss='mse')

num_epochs = 25
#Model.fit consumes the generator directly, just as model.evaluate does elsewhere in this notebook;
#fit_generator is deprecated and has been removed from current Keras releases
model.fit(train_generator, epochs=num_epochs, verbose=1)

In [None]:
#Model.predict consumes the generator directly (predict_generator is deprecated)
prediction = model.predict(test_generator)

#reshape it back for plotting
close_train = close_train.reshape((-1))
close_test = close_test.reshape((-1))
prediction = prediction.reshape((-1))

#plot the graph with the predicted value
plt.figure(figsize=(12,8))
plt.plot(df['Close'], color='blue')
#the generator's first target is the close at position look_back, so prediction[i] belongs to
#date_test[look_back + i]; starting at date_test[0] would draw the curve look_back days too early
plt.plot(date_test[look_back:], prediction, color='red')
plt.legend(['actual','predicted'])
plt.title("IHH Healthcare Berhad")
plt.xlabel('Date')
plt.ylabel('Price')
#plt.plot(date_test[:len(close_test)], close_test, color='green')


plt.show()


In [None]:
#Evaluate the model on the test generator; the model was compiled with loss='mse', so this is the mean squared error
model.evaluate(test_generator)

In [None]:
close_data = close_data.reshape((-1)) #flatten the closes to a 1-D array for the forecasting helper below
#forecasting: feed the model the past n values (look_back) to get the next value, then slide the
#window forward so the following step is fed the past n-1 values along with the predicted value
def predict(num_prediction, model, history=None, window=None):
    """Forecast ``num_prediction`` future values one step at a time.

    Parameters
    ----------
    num_prediction : int
        Number of steps to forecast.
    model : object
        Anything with a ``predict(x)`` method that accepts an array of shape
        ``(1, window, 1)`` and returns an array indexable as ``[0][0]``.
    history : array-like, optional
        Past values to start from. Defaults to the notebook-level
        ``close_data``.
    window : int, optional
        Number of most recent values fed to the model at each step. Defaults
        to the notebook-level ``look_back``.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``num_prediction + 1``: the last value of
        ``history`` followed by the forecasts.
    """
    if history is None:
        history = close_data
    if window is None:
        window = look_back

    # Flatten so both (n,) and (n, 1) inputs work, then seed with the last window
    prediction_list = np.asarray(history).reshape(-1)[-window:]

    for _ in range(num_prediction):
        x = prediction_list[-window:].reshape((1, window, 1))
        out = model.predict(x)[0][0]
        prediction_list = np.append(prediction_list, out)

    # Drop the seed values except the last one, which anchors the forecast to the data
    return prediction_list[window-1:]
    
def predict_dates(num_prediction):
    """Return ``num_prediction + 1`` consecutive dates, starting at the last entry of ``df['Date']``.

    The dates come from ``pd.date_range`` with its default frequency and are
    returned as a plain list.
    """
    start_day = df['Date'].values[-1]
    return pd.date_range(start_day, periods=num_prediction+1).tolist()

#predict 30 days ahead and build the matching list of dates
num_prediction = 30
forecast = predict(num_prediction, model)
forecast_dates = predict_dates(num_prediction)

In [None]:
#plot the closing prices together with the 30-day forecast
plt.figure(figsize=(12,8))
plt.plot(df['Close'], color='blue')
#slice the dates to the forecast length so both sequences have the same number of points
plt.plot(forecast_dates[:len(forecast)],forecast,color='orange')
plt.legend(['actual','forecasted'])
plt.title("IHH Healthcare Berhad")
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()

In [None]:
#get the prediction for 1 year: a single 365-step forecast, each step fed by the previous predictions
num_prediction = 365
forecast = predict(num_prediction, model)
forecast_dates = predict_dates(num_prediction)
    
#last element = the value furthest into the future
forecast[-1]

## Predict Using Linear Regression (Petronas Chemicals)

In [None]:
#Importing the required libraries 
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
import pandas_datareader as web
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Analysis

In [None]:
#Downloads the price history of ticker 5183.KL between 2011-03-12 and 2020-03-12 from the 'yahoo' source into a dataframe
df5183 = web.DataReader('5183.KL', data_source='yahoo', start='2011-03-12',end='2020-03-12')

In [None]:
#Creates a new column holding Open minus Close for each day
#Sign convention: a positive value means the close was below the open (price fell); a negative value means it rose
df5183['Difference'] = df5183['Open'] - df5183['Close']
df5183.head()

In [None]:
#Show the last rows of the dataframe
df5183.tail()

In [None]:
# Highest changes in stock price
# 'Difference' is Open - Close, so its maximum is the largest intraday fall, not the largest rise.
# The move is measured here as Close - Open so that the labels match the numbers
# (the decrease is reported as a negative number).
daily_change = df5183['Close'] - df5183['Open']
print("Max increase in MYR: RM", round(daily_change.max(), 2))
print("Max decrease in MYR: RM",round(daily_change.min(), 2))

In [None]:
# Max and min of the adjusted closing price over the downloaded period, rounded to 2 decimals
print("Max stock price: MYR", round(df5183['Adj Close'].max(), 2))
print("Min stock price: MYR", round(df5183['Adj Close'].min(), 2))

In [None]:
# Average stock price: the sum of 'Adj Close' divided by the number of rows.
# This is one overall mean for the whole period, not a rolling (moving) average.
print("The moving average of stock price: ")
df5183['Adj Close'].sum() / len(df5183['Adj Close'])

In [None]:
#Plots the adjusted close; the x-axis is the dataframe's index
plt.figure(figsize = (12,8))
plt.plot(df5183['Adj Close'])
plt.xlabel("Year")
plt.ylabel("Price")
plt.show()

In [None]:
#Writes the dataframe, including the added 'Difference' column, to pchem.csv
df5183.to_csv('pchem.csv')

# Training

In [None]:
#Creates dfPre, the working dataframe for the predictions, holding only 'Adj Close'
#.copy() makes it an independent frame, so adding a column to it later does not
#trigger pandas' SettingWithCopyWarning
dfPre = df5183[['Adj Close']].copy()

In [None]:
#Preview the first rows of the working dataframe
print(dfPre.head())

In [None]:
#predict n days into the future, in this case, 30
forecast_out = 30 
#Creates a new column holding 'Adj Close' shifted up by forecast_out rows:
#each row's Prediction is the price forecast_out rows later, and the last forecast_out rows become NaN
dfPre['Prediction'] = dfPre[['Adj Close']].shift(-forecast_out) 
print(dfPre.head())

In [None]:
#Feature matrix: every column except 'Prediction' (i.e. 'Adj Close') as a 2-D array
#columns= replaces the positional axis argument, which pandas 2.0 no longer accepts
X = np.array(dfPre.drop(columns=['Prediction']))
len(X)  #Checks the amount of data

In [None]:
#reserving 60 data for testing
#The slice is relative to the end rather than a hard-coded row count, so X_new always has
#the same length as y_new (y[:-60]) whatever number of rows was downloaded
X_new = X[:-60] #Creates a new X omitting the last 60 rows
len(X_new) + 60 #Check if the amount of data is correct (should equal len(X))

In [None]:
y = np.array(dfPre['Prediction']) #Creates a y with only the prediction column
y_new = y[:-60] #Takes away the last 60 entries, which include the trailing NaN rows left by the shift
len(y_new)

In [None]:
#Split the data into train and test sets: 80% train, 20% test
#random_state fixes the shuffle so the split, and every score computed from it, is reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(X_new, y_new, test_size = 0.2, random_state = 42)

In [None]:
#Rebuilds y from the Prediction column and removes the last forecast_out entries (the NaN rows left by the shift)
#NOTE(review): this y does not appear to be used by the later cells of this section (the model is fitted on y_train) - confirm
y = np.array(dfPre['Prediction'])
y = y[:-forecast_out] #Takes away the last n entries which were supposed to be predicted
print(y)
len(y)

In [None]:
lr = LinearRegression() #Creates the model
lr.fit(X_train, y_train) #Trains the model on the training split: input is a price, target is the price forecast_out rows later

# Testing

In [None]:
#Feeds the last forecast_out rows of X to the model (end-relative slice instead of hard-coded row numbers)
#NOTE(review): the model was fitted to map a price to the price forecast_out rows later, so these outputs are
#estimates for the forecast_out days AFTER the data ends, despite the label printed below - confirm the intent
y_pre = lr.predict(X[-forecast_out:])
print("Predicted last 30 days")
print(y_pre)

In [None]:
confidence = lr.score(X_test, y_test) #LinearRegression.score returns the R^2 of the model on the held-out test split
print("Confidence Score: ", confidence)

In [None]:
#The actual 'Adj Close' values of the last forecast_out rows
#columns= replaces the positional axis argument, which pandas 2.0 no longer accepts
x_forecast = np.array(dfPre.drop(columns=['Prediction']))[-forecast_out:]
print("Actual last 30 days: ")
print(x_forecast)

In [None]:
#Creates a graph of the model output (red) and the actual last 30 prices (blue)
#NOTE(review): y_pre is computed from these same 30 actual prices, so the two curves are
#model output vs. its own input rather than forecast vs. outcome - confirm this is the intended comparison
plt.figure(figsize = (12,8))
plt.plot(y_pre, color='r')
plt.plot(x_forecast, color='b')
plt.legend(['Forecasted', 'Actual'])
plt.xlabel("Days")
plt.ylabel("Price")

In [None]:
#Mean squared difference between y_pre and x_forecast
print("Mean Squared Error: ", metrics.mean_squared_error(y_pre, x_forecast)) #Checks the mse

# Prediction

In [None]:
#Builds 12 blocks of predictions to reach a year into the future: block 0 is y_pre, and each
#further block is obtained by feeding the previous block back into the model as input prices
y_pre2 = [] 
y_pre2.append(np.array(y_pre))
for x in range(11): 
    y_pre_alt = y_pre2[x] #the most recently produced block
    y_pre_in = lr.predict(y_pre_alt.reshape(-1, 1)) #reshaped to a single feature column for predict
    y_pre2.append(np.array(y_pre_in))

In [None]:
#Takes the 12 blocks of predictions and flattens them from a 2-D list to a 1-D list
#A comprehension keeps its loop variables local; the former nested for-loops rebound
#the notebook-level names x and y (y previously held the target array)
forward = [value for block in y_pre2 for value in block]

In [None]:
#The stock prices predicted from 1 day into the future, to a year into the future
#x-axis is the position in the flattened forecast list
plt.figure(figsize = (12,8))
plt.plot(forward)
plt.xlabel("Days into the Future")
plt.ylabel("Price")

In [None]:
#Last element of the flattened forecast, i.e. the value furthest into the future
print("Final Value: ")
print(forward[-1])

## Predict Using Linear Regression (Maxis Communications)

In [None]:
#Importing the required libraries 
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
import pandas_datareader as web
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics

# Analysis

In [None]:
#Downloads the price history of ticker 6012.KL between 2011-03-12 and 2020-03-12 from the 'yahoo' source into a dataframe
df6012 = web.DataReader('6012.KL', data_source='yahoo', start='2011-03-12',end='2020-03-12') 

In [None]:
#Creates a new column holding Open minus Close for each day
#Sign convention: a positive value means the close was below the open (price fell); a negative value means it rose
df6012['Difference'] = df6012['Open'] - df6012['Close']
df6012.head()

In [None]:
#Show the last rows of the dataframe
df6012.tail()

In [None]:
# Highest changes in stock price
# 'Difference' is Open - Close, so its maximum is the largest intraday fall, not the largest rise.
# The move is measured here as Close - Open so that the labels match the numbers
# (the decrease is reported as a negative number).
daily_change = df6012['Close'] - df6012['Open']
print("Max increase in MYR: RM", round(daily_change.max(), 2))
print("Max decrease in MYR: RM",round(daily_change.min(), 2))

In [None]:
# Max and min of the adjusted closing price over the downloaded period, rounded to 2 decimals
print("Max stock price: MYR", round(df6012['Adj Close'].max(), 2))
print("Min stock price: MYR", round(df6012['Adj Close'].min(), 2))

In [None]:
# Average stock price: the sum of 'Adj Close' divided by the number of rows.
# This is one overall mean for the whole period, not a rolling (moving) average.
print("The moving average of stock price: ")
df6012['Adj Close'].sum() / len(df6012['Adj Close'])

In [None]:
#Plots the adjusted close; the x-axis is the dataframe's index
plt.figure(figsize = (12,8))
plt.plot(df6012['Adj Close'])
plt.xlabel("Year")
plt.ylabel("Price")
plt.show()

In [None]:
#Writes the dataframe, including the added 'Difference' column, to maxis.csv
df6012.to_csv('maxis.csv')

# Training

In [None]:
#Creates dfPre, the working dataframe for the predictions, holding only 'Adj Close'
#.copy() makes it an independent frame, so adding a column to it later does not
#trigger pandas' SettingWithCopyWarning
dfPre = df6012[['Adj Close']].copy()

In [None]:
#Preview the first rows of the working dataframe
print(dfPre.head())

In [None]:
#predict n days into the future, in this case, 30
forecast_out = 30 
#Creates a new column holding 'Adj Close' shifted up by forecast_out rows:
#each row's Prediction is the price forecast_out rows later, and the last forecast_out rows become NaN
dfPre['Prediction'] = dfPre[['Adj Close']].shift(-forecast_out) 
print(dfPre.head())

In [None]:
#Feature matrix: every column except 'Prediction' (i.e. 'Adj Close') as a 2-D array
#columns= replaces the positional axis argument, which pandas 2.0 no longer accepts
X = np.array(dfPre.drop(columns=['Prediction']))
len(X) #Checks the amount of data

In [None]:
#reserving 60 data for testing
#The slice is relative to the end rather than a hard-coded row count, so X_new always has
#the same length as y_new (y[:-60]) whatever number of rows was downloaded
X_new = X[:-60] #Creates a new X omitting the last 60 rows
len(X_new) + 60 #Check if the amount of data is correct (should equal len(X))

In [None]:
y = np.array(dfPre['Prediction']) #Creates a y with only the prediction column
y_new = y[:-60] #Takes away the last 60 entries, which include the trailing NaN rows left by the shift
len(y_new)

In [None]:
#Split the data into train and test sets: 80% train, 20% test
#random_state fixes the shuffle so the split, and every score computed from it, is reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(X_new, y_new, test_size = 0.2, random_state = 42)

In [None]:
#Rebuilds y from the Prediction column and removes the last forecast_out entries (the NaN rows left by the shift)
#NOTE(review): this y does not appear to be used by the later cells of this section (the model is fitted on y_train) - confirm
y = np.array(dfPre['Prediction'])
y = y[:-forecast_out] #Takes away the last n entries which were supposed to be predicted
print(y)
len(y)

In [None]:
lr = LinearRegression() #Creates the model
lr.fit(X_train, y_train) #Trains the model on the training split: input is a price, target is the price forecast_out rows later

# Testing

In [None]:
#Feeds the last forecast_out rows of X to the model (end-relative slice instead of hard-coded row numbers)
#NOTE(review): the model was fitted to map a price to the price forecast_out rows later, so these outputs are
#estimates for the forecast_out days AFTER the data ends, despite the label printed below - confirm the intent
y_pre = lr.predict(X[-forecast_out:])
print("Predicted last 30 days")
print(y_pre)

In [None]:
confidence = lr.score(X_test, y_test) #LinearRegression.score returns the R^2 of the model on the held-out test split
print("Confidence Score: ", confidence) 

In [None]:
#The actual 'Adj Close' values of the last forecast_out rows
#columns= replaces the positional axis argument, which pandas 2.0 no longer accepts
x_forecast = np.array(dfPre.drop(columns=['Prediction']))[-forecast_out:]
print("Actual last 30 days: ")
print(x_forecast)

In [None]:
#Creates a graph of the model output (red) and the actual last 30 prices (blue)
#NOTE(review): y_pre is computed from these same 30 actual prices, so the two curves are
#model output vs. its own input rather than forecast vs. outcome - confirm this is the intended comparison
plt.figure(figsize = (12,8))
plt.plot(y_pre, color='r')
plt.plot(x_forecast, color='b') 
plt.legend(['Forecasted', 'Actual'])
plt.xlabel("Days")
plt.ylabel("Price")

In [None]:
#Mean squared difference between y_pre and x_forecast
print("Mean Squared Error: ", metrics.mean_squared_error(y_pre, x_forecast)) #Checks the mse

# Prediction

In [None]:
#Builds 12 blocks of predictions to reach a year into the future: block 0 is y_pre, and each
#further block is obtained by feeding the previous block back into the model as input prices
y_pre2 = [] 
y_pre2.append(np.array(y_pre))
for x in range(11):
    y_pre_alt = y_pre2[x] #the most recently produced block
    y_pre_in = lr.predict(y_pre_alt.reshape(-1, 1)) #reshaped to a single feature column for predict
    y_pre2.append(np.array(y_pre_in))

In [None]:
#Takes the 12 blocks of predictions and flattens them from a 2-D list to a 1-D list
#A comprehension keeps its loop variables local; the former nested for-loops rebound
#the notebook-level names x and y (y previously held the target array)
forward = [value for block in y_pre2 for value in block]

In [None]:
#The stock prices predicted from 1 day into the future, to a year into the future
#x-axis is the position in the flattened forecast list
plt.figure(figsize = (12,8))
plt.plot(forward)
plt.xlabel("Days into the Future")
plt.ylabel("Price")

In [None]:
#Last element of the flattened forecast, i.e. the value furthest into the future
print("Final Value: ")
print(forward[-1])

## Predict Using Linear Regression (Axiata)

In [None]:
import numpy as np
import math
import datetime
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,confusion_matrix, classification_report

import pandas as pd
import pandas_datareader as web

In [None]:
# Download the price history of ticker 6888.KL between 2011-05-12 and 2020-05-12 from the 'yahoo' source
df = web.DataReader('6888.KL', data_source='yahoo', start='2011-05-12',end='2020-05-12')
# Write the frame to CSV and read it back; the next cell accesses 'Date' as a regular column
df.to_csv('stock5.csv')
df = pd.read_csv('stock5.csv')

In [None]:
# Set date as index (the 'Date' column is moved out of the columns and becomes the row labels)
df = df.set_index('Date')

# Analysis

In [None]:
# Show the last rows of the dataframe
df.tail()

In [None]:
from pandas.plotting import register_matplotlib_converters
# Register pandas' datetime converters with matplotlib
register_matplotlib_converters()

# Plotting the adjusted close; the x-axis is the dataframe's index
plt.figure(figsize = (12,8))
plt.plot(df['Adj Close'])
plt.xlabel("Year")
plt.ylabel("Price")
plt.show()

In [None]:
# Highest changes in stock price
df['New']=df['Open']-df['Close']
# 'New' is Open - Close, so a rise is negative there. The move is measured as Close - Open
# for the report so that "increase" and "decrease" match the numbers (a decrease is negative).
daily_change = df['Close'] - df['Open']
# Series.idxmax/idxmin take no axis=1 (a Series has a single axis); str() makes the
# concatenation work whatever type the index labels have. The second print was also
# missing its closing parenthesis.
print('Highest increase: MYR' + str(round(daily_change.max(),2)) + ' on ' + str(daily_change.idxmax(skipna = True)))
print('Highest decrease: MYR' + str(round(daily_change.min(),2)) + ' on ' + str(daily_change.idxmin(skipna = True)))

In [None]:
# Max and min stock price
# Series.max()/min() skip missing values; the built-in max()/min() give
# order-dependent results when a NaN is present
print('Max: MYR' + str(round(df['Close'].max(),2)))
print('Min: MYR' + str(round(df['Close'].min(),2)))

In [None]:
# Average stock price: the sum of 'Adj Close' divided by the number of rows.
# This is one overall mean for the whole period, not a rolling (moving) average.
df['Adj Close'].sum() / len(df['Adj Close'])

# Train

In [None]:
# Creating the dataset for the model, extract only the needed variable from original dataset
# .copy() makes it an independent frame, so adding a column to it later does not
# trigger pandas' SettingWithCopyWarning
df2 = df[['Adj Close']].copy()

print(df2.head())

In [None]:
# Generate new column for prediction (price 30 entries ahead)
forecast_out = 30
# shift(-forecast_out) moves the prices up by forecast_out rows; the last forecast_out rows become NaN
df2['Prediction'] = df2[['Adj Close']].shift(-forecast_out)

print(df2.head())

In [None]:
# Setting close as X (every column except 'Prediction', as a 2-D array)
# columns= replaces the positional axis argument, which pandas 2.0 no longer accepts
X = np.array(df2.drop(columns=['Prediction']))

# Reserving 60 data for testing
# The slice is relative to the end rather than a hard-coded row count, so X_new always
# has the same length as y[:-60] whatever number of rows was downloaded
X_new = X[:-60]

In [None]:
# Setting prediction as y
y = np.array(df2['Prediction'])

# Reserving 60 data for testing (the removed entries include the trailing NaN rows left by the shift)
y_new = y[:-60]

In [None]:
# Splitting data into train and test sets: 80% train, 20% test
# random_state fixes the shuffle so the split, and every score computed from it, is reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(X_new, y_new, test_size = 0.2, random_state = 42)

In [None]:
# Setting y as prediction, removing the last forecast_out entries (the NaN rows left by the shift)
# NOTE(review): this y does not appear to be used by the later cells of this section (the model is fitted on y_train) - confirm
y = np.array(df2['Prediction'])
y = y[:-forecast_out]
print(y)
len(y)

# Validation

In [None]:
# Training linear regression on the training split: input is a price, target is the price forecast_out rows later
lr = LinearRegression()
lr.fit(X_train, y_train)

In [None]:
# Feeding the last forecast_out rows of X to the model (end-relative slice instead of hard-coded row numbers)
# NOTE(review): the model was fitted to map a price to the price forecast_out rows later, so these outputs are
# estimates for the forecast_out days AFTER the data ends, despite the label printed below - confirm the intent
y_pre = lr.predict(X[-forecast_out:])
print("Predicted last 30 days")
print(y_pre)

In [None]:
# Calculating confidence score: LinearRegression.score returns the R^2 of the model on the held-out test split
confidence = lr.score(X_test, y_test)
print("Confidence Score: ", confidence)

In [None]:
# Actual data of last 30 days
# columns= replaces the positional axis argument, which pandas 2.0 no longer accepts
x_forecast = np.array(df2.drop(columns=['Prediction']))[-forecast_out:]

In [None]:
# Plotting the model output (red) with the actual last 30 prices (blue)
# NOTE(review): y_pre is computed from these same 30 actual prices, so the two curves are
# model output vs. its own input rather than forecast vs. outcome - confirm this is the intended comparison
plt.plot(y_pre, color='r')
plt.plot(x_forecast, color='b')
plt.legend(['Forecasted', 'Actual'])
plt.xlabel("Days")
plt.ylabel("Price")

In [None]:
# Calculate mse score: the mean squared difference between y_pre and x_forecast
print("Mean Squared Error: ", mean_squared_error(y_pre, x_forecast))

# Prediction

In [None]:
# Repeat the prediction with the model 11 times: block 0 is y_pre, and each further
# block is obtained by feeding the previous block back into the model as input prices
y_pre2 = [] 
y_pre2.append(np.array(y_pre))
for x in range(11):
    y_pre_alt = y_pre2[x] # the most recently produced block
    y_pre_in = lr.predict(y_pre_alt.reshape(-1, 1)) # reshaped to a single feature column for predict
    y_pre2.append(np.array(y_pre_in))

In [None]:
# Combine the predicted data into one list
# A comprehension keeps its loop variables local; the former nested for-loops rebound
# the notebook-level names x and y (y previously held the target array)
forward = [value for block in y_pre2 for value in block]

In [None]:
# Append the predicted data to previous data (for plotting comparison)
# np.append without an axis flattens both inputs: the prices from row 1889 onward, followed by the forecast
xxx = np.append(X[1889:], forward)

In [None]:
#The actual prices from row 1889 onward (blue) with the predicted prices appended after them (red)
#NOTE(review): row 1889 is presumably the first trading day of 2019, as the x-axis label implies - confirm
plt.figure(figsize = (12,8))
plt.plot(xxx, color='r')
plt.plot(X[1889:], color='b')
plt.xlabel("Days since 2019")
plt.ylabel("Price")