# Facebook Prophet

## Setup and Import

As always, the first step is to import the required libraries and data. Since we do not want to run the SQL query every time, we simply load the CSV file we created in the first notebook.

In [2]:
# Import packages
import pandas as pd
import numpy as np
from fbprophet import Prophet
from fbprophet.make_holidays import make_holidays_df
from fbprophet.plot import plot_plotly, plot_components_plotly
from fbprophet.diagnostics import cross_validation
from fbprophet.diagnostics import performance_metrics
from fbprophet.plot import add_changepoints_to_plot
from fbprophet.serialize import model_to_json, model_from_json

from ipywidgets import HTML
from io import BytesIO
import base64
import itertools

import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.simplefilter("ignore")

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error

import statistics
import pickle



# Notebook-wide pandas display settings, applied in one pass.
for _option, _value in {
    # Never truncate cell contents (so images/long strings are shown in full)
    'display.max_colwidth': None,
    # Show every column and every row of a displayed frame
    'display.max_columns': None,
    'display.max_rows': None,
    # With truncation off, cap sequences (sets / arrays) at a few items
    'display.max_seq_items': 50,
    # Character width available to the text representation
    'display.width': 1000,
}.items():
    pd.set_option(_option, _value)


In [3]:
# Load the stock price table (the 'Date' column is parsed to datetimes on read)
df = pd.read_csv(
    '../data/out.csv',
    parse_dates=['Date'],
)
# Load the table describing the listed securities
stock_list_df = pd.read_csv('../data/stock_list.csv')

# Finding the biggest Companies

In [4]:
# Rank all listed securities by market capitalization, largest first
biggest = stock_list_df.sort_values('MarketCapitalization', ascending=False)
# Drop rows that have no 'NewMarketSegment' value
# (per the original note, this is how the ETFs/ENAs are removed)
biggestc = biggest.dropna(subset=['NewMarketSegment'])
# Keep the ten largest remaining securities as the chosen stocks
Cselect = biggestc.head(10)
Cselect

Unnamed: 0,SecuritiesCode,EffectiveDate,Name,Section/Products,NewMarketSegment,33SectorCode,33SectorName,17SectorCode,17SectorName,NewIndexSeriesSizeCode,NewIndexSeriesSize,TradeDate,Close,IssuedShares,MarketCapitalization,Universe0
3065,7203,20211230,TOYOTA MOTOR CORPORATION,First Section (Domestic),Prime Market,3700,Transportation Equipment,6,AUTOMOBILES & TRANSPORTATION EQUIPMENT,1,TOPIX Core30,20211230.0,2105.5,16314990000.0,34351210000000.0,True
2804,6758,20211230,SONY GROUP CORPORATION,First Section (Domestic),Prime Market,3650,Electric Appliances,9,ELECTRIC APPLIANCES & PRECISION INSTRUMENTS,1,TOPIX Core30,20211230.0,14475.0,1261082000.0,18254160000000.0,True
2867,6861,20211230,KEYENCE CORPORATION,First Section (Domestic),Prime Market,3650,Electric Appliances,9,ELECTRIC APPLIANCES & PRECISION INSTRUMENTS,1,TOPIX Core30,20211230.0,72280.0,243207700.0,17579050000000.0,True
2375,6098,20211230,"Recruit Holdings Co.,Ltd.",First Section (Domestic),Prime Market,9050,Services,10,"IT & SERVICES, OTHERS",1,TOPIX Core30,20211230.0,6972.0,1695960000.0,11824230000000.0,True
4158,9432,20211230,NIPPON TELEGRAPH AND TELEPHONE CORPORATION,First Section (Domestic),Prime Market,5250,Information & Communication,10,"IT & SERVICES, OTHERS",1,TOPIX Core30,20211230.0,3150.0,3622013000.0,11409340000000.0,True
3577,8035,20211230,Tokyo Electron Limited,First Section (Domestic),Prime Market,3650,Electric Appliances,9,ELECTRIC APPLIANCES & PRECISION INSTRUMENTS,1,TOPIX Core30,20211230.0,66280.0,157210900.0,10419940000000.0,True
4404,9984,20211230,SoftBank Group Corp.,First Section (Domestic),Prime Market,5250,Information & Communication,10,"IT & SERVICES, OTHERS",1,TOPIX Core30,20211230.0,5434.0,1722954000.0,9362531000000.0,True
1508,4063,20211230,"Shin-Etsu Chemical Co.,Ltd.",First Section (Domestic),Prime Market,3200,Chemicals,4,RAW MATERIALS & CHEMICALS,1,TOPIX Core30,20211230.0,19920.0,416662800.0,8299923000000.0,True
3732,8306,20211230,"Mitsubishi UFJ Financial Group,Inc.",First Section (Domestic),Prime Market,7050,Banks,15,BANKS,1,TOPIX Core30,20211230.0,624.9,13282000000.0,8299919000000.0,True
2707,6594,20211230,NIDEC CORPORATION,First Section (Domestic),Prime Market,3650,Electric Appliances,9,ELECTRIC APPLIANCES & PRECISION INSTRUMENTS,1,TOPIX Core30,20211230.0,13520.0,596284500.0,8061766000000.0,True


In [5]:
# Securities codes of the chosen stocks, as a plain Python list
c_list = Cselect.loc[:, 'SecuritiesCode'].to_list()
c_list

[7203, 6758, 6861, 6098, 9432, 8035, 9984, 4063, 8306, 6594]

In [6]:
# Company names of the chosen stocks, in the same order as c_list
c_list_name = Cselect.loc[:, 'Name'].to_list()
c_list_name

['TOYOTA MOTOR CORPORATION',
 'SONY GROUP CORPORATION',
 'KEYENCE CORPORATION',
 'Recruit Holdings Co.,Ltd.',
 'NIPPON TELEGRAPH AND TELEPHONE CORPORATION',
 'Tokyo Electron Limited',
 'SoftBank Group Corp.',
 'Shin-Etsu Chemical Co.,Ltd.',
 'Mitsubishi UFJ Financial Group,Inc.',
 'NIDEC CORPORATION']

# Profit Calculation

In [7]:
def outcome_forecast(train_df, forecast):
    """Forecasted profit per share over the forecast horizon.

    Parameters
    ----------
    train_df : DataFrame with a 'Close' column; its last row is the entry price.
    forecast : DataFrame with a 'yhat' column; its last row is the final prediction.

    Returns
    -------
    The last 'yhat' minus the last training 'Close', rounded to 2 decimals.
    """
    entry_close = train_df['Close'].iloc[-1]
    final_prediction = forecast['yhat'].iloc[-1]
    predicted_gain = final_prediction - entry_close
    return round(predicted_gain, 2)

In [8]:
def outcome_actual(train_df, test_df, forecast):
    """Realised profit per share on the last forecasted date.

    Parameters
    ----------
    train_df : DataFrame with a 'Close' column; its last row is the entry price.
    test_df  : DataFrame with 'Date' and 'Close' columns holding the actual prices.
    forecast : DataFrame with a 'ds' column; its last value is the date looked up.

    Returns
    -------
    The actual 'Close' on the last forecast date minus the last training
    'Close', rounded to 2 decimals.

    Raises
    ------
    IndexError if no row of test_df has a 'Date' equal to the last 'ds'.
    """
    entry_close = train_df['Close'].iloc[-1]
    horizon_date = forecast['ds'].iloc[-1]
    on_horizon = test_df['Date'] == horizon_date
    realised_close = test_df.loc[on_horizon, 'Close'].iloc[-1]
    return round(realised_close - entry_close, 2)
    

In [9]:
def roi_forecast(train_df, forecast):
    """Forecasted return on investment over the forecast horizon.

    Parameters
    ----------
    train_df : DataFrame with a 'Close' column; its last row is the entry price.
    forecast : DataFrame with a 'yhat' column; its last row is the final prediction.

    Returns
    -------
    (last 'yhat' - last training 'Close') / last training 'Close',
    rounded to 2 decimals (a fraction, not a percentage).
    """
    entry_close = train_df['Close'].iloc[-1]
    final_prediction = forecast['yhat'].iloc[-1]
    relative_gain = (final_prediction - entry_close) / entry_close
    return round(relative_gain, 2)

In [10]:
def roi_actual(train_df, test_df, forecast):
    """Realised return on investment on the last forecasted date.

    Parameters
    ----------
    train_df : DataFrame with a 'Close' column; its last row is the entry price.
    test_df  : DataFrame with 'Date' and 'Close' columns holding the actual prices.
    forecast : DataFrame with a 'ds' column; its last value is the date looked up.

    Returns
    -------
    (actual 'Close' on the last forecast date - last training 'Close')
    / last training 'Close', rounded to 2 decimals (a fraction, not a percentage).

    Raises
    ------
    IndexError if no row of test_df has a 'Date' equal to the last 'ds'.
    """
    entry_close = train_df['Close'].iloc[-1]
    horizon_date = forecast['ds'].iloc[-1]
    on_horizon = test_df['Date'] == horizon_date
    realised_close = test_df.loc[on_horizon, 'Close'].iloc[-1]
    relative_gain = (realised_close - entry_close) / entry_close
    return round(relative_gain, 2)
    

In [11]:
# Result accumulators: each gets one entry per security, in c_list order
of_list, roi_f_list, oa_list, roi_a_list = [], [], [], []

# Settings shared by every security
cols = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'SecuritiesCode', 'Target', '17SectorName']
# Number of trailing rows held out as the test period (the notebook treats them as days)
test_size = 30

for Code in c_list:
    # Forecast previously written to disk for this security
    forecast = pd.read_csv(f'forecast_{Code}.csv')

    # Price history of this security only, indexed by date
    STOCK = df.loc[df.SecuritiesCode == Code, cols].set_index("Date")

    # Split positionally: everything but the last `test_size` rows is training data
    train_df = STOCK.iloc[:-test_size].reset_index()
    test_df = STOCK.iloc[-test_size:].reset_index()

    # Forecasted vs. realised absolute profit and ROI
    of_list.append(outcome_forecast(train_df, forecast))
    roi_f_list.append(roi_forecast(train_df, forecast))
    oa_list.append(outcome_actual(train_df, test_df, forecast))
    roi_a_list.append(roi_actual(train_df, test_df, forecast))
    

# Results

In [12]:
# One summary row per security: identifiers plus forecasted and actual profit/ROI
outcome_columns = [
    'SecuritieCode',
    'Name',
    'Profit_per_Stock_forecast',
    'ROI_forecast',
    'Profit_per_Stock_actual',
    'ROI_actual',
]
outcome_rows = list(zip(c_list, c_list_name, of_list, roi_f_list, oa_list, roi_a_list))
df_outcome = pd.DataFrame(outcome_rows, columns=outcome_columns)

In [13]:
# Show the per-security summary of forecasted and actual profit / ROI
df_outcome

Unnamed: 0,SecuritieCode,Name,Profit_per_Stock_forecast,ROI_forecast,Profit_per_Stock_actual,ROI_actual
0,7203,TOYOTA MOTOR CORPORATION,51.13,0.03,101.0,0.05
1,6758,SONY GROUP CORPORATION,582.93,0.05,1525.0,0.12
2,6861,KEYENCE CORPORATION,-1134.12,-0.02,6230.0,0.09
3,6098,"Recruit Holdings Co.,Ltd.",723.61,0.1,361.0,0.05
4,9432,NIPPON TELEGRAPH AND TELEPHONE CORPORATION,233.17,0.07,46.0,0.01
5,8035,Tokyo Electron Limited,2172.88,0.04,11860.0,0.23
6,9984,SoftBank Group Corp.,-899.18,-0.13,144.0,0.02
7,4063,"Shin-Etsu Chemical Co.,Ltd.",139.97,0.01,725.0,0.04
8,8306,"Mitsubishi UFJ Financial Group,Inc.",41.92,0.06,-26.8,-0.04
9,6594,NIDEC CORPORATION,-529.36,-0.04,840.0,0.07


In [14]:
# Rank the securities from highest to lowest forecasted ROI (display only)
(
    df_outcome
    .sort_values(by='ROI_forecast', ascending=False)
    .reset_index(drop=True)
)

Unnamed: 0,SecuritieCode,Name,Profit_per_Stock_forecast,ROI_forecast,Profit_per_Stock_actual,ROI_actual
0,6098,"Recruit Holdings Co.,Ltd.",723.61,0.1,361.0,0.05
1,9432,NIPPON TELEGRAPH AND TELEPHONE CORPORATION,233.17,0.07,46.0,0.01
2,8306,"Mitsubishi UFJ Financial Group,Inc.",41.92,0.06,-26.8,-0.04
3,6758,SONY GROUP CORPORATION,582.93,0.05,1525.0,0.12
4,8035,Tokyo Electron Limited,2172.88,0.04,11860.0,0.23
5,7203,TOYOTA MOTOR CORPORATION,51.13,0.03,101.0,0.05
6,4063,"Shin-Etsu Chemical Co.,Ltd.",139.97,0.01,725.0,0.04
7,6861,KEYENCE CORPORATION,-1134.12,-0.02,6230.0,0.09
8,6594,NIDEC CORPORATION,-529.36,-0.04,840.0,0.07
9,9984,SoftBank Group Corp.,-899.18,-0.13,144.0,0.02


# Forecasted Profit

In [15]:
# Forecast-only view of the summary: name, forecasted profit per share, forecasted ROI
df_outcome_for = df_outcome.loc[:, ['Name', 'Profit_per_Stock_forecast', 'ROI_forecast']]

In [28]:
# Five securities with the highest forecasted ROI; 'Profit_20%' is the
# forecasted ROI multiplied by 2,000,000
Top_5_for = (
    df_outcome
    .sort_values('ROI_forecast', ascending=False)
    .reset_index(drop=True)
    .head(5)
    .assign(**{'Profit_20%': lambda top: top['ROI_forecast'] * 2000000})
    .loc[:, ['Name', 'ROI_forecast', 'ROI_actual', 'Profit_20%']]
)
Top_5_for

Unnamed: 0,Name,ROI_forecast,ROI_actual,Profit_20%
0,"Recruit Holdings Co.,Ltd.",0.1,0.05,200000.0
1,NIPPON TELEGRAPH AND TELEPHONE CORPORATION,0.07,0.01,140000.0
2,"Mitsubishi UFJ Financial Group,Inc.",0.06,-0.04,120000.0
3,SONY GROUP CORPORATION,0.05,0.12,100000.0
4,Tokyo Electron Limited,0.04,0.23,80000.0


In [17]:
# Total forecasted profit over the five selected securities
ROI_f = Top_5_for.loc[:, 'Profit_20%'].sum()
ROI_f

640000.0

# Actual Profit on Forecasted Stocks

In [29]:
# View used to evaluate the forecast-based picks against what actually happened
df_outcome_for_act = df_outcome.loc[:, ['Name', 'ROI_forecast', 'Profit_per_Stock_actual', 'ROI_actual']]

In [43]:
# Same five securities as picked by forecasted ROI, but 'Profit' is the
# *actual* ROI multiplied by 2,000,000
Top_5_for_act = (
    df_outcome_for_act
    .sort_values('ROI_forecast', ascending=False)
    .reset_index(drop=True)
    .head(5)
    .assign(Profit=lambda top: top['ROI_actual'] * 2000000)
    .loc[:, ['Name', 'Profit']]
)
Top_5_for_act

Unnamed: 0,Name,Profit
0,"Recruit Holdings Co.,Ltd.",100000.0
1,NIPPON TELEGRAPH AND TELEPHONE CORPORATION,20000.0
2,"Mitsubishi UFJ Financial Group,Inc.",-80000.0
3,SONY GROUP CORPORATION,240000.0
4,Tokyo Electron Limited,460000.0


In [31]:
# Total profit actually realised on the five securities picked by forecasted ROI.
# Top_5_for_act only has the columns 'Name' and 'Profit'; summing the
# non-existent 'Profit_20%' column raised a KeyError on a fresh kernel run.
ROI_f_a = Top_5_for_act['Profit'].sum()
ROI_f_a

740000.0

# Actual Profit on best performing Stocks

In [38]:
# View used to find the securities that actually performed best
df_outcome_act = df_outcome.loc[:, ['Name', 'Profit_per_Stock_actual', 'ROI_actual', 'ROI_forecast']]

In [42]:
# Five securities with the highest actual ROI; 'Profit' is the actual ROI
# multiplied by 2,000,000
Top_5_act = (
    df_outcome_act
    .sort_values('ROI_actual', ascending=False)
    .reset_index(drop=True)
    .head(5)
    .assign(Profit=lambda top: top['ROI_actual'] * 2000000)
    .loc[:, ['Name', 'ROI_forecast', 'ROI_actual', 'Profit']]
)
Top_5_act

Unnamed: 0,Name,ROI_forecast,ROI_actual,Profit
0,Tokyo Electron Limited,0.04,0.23,460000.0
1,SONY GROUP CORPORATION,0.05,0.12,240000.0
2,KEYENCE CORPORATION,-0.02,0.09,180000.0
3,NIDEC CORPORATION,-0.04,0.07,140000.0
4,TOYOTA MOTOR CORPORATION,0.03,0.05,100000.0


In [34]:
# Total profit on the five securities with the best actual ROI.
# Top_5_act names its profit column 'Profit' (there is no 'Profit_20%'
# column), so the previous lookup raised a KeyError on a fresh kernel run.
ROI_a = Top_5_act['Profit'].sum()
ROI_a

1120000.0

# Error-Analysis

We will have a look at stock 8035 (Tokyo Electron Limited) to see why Facebook Prophet did not predict it well.

In [35]:
# Security inspected in the error analysis
Code = 8035
cols = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'SecuritiesCode', 'Target', '17SectorName']

# Price history of this security only, indexed by date
STOCK = df.loc[df.SecuritiesCode == Code, cols].set_index("Date")

# Hold out the last `test_size` rows as the test period (the notebook treats them as days)
test_size = 30
train_df = STOCK.iloc[:-test_size].reset_index()
test_df = STOCK.iloc[-test_size:].reset_index()

In [None]:
# Use a separate frame for plotting. Assigning to `df` here replaced the
# all-securities price table with this single stock, so any cell filtering
# `df` by SecuritiesCode gave different results when re-run afterwards.
stock_history = STOCK.reset_index()
stock_history['Date'] = pd.to_datetime(stock_history['Date'])
# Index by date so matplotlib uses the dates as the x-axis
stock_history.index = stock_history['Date']

plt.figure(figsize=(16,8))
plt.plot(stock_history['Close'], label='Close Price history', color='r')
plt.xlabel('Date',size=20)
plt.ylabel('Stock Price Close',size=20)
plt.title('Stock Price over the Years',size=25)
# The line carries a label, so draw the legend that displays it
plt.legend();

In [None]:

    # Index both splits by their datetime 'Date' so the curves share one time axis
    for split_df in (train_df, test_df):
        split_df['Date'] = pd.to_datetime(split_df['Date'])
        split_df.index = split_df['Date']

    # Forecast stored on disk for security 8035; its last `test_size` 'yhat'
    # values are attached positionally to the hold-out rows
    forecast = pd.read_csv('forecast_8035.csv')
    forecast_valid = forecast['yhat'].iloc[-test_size:]
    test_df['Predictions'] = forecast_valid.to_numpy()

    # Training closes first, then actual vs. predicted closes of the test period
    plt.figure(figsize=(16, 8))
    plt.plot(train_df['Close'])
    plt.plot(test_df[['Close', 'Predictions']])
    plt.title('Stock Price Prediction by FB Prophet', size=20)
    plt.xlabel('Date', size=20)
    plt.ylabel('Stock Price', size=20)
    # Legend entries follow the order in which the lines were drawn
    plt.legend(['Model Training Data', 'Actual Data', 'Predicted Data'])
