# Implement VAR to yahoo finance Week 1

In [1]:
import pandas as pd
import numpy as np

## Plotting
import matplotlib.pyplot as plt
import seaborn as sns

from datetime import datetime
from sklearn.metrics import mean_squared_error

from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import acf, pacf, grangercausalitytests
from statsmodels.tsa.statespace.varmax import VARMAX

## Import TimeSeriesSplit
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler

import yfinance as yf

from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss

## def of prediction_class

In [2]:
def prediction_class(df):
    df['Prediction'] = np.where(df['Close'] < df['Close'].shift(-1), 1, 0)
    return df

## Import from Yahoo Finance

In [26]:
Ford = yf.download('F', start = '2024-8-8', end = '2024-11-9')

[*********************100%***********************]  1 of 1 completed


In [27]:
Ford

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-08-08,9.80,10.25,9.80,10.13,9.994209,73007600
2024-08-09,10.08,10.17,9.97,10.10,9.964611,48860800
2024-08-12,10.10,10.10,9.84,9.86,9.727828,56118700
2024-08-13,9.92,10.15,9.77,10.14,10.004075,44766100
2024-08-14,10.16,10.24,10.05,10.18,10.043539,37026400
...,...,...,...,...,...,...
2024-11-04,10.23,10.57,10.23,10.36,10.221126,55323800
2024-11-05,10.33,10.64,10.30,10.60,10.457909,49309000
2024-11-06,10.95,11.23,10.84,11.19,11.039999,100781800
2024-11-07,11.01,11.11,10.83,10.96,10.960000,55407900


In [28]:
Ford['change_in_price'] = Ford['Close'].diff()
Ford

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,change_in_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-08-08,9.80,10.25,9.80,10.13,9.994209,73007600,
2024-08-09,10.08,10.17,9.97,10.10,9.964611,48860800,-0.030000
2024-08-12,10.10,10.10,9.84,9.86,9.727828,56118700,-0.240001
2024-08-13,9.92,10.15,9.77,10.14,10.004075,44766100,0.280001
2024-08-14,10.16,10.24,10.05,10.18,10.043539,37026400,0.040000
...,...,...,...,...,...,...,...
2024-11-04,10.23,10.57,10.23,10.36,10.221126,55323800,0.139999
2024-11-05,10.33,10.64,10.30,10.60,10.457909,49309000,0.240001
2024-11-06,10.95,11.23,10.84,11.19,11.039999,100781800,0.589999
2024-11-07,11.01,11.11,10.83,10.96,10.960000,55407900,-0.230000


In [29]:
Toyota = yf.download('TM', start = '2024-8-8', end = '2024-11-9')
Toyota['change_in_price'] = Toyota['Close'].diff()

[*********************100%***********************]  1 of 1 completed


## Split the data

In [30]:
df_F_test = Ford[-5:].copy()
df_F_test

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,change_in_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-11-04,10.23,10.57,10.23,10.36,10.221126,55323800,0.139999
2024-11-05,10.33,10.64,10.3,10.6,10.457909,49309000,0.240001
2024-11-06,10.95,11.23,10.84,11.19,11.039999,100781800,0.589999
2024-11-07,11.01,11.11,10.83,10.96,10.96,55407900,-0.23
2024-11-08,10.93,10.97,10.86,10.97,10.97,45463400,0.01


In [31]:
df = pd.concat([Ford['change_in_price'][: -5], Toyota['change_in_price'][: -5]], axis = 1)

In [32]:
df.columns = ['F_change_in_price', 'TM_change_in_price']

In [33]:
df

Unnamed: 0_level_0,F_change_in_price,TM_change_in_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-08-08,,
2024-08-09,-0.030000,-0.779999
2024-08-12,-0.240001,-0.619995
2024-08-13,0.280001,4.419998
2024-08-14,0.040000,4.619995
...,...,...
2024-10-28,0.300000,4.630005
2024-10-29,-0.960000,-0.600006
2024-10-30,0.060000,-0.819992
2024-10-31,-0.180000,-2.190002


## Apply VAR model

In [34]:
df_scaled = df.dropna().copy()
scaler = StandardScaler()
scaler_train = scaler.fit_transform(df.dropna().values)
scaled_train_set = pd.DataFrame(scaler_train, columns = df.columns)
model = VAR(scaled_train_set)
k = 22
results = model.fit(k)
# Inverse scale
pred = scaler.inverse_transform(results.forecast(scaled_train_set.values[-k:], steps=5))
predictions = pred[:, 0]

#model = VAR(df.dropna())
#results = model.fit(22)
#predictions = results.forecast(df.values[-22:], steps=5)[:, 0]

In [35]:
predictions

array([-0.33675875,  0.02999598, -0.42451333,  0.09268001, -0.07655244])

In [36]:
df_F_test

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,change_in_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-11-04,10.23,10.57,10.23,10.36,10.221126,55323800,0.139999
2024-11-05,10.33,10.64,10.3,10.6,10.457909,49309000,0.240001
2024-11-06,10.95,11.23,10.84,11.19,11.039999,100781800,0.589999
2024-11-07,11.01,11.11,10.83,10.96,10.96,55407900,-0.23
2024-11-08,10.93,10.97,10.86,10.97,10.97,45463400,0.01


In [37]:
accuracy_TM = sum(predictions*df_F_test['change_in_price'] > 0)/5

In [38]:
accuracy_TM

0.2