# Important version

In [None]:
!pip install scikit-learn==1.4.1.post1

# Imports

In [None]:
import yfinance as yf
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing as HWES
from statsmodels.tsa.seasonal import seasonal_decompose
import statsmodels.api as sm
import matplotlib.pyplot as plt
import numpy as np
import sklearn.metrics as sk
from scipy import stats
from datetime import datetime
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.stattools import adfuller, kpss

# Download data

In [None]:
# Fetch daily ("1d") IBM quotes for the study window via yfinance.
df = yf.download(
    "IBM",
    interval="1d",
    start=datetime(2022, 10, 1),
    end=datetime(2022, 12, 16),
)
# Bare expression so the notebook renders the downloaded frame.
df

# Data plot

In [None]:
# Draw the four price columns together on one chart.
df[["Open", "Low", "High", "Close"]].plot()

# Data analysis for Exponential Smoothing

## Additive decompositions

In [None]:
# Additive decomposition of the Open prices into trend, seasonal and residual parts.
# NOTE(review): period=1 leaves no room for a repeating seasonal pattern — confirm this is intended.
add_decomposition_open = seasonal_decompose(
    df[["Open"]], model="additive", period=1, extrapolate_trend="freq"
)
# plot() returns the Figure; a figure-level title is used because plt.title()
# would attach the text to the current (last) subplot only.
fig = add_decomposition_open.plot()
fig.suptitle("Additive seasonal decomposition of Open dataset")
fig.tight_layout()  # redo the layout so the title does not overlap the top panel
plt.show()

In [None]:
# Additive decomposition of the Close prices into trend, seasonal and residual parts.
# NOTE(review): period=1 leaves no room for a repeating seasonal pattern — confirm this is intended.
add_decomposition_close = seasonal_decompose(
    df[["Close"]], model="additive", period=1, extrapolate_trend="freq"
)
# plot() returns the Figure; a figure-level title is used because plt.title()
# would attach the text to the current (last) subplot only.
fig = add_decomposition_close.plot()
fig.suptitle("Additive seasonal decomposition of Close dataset")
fig.tight_layout()  # redo the layout so the title does not overlap the top panel
plt.show()

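There is only a trend component; the decomposition gives no information about seasonality. Note that `period=1` was used, for which the seasonal component is trivially constant, so this result alone does not rule out seasonality at longer periods (e.g. a 5-day trading week).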

## Multiplicative decompositions

In [None]:
# Multiplicative decomposition of the Open prices into trend, seasonal and residual parts.
# NOTE(review): period=1 leaves no room for a repeating seasonal pattern — confirm this is intended.
mul_decomposition_open = seasonal_decompose(
    df[["Open"]], model="multiplicative", period=1, extrapolate_trend="freq"
)
# plot() returns the Figure; a figure-level title is used because plt.title()
# would attach the text to the current (last) subplot only.
fig = mul_decomposition_open.plot()
fig.suptitle("Multiplicative seasonal decomposition of Open dataset")
fig.tight_layout()  # redo the layout so the title does not overlap the top panel
plt.show()

In [None]:
# Multiplicative decomposition of the Close prices into trend, seasonal and residual parts.
# NOTE(review): period=1 leaves no room for a repeating seasonal pattern — confirm this is intended.
mul_decomposition_close = seasonal_decompose(
    df[["Close"]], model="multiplicative", period=1, extrapolate_trend="freq"
)
# plot() returns the Figure; a figure-level title is used because plt.title()
# would attach the text to the current (last) subplot only.
fig = mul_decomposition_close.plot()
fig.suptitle("Multiplicative seasonal decomposition of Close dataset")
fig.tight_layout()  # redo the layout so the title does not overlap the top panel
plt.show()

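There is only a trend component; the decomposition gives no information about seasonality. Note that `period=1` was used, for which the seasonal component is trivially constant, so this result alone does not rule out seasonality at longer periods (e.g. a 5-day trading week).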

# Plots of autocorrelation

## Autocorrelation of Open dataset

In [None]:
# Autocorrelation function (ACF) plot of the Open prices.
sm.graphics.tsa.plot_acf(df[["Open"]], title="Autocorrelation")
plt.show()

## Autocorrelation of Close dataset

In [None]:
# Autocorrelation function (ACF) plot of the Close prices.
sm.graphics.tsa.plot_acf(df[["Close"]], title="Autocorrelation")
plt.show()

# Ljung-Box test of autocorrelation in residuals 
This helps determine whether the residuals are autocorrelated. If the p-value is greater than 0.05, the null hypothesis of no autocorrelation is not rejected, i.e. the residuals can be treated as independent.

## Ljung-Box for Open residuals

In [None]:
# Ljung-Box test on the Open column; lags=20 is only an example setting.
# The result is returned as a DataFrame and rendered as the cell output.
acorr_ljungbox(df[["Open"]], return_df=True, lags=20)

## Ljung-Box for Close residuals

In [None]:
# Ljung-Box test on the Close column; lags=20 is only an example setting.
# The result is returned as a DataFrame and rendered as the cell output.
acorr_ljungbox(df[["Close"]], return_df=True, lags=20)

# Augmented Dickey-Fuller (ADF) [More info](https://www.geeksforgeeks.org/machine-learning/augmented-dickey-fuller-adf)
A test used to determine whether a time series is stationary. Stationarity helps ensure that the forecasting model does not produce misleading forecasts and inferences.

Interpreting results:
- ADF Statistic: If it's significantly lower than the critical values, reject the null hypothesis (that the series has a unit root, i.e. is non-stationary).
- p-value: If p < 0.05, the series is likely stationary.
- Critical Values: Used for comparing with the ADF statistic

## ADF for Open dataset

In [None]:
# Run the ADF test on the Open column and report its key outputs.
adf_results = adfuller(df[["Open"]])
for label, figure in (("ADF Statistic:", adf_results[0]), ("p-value:", adf_results[1])):
    print(label, figure)
print("Critical Values:")
# Element 4 of the result is a mapping of significance level -> critical value.
for level, threshold in adf_results[4].items():
    print(f"   {level}: {threshold}")

## ADF for Close dataset

In [None]:
# Run the ADF test on the Close column and report its key outputs.
adf_results = adfuller(df[["Close"]])
for label, figure in (("ADF Statistic:", adf_results[0]), ("p-value:", adf_results[1])):
    print(label, figure)
print("Critical Values:")
# Element 4 of the result is a mapping of significance level -> critical value.
for level, threshold in adf_results[4].items():
    print(f"   {level}: {threshold}")

# Kwiatkowski-Phillips-Schmidt-Shin (KPSS) [More info](https://www.geeksforgeeks.org/python/kwiatkowski-phillips-schmidt-shin-kpss)

If the p-value is smaller than 0.05, the null hypothesis of stationarity is rejected and the series is likely non-stationary.

## KPSS test for Open data

In [None]:
# KPSS test on the Open column; the four returned values are unpacked and printed in turn.
stat, p_value, lags, crit = kpss(df[["Open"]])
for label, outcome in (
    ("KPSS Statistic:", stat),
    ("p-value:", p_value),
    ("Critical Values:", crit),
    ("Lags used:", lags),
):
    print(label, outcome)

## KPSS test for Close data

In [None]:
# KPSS test on the Close column; the four returned values are unpacked and printed in turn.
stat, p_value, lags, crit = kpss(df[["Close"]])
for label, outcome in (
    ("KPSS Statistic:", stat),
    ("p-value:", p_value),
    ("Critical Values:", crit),
    ("Lags used:", lags),
):
    print(label, outcome)

# Differencing to make Open data stationary

In [None]:
# First difference of the Open column; the leading NaN row produced by diff() is dropped.
open_diff = df[["Open"]].diff().dropna()

## Additive decomposition of differenced data

In [None]:
# Additive decomposition of the differenced Open series (rebinds add_decomposition_open).
# NOTE(review): period=1 leaves no room for a repeating seasonal pattern — confirm this is intended.
add_decomposition_open = seasonal_decompose(
    open_diff, model="additive", period=1, extrapolate_trend="freq"
)
# plot() returns the Figure; a figure-level title is used because plt.title()
# would attach the text to the current (last) subplot only.
fig = add_decomposition_open.plot()
fig.suptitle("Additive seasonal decomposition")
fig.tight_layout()  # redo the layout so the title does not overlap the top panel
plt.show()

## Autocorrelation of differenced data

In [None]:
# Autocorrelation function (ACF) plot of the differenced Open series.
sm.graphics.tsa.plot_acf(open_diff, title="Autocorrelation")
plt.show()

## Ljung-box test

In [None]:
# Ljung-Box test on the differenced Open series (lags=20), returned as a DataFrame.
acorr_ljungbox(open_diff, return_df=True, lags=20)

## KPSS test

In [None]:
# KPSS test on the differenced Open series; the four returned values are printed in turn.
stat, p_value, lags, crit = kpss(open_diff)
for label, outcome in (
    ("KPSS Statistic:", stat),
    ("p-value:", p_value),
    ("Critical Values:", crit),
    ("Lags used:", lags),
):
    print(label, outcome)

# Differencing to make Close data stationary

In [None]:
# First difference of the Close column; the leading NaN row produced by diff() is dropped.
close_diff = df[["Close"]].diff().dropna()

## Additive decomposition of differenced data

In [None]:
# Additive decomposition of the differenced Close series.
# NOTE(review): the result is stored in add_decomposition_open although it holds Close
# data; the name is kept so any later cell that reads it keeps working — consider renaming.
# NOTE(review): period=1 leaves no room for a repeating seasonal pattern — confirm this is intended.
add_decomposition_open = seasonal_decompose(
    close_diff, model="additive", period=1, extrapolate_trend="freq"
)
# plot() returns the Figure; a figure-level title is used because plt.title()
# would attach the text to the current (last) subplot only.
fig = add_decomposition_open.plot()
fig.suptitle("Additive seasonal decomposition")
fig.tight_layout()  # redo the layout so the title does not overlap the top panel
plt.show()

## Autocorrelation of differenced data

In [None]:
# Autocorrelation function (ACF) plot of the differenced Close series.
sm.graphics.tsa.plot_acf(close_diff, title="Autocorrelation")
plt.show()

## Ljung-box test

In [None]:
# Ljung-Box test on the differenced Close series (lags=20), returned as a DataFrame.
acorr_ljungbox(close_diff, return_df=True, lags=20)

## KPSS test 

In [None]:
# KPSS stationarity test on the differenced Close series.
# This section analyses Close, so the test must run on close_diff (not open_diff).
stat, p_value, lags, crit = kpss(close_diff)
print("KPSS Statistic:", stat)
print("p-value:", p_value)
print("Critical Values:", crit)
print("Lags used:", lags)