In [None]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="statsmodels")

# Load libraries used throughout the problem set
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from scipy.stats import norm

## Short note before.

Some of the main takeaways from this problem set are
- Build intuition on core processes from this course
- Difference between conditional and unconditional distributions
- Stationarity and existence of moments (and the separation of these two).

# Exercise 1

Consider the ARCH(1) process given by <br> <br>
$x_{t} = \sigma_{t}z_{t}$ <br>
$\sigma_{t}^{2}=\omega + \alpha x_{t-1}^{2}$ <br> <br>
with $z_{t}$ i.i.d $\mathcal{N}(0,1)$


### Exercise 1.1 + 1.2
- Simulate an ARCH(1) process $\{x_{t}\}_{t=0}^{T}$ with $T=1000$, $x_{0}=0$, $\omega=1$ and $\alpha=0.4$

- Plot the simulated data, histogram and density

In [None]:
class ARCH1():
    """
    Simulate and visualize an ARCH(1) process

        x_t       = sigma_t * z_t
        sigma_t^2 = ω + α * x_{t-1}^2

    where z_t are i.i.d. draws, either standard Gaussian or Student-t rescaled
    to unit variance.

    Methods:
    simulate(): Simulate the ARCH(1) process.
    plot_results(): Plot simulated data (returns and vol), histogram+density and QQ plot
    plot_tail_comparison(): Plot simulated data for ARCH(1) process and another process y, as well as QQ plots for both.
    """

    def __init__(self, α, ω=1, x0=0, T=1000, dist='Gaussian', v=6, seed=None):
        """
        Store the model parameters and draw the T innovations z.

        Parameters:
        α    : Coefficient on x_{t-1}^2 in the conditional variance. Must be >= 0.
        ω    : Constant term of the conditional variance. Must be >= 0. Default is 1.
        x0   : Initial value used in place of x_{t-1} at t=0. Default is 0.
        T    : Number of simulated periods. Must be >= 1. Default is 1000.
        dist : 'Gaussian' or 't'. Default is 'Gaussian'.
        v    : Degrees of freedom when dist='t'; must be > 2. Default is 6.
        seed : If given, passed to np.random.seed. Note that this seeds NumPy's
               *global* generator, so later np.random draws outside the class
               are affected as well.

        Raises:
        ValueError: for T < 1, negative α or ω, v <= 2 with dist='t', or an
                    unknown dist.
        """
        # Set seed if supplied to replicate results
        if seed is not None:
            np.random.seed(seed)

        # Reject inputs that would otherwise fail silently: a negative α or ω can
        # make sigma2 negative, and np.sqrt then fills the path with NaN.
        if T < 1:
            raise ValueError("T must be at least 1.")
        if α < 0 or ω < 0:
            raise ValueError("α and ω must be non-negative so that the conditional variance cannot become negative.")

        # Assign parameters
        self.α         = α    # Coefficient for the ARCH(1) process
        self.ω         = ω    # Constant term for the ARCH(1) process. Default is 1.
        self.T         = T    # Number of time periods for the simulation. Default is 1000.
        self.x0        = x0   # Initial value for the process. Default is 0.
        self.dist      = dist # Distribution to use for random variables ('Gaussian' or 't'). Default is 'Gaussian'.
        self.v         = v    # Degrees of freedom for the t-distribution (if used). Default is 6.

        # Containers for the x and σ2 processes; they hold arbitrary memory until
        # simulate() has filled them, which is what _simulated keeps track of.
        self.x          = np.empty(T)
        self.sigma2     = np.empty(T)
        self._simulated = False

        # Choose which distribution to draw the errors from.
        if self.dist == "Gaussian":
            self.z = np.random.normal(size=self.T)
        elif self.dist == "t":
            if self.v > 2:  # Ensure that degrees of freedom is greater than 2 for scaling
                scale_factor = np.sqrt((self.v-2) / self.v) # Scale such that mean is 0 and var is 1
                self.z = np.random.standard_t(df=self.v, size=self.T) * scale_factor
            else:
                raise ValueError("Degrees of freedom must be greater than 2 for scaling. (Cauchy case not implemented here)")
        else:
            raise ValueError("Invalid distribution. Use 'Gaussian' or 't'.")


    def simulate(self):
        """
        Fill self.sigma2 and self.x recursively from the stored innovations.

        Returns:
        (x, sigma2): the simulated process and its conditional variance, both
                     arrays of length T (the same objects as self.x / self.sigma2).
        """
        x_prev = self.x0  # at t=0 the initial value x0 stands in for x_{t-1}
        for t in range(self.T):
            self.sigma2[t] = self.ω + self.α*x_prev**2
            self.x[t]      = np.sqrt(self.sigma2[t])*self.z[t]
            x_prev         = self.x[t]

        self._simulated = True
        return self.x, self.sigma2


    def _require_simulated(self):
        """Raise if simulate() has not been run, so we never plot unfilled arrays."""
        if not getattr(self, "_simulated", False):
            raise RuntimeError("Call simulate() before plotting.")


    def plot_results(self, figsize=(13, 13)):
        """
        Draw a 2x2 figure: the simulated path, the conditional variance, a
        histogram with KDE and a fitted normal density, and a QQ plot.

        Raises:
        RuntimeError: if simulate() has not been called.
        """
        self._require_simulated()
        mean, std = self.x.mean(), self.x.std()

        fig, axs = plt.subplots(2, 2, figsize=figsize)
        axs[0, 0].plot(self.x, lw=0.5, label="ARCH(1) process")
        axs[0, 0].legend()
        axs[0, 1].plot(self.sigma2, lw=0.5, label="Conditional Variance")
        axs[0, 1].legend()
        # At least one bin, otherwise T < 25 would request zero bins.
        n_bins = max(1, int(self.T / 25))
        sns.histplot(self.x, stat="density", bins=n_bins, kde=True, label='Data', ax=axs[1, 0])
        sns.lineplot(x=self.x, y=norm.pdf(self.x, mean, std), color='red', label=f'N(s={std:.2f})', ax=axs[1, 0])
        axs[1, 0].legend()
        sm.qqplot(self.x, line='q', fit=True, label='ARCH(1) - QQ plot', ax=axs[1, 1])
        axs[1, 1].legend()
        fig.suptitle(f"Alpha = {self.α}", y=0.91)  # y refers to spacing between suptitle and figures
        plt.show()

    def plot_tail_comparison(self, y, figsize=(13, 5)):
        """
        Draw a 1x3 figure: the simulated path overlaid with the series y, and
        one QQ plot for each of the two series.

        Parameters:
        y : the comparison series (labelled "iid N(0,V)" in the plots).

        Raises:
        RuntimeError: if simulate() has not been called.
        """
        self._require_simulated()
        fig2, axs2 = plt.subplots(1, 3, figsize=figsize)
        axs2[0].plot(self.x, lw=0.3, label="ARCH(1) process")
        axs2[0].plot(y, lw=0.3, label="iid N(0,V) process")
        axs2[0].legend()
        sm.qqplot(self.x, line='q', fit=True, label='ARCH(1) - QQ plot', ax=axs2[1])
        axs2[1].legend()
        sm.qqplot(y, line='q', fit=True, label='iid N(0,V)', ax=axs2[2])
        axs2[2].legend()
        plt.show()

#### Let us begin by simulating with $\alpha = 0.4$

In [None]:
# Gaussian ARCH(1) with α = 0.4; the fixed seed makes the figures reproducible.
arch1 = ARCH1(seed=123, α=0.4)
arch1.simulate()
arch1.plot_results(figsize=(14, 14))

In [None]:
# Zoom into the left tail of the histogram and compare it with the fitted normal density.
# The bin count is derived from arch1.T (instead of a hard-coded 1000) so it always
# matches the binning used in plot_results.
fig, ax = plt.subplots()
sns.histplot(arch1.x, stat="density", bins=max(1, int(arch1.T / 25)), kde=True, label='Data', ax=ax)
sns.lineplot(x=arch1.x, y=norm.pdf(arch1.x, arch1.x.mean(), arch1.x.std()), color='red', label=f'N(s={arch1.x.std():.2f})', ax=ax)
ax.set_xlim(-5, -2)
ax.set_ylim(0, 0.04)
ax.set_title("Left tail: ARCH(1) data vs. fitted normal density")
ax.legend()
plt.show()

Comments

- While our model is conditionally normal distributed, the unconditional (marginal) distribution is not normal!
- Even for a modest value of $\alpha$, we have fatter unconditional tails

### Exercise 1.3
With $\alpha = 0.4$, it follows that $x_{t}$ is stationary with variance <br><br> $V(x) = \omega/(1-\alpha) = 5/3$. <br><br> 
- Simulate an i.i.d $\mathcal{N}(0,V(X))$ series and compare the tails of the two distributions by simple plots

In [None]:
# Draw the i.i.d. N(0, V(X)) benchmark series, where V(X) = 5/3 for α = 0.4
V = 5/3
y = np.random.normal(loc=0, scale=np.sqrt(V), size=1000)

# Path overlay plus one QQ plot per series
arch1.plot_tail_comparison(y)

Comments
- Even though both processes have the same unconditional variance (5/3), we see the difference in the tail behaviour due to the ARCH modelling

## Exercise 1.4

- Repeat 1-3 with $\alpha = 0.9$

In [None]:
# Same exercise as before, now with a much more persistent variance process.
α = 0.9

arch1_ex1_4 = ARCH1(seed=2023, α=α)
arch1_ex1_4.simulate()
arch1_ex1_4.plot_results()

In [None]:
# Unconditional variance of the ARCH(1) process is ω/(1-α). For α = 0.9 (ω = 1) that
# is 10, not the 5/3 that applies to α = 0.4, so derive it from the simulated model
# to make sure the Gaussian benchmark really has the same variance.
V = arch1_ex1_4.ω / (1 - arch1_ex1_4.α)
y = np.random.normal(0, np.sqrt(V), size=arch1_ex1_4.T)

arch1_ex1_4.plot_tail_comparison(y)

Comments

- The higher $\alpha$ value allows for much fatter tails as evident by both the QQ plots and the ARCH process itself.

## Exercise 1.5
- Repeat questions 1-2 with $\alpha \in \{ 1,1.1,2 \}$

In [None]:
# α = 1: boundary case for a finite variance
arch1_ex1_5_1 = ARCH1(seed=2023, α=1)
arch1_ex1_5_1.simulate()
arch1_ex1_5_1.plot_results()

In [None]:
# α = 1.1: just above the finite-variance boundary
arch1_ex1_5_2 = ARCH1(seed=2023, α=1.1)
arch1_ex1_5_2.simulate()
arch1_ex1_5_2.plot_results()

In [None]:
# α = 2: well above the finite-variance boundary
arch1_ex1_5_3 = ARCH1(seed=2023, α=2)
arch1_ex1_5_3.simulate()
arch1_ex1_5_3.plot_results()

Comments
- When $\alpha \geq 1$, the process $x_{t}$ no longer has a finite second order moment (variance). However, it is still a stationary process!
- The critical value of $\alpha$ for stationarity of an ARCH(1) model with Gaussian innovations is roughly 3.56. See lecture notes (Part I) for table of critical values.
- Tails are VERY fat now

# Exercise 2

Consider the ARCH(1) process given by <br> <br>
$x_{t} = \sigma_{t}z_{t}$ <br>
$\sigma_{t}^{2}=\omega + \alpha x_{t-1}^{2}$ <br> <br>
with $z_{t}$ i.i.d $t_{v}(0,1)$


In [None]:
# ARCH(1) with α = 0.4 and Student-t innovations (default v = 6 degrees of freedom)
arch1_ex2 = ARCH1(dist='t', seed=2023, α=0.4)
arch1_ex2.simulate()
arch1_ex2.plot_results()

In [None]:
# Gaussian benchmark with the α = 0.4 unconditional variance V = 5/3
V = 5/3
y = np.random.normal(loc=0, scale=np.sqrt(V), size=1000)

arch1_ex2.plot_tail_comparison(y)

Comments
- The t distribution takes more extreme values with a higher probability, which in turn allows for more mass in the unconditional tails
- Existence of moments still relies on $\alpha$, but also on $v$ now!
- We now have fatter tails than when we used Gaussian innovations. Consider the QQ plot below (especially y-axis) for the Gaussian case from exercise 1 and the new case with t-distributed innovations.

In [None]:
# If the two models from earlier cells are not in memory, uncomment the lines
# below to rebuild them (same parameters and seeds as used further up).
#arch1 = ARCH1(α=0.4, seed=123)
#arch1.simulate()
#arch1_ex2 = ARCH1(α=0.4, dist='t', seed=2023)
#arch1_ex2.simulate()

fig, axs = plt.subplots(1, 3, figsize=(13,4))

# Left panel: both simulated paths on the same axes
axs[0].plot(arch1.x, lw=0.3, label="N dist errors")
axs[0].plot(arch1_ex2.x, lw=0.3, label="t dist errors")
axs[0].legend()

# Middle and right panels: one QQ plot per innovation distribution
for panel, series, tag in zip(axs[1:], (arch1.x, arch1_ex2.x), ('N dist errors', 't dist errors')):
    sm.qqplot(series, line='q', fit=True, label=tag, ax=panel)
    panel.legend()

fig.suptitle("QQ-plots for alpha=0.4 when using N(0,1) and t_v(0,1) (v=6) distributed errors")
plt.show()

## Exercise 2.4

Repeat questions 2.1-2.3 with $\alpha = 0.9$

In [None]:
# Student-t innovations (default v = 6) with the more persistent α = 0.9
arch1_ex2_4 = ARCH1(dist='t', seed=2023, α=0.9)
arch1_ex2_4.simulate()
arch1_ex2_4.plot_results()

In [None]:
# The innovations are scaled to unit variance, so the unconditional variance is still
# ω/(1-α). For α = 0.9 (ω = 1) that is 10, not the 5/3 that applies to α = 0.4, so
# derive it from the simulated model to give the Gaussian benchmark the same variance.
V = arch1_ex2_4.ω / (1 - arch1_ex2_4.α)
y = np.random.normal(0, np.sqrt(V), size=arch1_ex2_4.T)

arch1_ex2_4.plot_tail_comparison(y)

Comments
- For v=6, we still have that the second order moments are finite when $\alpha$ < 1
- Similar to 1.4, but now the t-distribution allows for more mass in the tails 
- More extreme tails

## Exercise 2.5

Repeat questions 1-2 with $\alpha \in \{ 1, 1.1, 2 \}$ and $v \in \{ 4, 8, 99 \}$

In [None]:
# Pick any combination of α and v you like and re-run this cell.

α = 2
v = 4

arch1_ex2_5 = ARCH1(dist='t', α=α, v=v, seed=2023)
arch1_ex2_5.simulate()
arch1_ex2_5.plot_results()

Comments
- Try the different combinations to see how the plots change for different alpha and v values (For v = 6, you may even have alpha < 4.5 and still have a stationary process!)
- For $v \rightarrow \infty $, the t distribution will tend to the Gaussian - Note that $x_{t}$ will still NOT be unconditionally Gaussian. We are simply in the same case as before (exercise 1)

# Exercise 3

Consider the stochastic process as given by

$x_{t} = \mu + \epsilon_{t}$

with $\epsilon_{t}$ an i.i.d sequence. With $\{x_{t}\}_{t=1}^{T}$ the average is given by $\bar{x} = T^{-1}\sum_{t=1}^{T}x_{t}$



## Exercise 3.1+3.2

* With $\epsilon_{t}$ i.i.d $\mathcal{N}(0,1)$ argue that $\bar{x} \xrightarrow p E(x_{t}) = \mu$
* Simulate $x_{t}$ for $T=1000$ and $\mu = 1$. Plot $\bar{x}$ as a function of N, $\bar{x}(N) = N^{-1}\sum_{t=1}^{N} x_{t}$ with $N=10,100,\dots,T/2,\dots,T$. Comment.


In [None]:
# x_t = μ + ϵ_t with Gaussian errors: plot the running sample mean against N.
np.random.seed(123)
T = 1000
μ   = 1
ϵ = np.random.normal(size=T)
x_t = μ + ϵ

# Sample mean of the first N observations for N = 10, 20, ..., T
N_values = np.arange(10,T+1,10)
x_bar_values = [np.mean(x_t[:N]) for N in N_values]

# Raw strings for the LaTeX labels: '\m' in a normal string literal is an invalid
# escape sequence and triggers a SyntaxWarning on recent Python versions.
plt.figure(figsize=(13,8))
plt.plot(N_values, x_bar_values, lw=0.3, label=r'$\bar{x}(N)$')
plt.ylim(0.6,1.4)
plt.axhline(y=μ, color='r', linestyle='--', label=r'$\mu$')
plt.xlabel('N')
plt.ylabel(r'$\bar{x}(N)$')
plt.title(r'Convergence of $\bar{x}(N)$ to $\mu$')
plt.legend()
plt.show()

## Exercise 3.3

Assume that $\epsilon_{t}$ is i.i.d $t_{4}(0,1)$ distributed. Argue that $\bar{x} \xrightarrow p E(x_{t}) = \mu$ and repeat question 2

In [None]:
# x_t = μ + ϵ_t with t_4 errors rescaled to unit variance: running sample mean against N.
np.random.seed(123)
T = 1000
μ = 1  # set here as well so the cell does not depend on an earlier cell having run
v = 4
ϵ = np.sqrt((v-2)/v)*np.random.standard_t(df=v, size=T)
x_t = μ + ϵ

# Sample mean of the first N observations for N = 10, 20, ..., T
N_values = np.arange(10,T+1,10)
x_bar_values = [np.mean(x_t[:N]) for N in N_values]

# Raw strings for the LaTeX labels: '\m' in a normal string literal is an invalid
# escape sequence and triggers a SyntaxWarning on recent Python versions.
plt.figure(figsize=(13,8))
plt.plot(N_values, x_bar_values, lw=0.3, label=r'$\bar{x}(N)$')
plt.ylim(0.6,1.4)
plt.axhline(y=μ, color='r', linestyle='--', label=r'$\mu$')
plt.xlabel('N')
plt.ylabel(r'$\bar{x}(N)$')
plt.title(r'Convergence of $\bar{x}(N)$ to $\mu$')
plt.legend()
plt.show()

## Exercise 3.4

Assume that $\epsilon_{t}$ is i.i.d $t_{1}(0,1)$ (or Cauchy) distributed. In this case, no convergence holds (why not?) - illustrate by simulations as in question 2.

In [None]:
# x_t = μ + ϵ_t with standard Cauchy errors: running sample mean against N.
np.random.seed(123)
T = 1000
μ = 1  # set here as well so the cell does not depend on an earlier cell having run
ϵ = np.random.standard_cauchy(size=T)
x_t = μ + ϵ

# Sample mean of the first N observations for N = 10, 20, ..., T
N_values = np.arange(10,T+1,10)
x_bar_values = [np.mean(x_t[:N]) for N in N_values]

# Raw strings for the LaTeX labels: '\m' in a normal string literal is an invalid
# escape sequence and triggers a SyntaxWarning on recent Python versions.
# No fixed y-limits here (unlike the two previous plots), so that the full range of
# the running mean stays visible.
plt.figure(figsize=(13,8))
plt.plot(N_values, x_bar_values, lw=0.3, label=r'$\bar{x}(N)$')
plt.axhline(y=μ, color='r', linestyle='--', label=r'$\mu$')
plt.xlabel('N')
plt.ylabel(r'$\bar{x}(N)$')
# Title reworded: the previous cells' "Convergence of ..." title was copied over,
# but this exercise illustrates the absence of convergence.
plt.title(r'Non-convergence of $\bar{x}(N)$ to $\mu$ (Cauchy errors)')
plt.legend()
plt.show()