In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy import stats
from scipy.stats import t
from scipy.optimize import minimize
import seaborn as sns

from statsmodels.tsa.arima_process import ArmaProcess
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf

<font size="5">**Question 1**</font>

In [None]:
# Load the Question 1 dataset from the working directory.
q1 = pd.read_csv(filepath_or_buffer=r'problem1.csv')

In [None]:
# NOTE: despite its name, x1 holds the response column 'y';
# x2 is the design matrix: regressor 'x' plus the constant column added by statsmodels.
x1 = q1['y']
x2 = sm.add_constant(q1['x'])

# Sample means of the response and of the regressor.
m1, m2 = x1.mean(), x2['x'].mean()

# Sample covariance matrix; index 0 corresponds to y, index 1 to x.
cov = np.cov(x1, x2['x'])

In [None]:
# Ordinary least squares of y (x1) on the design matrix [const, x] (x2).
ols = sm.OLS(endog=x1, exog=x2).fit()
print(ols.summary())

In [None]:
# Coefficients implied by the conditional mean of a bivariate normal:
#   E[y | x] = mu_y + (cov_xy / var_x) * (x - mu_x)
# so slope = cov_xy / var_x and intercept = mu_y - slope * mu_x.
cond_slope = cov[0, 1] / cov[1, 1]
cond_intercept = m1 - cond_slope * m2

# beta_0 is the intercept and beta_1 the slope (the original labels were swapped).
print("Conditional beta_0 (intercept): ", cond_intercept)
print("Conditional beta_1 (slope): ", cond_slope)
print()
# Positional access via .iloc: position 0 is the constant column, position 1 the regressor.
# Plain params[0] on a label-indexed Series is deprecated in pandas.
print("OLS beta_0 (intercept): ", ols.params.iloc[0])
print("OLS beta_1 (slope): ", ols.params.iloc[1])

<font size="3">Therefore, the intercept and slope implied by the conditional distribution of the multivariate normal are the same as the OLS estimates.</font>

<font size="5">**Question 2**</font>

In [None]:
# Load the Question 2 dataset from the working directory.
q2 = pd.read_csv(filepath_or_buffer=r'problem2.csv')

In [None]:
# Response vector and design matrix (regressor 'x' plus the constant column added by statsmodels).
y_q2 = q2['y']
x_q2 = sm.add_constant(q2['x'])

# Descriptive statistics of the regressor; np.std uses its default ddof=0 here.
x_mean = x_q2['x'].mean()
x_std = np.std(x_q2['x'])
print("Mean of x: ", x_mean)
print("Standard deviation of x: ", x_std)

In [None]:
# Ordinary least squares of y_q2 on the design matrix [const, x].
ols_q2 = sm.OLS(endog=y_q2, exog=x_q2).fit()
print(ols_q2.summary())

In [None]:
# Residuals of the OLS fit: observed response minus fitted values.
y_hat = ols_q2.predict(x_q2)
error = y_q2 - y_hat

# Kernel density estimate of the residuals on a fresh 7x7 figure.
plt.figure(figsize=(7, 7))
error_dist = error.plot(kind="kde", label="Normal Residual")

In [None]:
def MLE_Norm(parameters):
  """Negative Gaussian log-likelihood of the linear model y = const + beta * x.

  parameters: sequence unpacking to (const, beta, std_dev).
  The regressor and response are read from the module-level x_q2['x'] and y_q2.
  Returns the negated sum of normal log-densities, suitable for a minimizer.
  """
  intercept, slope, sigma = parameters
  fitted = intercept + slope * x_q2['x']
  log_density = stats.norm.logpdf(y_q2, loc=fitted, scale=sigma)
  return -np.sum(log_density)

In [None]:
# Minimise the negative log-likelihood over (const, beta, std_dev), starting from (1, 1, 1).
# std_dev is a scale parameter: without a lower bound the optimiser may step to a
# non-positive value, where the normal log-density is NaN. L-BFGS-B supports box
# bounds, so keep std_dev strictly positive and leave the two coefficients free.
mle_model = minimize(
    MLE_Norm,
    x0=np.array([1.0, 1.0, 1.0]),
    method='L-BFGS-B',
    bounds=[(None, None), (None, None), (1e-8, None)],
)
mle_model

<font size="3">The MLE estimator is the same as the OLS estimator.</font>

In [None]:
# Fit a Student-t distribution by maximum likelihood to 100 synthetic draws
# from t(df=99, loc=0, scale=1). The draw is seeded so the displayed
# (df, loc, scale) estimates are reproducible under Restart & Run All.
# NOTE(review): this fits simulated data, not the regression residuals - confirm intent.
t_sample = t.rvs(df=99, loc=0, scale=1, size=100, random_state=0)
t_fit_params = t.fit(t_sample)
t_fit_params

In [None]:
plt.figure(figsize=(7, 7))

# Residuals of the OLS fit and their kernel density estimate.
y_hat = ols_q2.predict(x_q2)
error = y_q2 - y_hat
error_dist = error.plot.kde(label="Normal Residual")

# MLE under the assumption of normality: the fitted error density is a normal with
# the residuals' mean and their ddof=0 standard deviation (the Gaussian MLE of sigma).
# The previous version used the mean/std of the regressor x, which is unrelated to
# the residual distribution this curve is compared against.
mu_hat = error.mean()
sigma_hat = error.std(ddof=0)
x_dom = np.linspace(mu_hat - 3*sigma_hat, mu_hat + 3*sigma_hat, 100)
plt.plot(x_dom, stats.norm.pdf(x_dom, mu_hat, sigma_hat), label="MLE normality")
plt.legend()

<font size="3">Based on the graph, we can see that the MLE fits better under the t-distribution.</font>

<font size="5">**Question 3**</font>

In [None]:
# AR(1)
# statsmodels takes lag polynomials including the zero lag, with AR coefficients
# negated: [1, -0.6] corresponds to y_t = 0.6*y_{t-1} + e_t.
plt.subplot(2,1,1)
ar1 = np.array([1, -0.6])
ma1 = np.array([1])
AR_object1 = ArmaProcess(ar1, ma1)
# Draw the innovations from a seeded generator so the series (and the ACF/PACF
# plots built from it) are reproducible across kernel restarts.
simulated_data_1 = AR_object1.generate_sample(
    nsample=1000,
    distrvs=np.random.default_rng(1).standard_normal,
)
plt.plot(simulated_data_1)

In [None]:
# Sample ACF and PACF of the simulated AR(1) series for the first 20 lags.
# alpha is the significance level of the confidence band, not an opacity:
# alpha=1 collapses the band to zero width, so use 0.05 (95% band) on both
# plots to make them comparable.
plot_acf(simulated_data_1, alpha=0.05, lags=20)
plt.title("AR(1) ACF")
plt.show()
plot_pacf(simulated_data_1, alpha=0.05, lags=20)
plt.title("AR(1) PACF")
plt.show()

In [None]:
# AR(2)
# statsmodels takes lag polynomials including the zero lag, with AR coefficients
# negated: [1, -0.6, 0.3] corresponds to y_t = 0.6*y_{t-1} - 0.3*y_{t-2} + e_t.
plt.subplot(2,1,1)
ar2 = np.array([1, -0.6, 0.3])
ma2 = np.array([1])
AR_object2 = ArmaProcess(ar2, ma2)
# Draw the innovations from a seeded generator so the series (and the ACF/PACF
# plots built from it) are reproducible across kernel restarts.
simulated_data_2 = AR_object2.generate_sample(
    nsample=5000,
    distrvs=np.random.default_rng(2).standard_normal,
)
plt.plot(simulated_data_2)

In [None]:
# Sample ACF and PACF of the simulated AR(2) series for the first 20 lags.
# alpha is the significance level of the confidence band, not an opacity:
# alpha=1 collapses the band to zero width, so use 0.05 (95% band) on both
# plots to make them comparable.
plot_acf(simulated_data_2, alpha=0.05, lags=20)
plt.title("AR(2) ACF")
plt.show()
plot_pacf(simulated_data_2, alpha=0.05, lags=20)
plt.title("AR(2) PACF")
plt.show()

In [None]:
# AR(3)
# statsmodels takes lag polynomials including the zero lag, with AR coefficients
# negated: [1, -0.6, 0.3, -0.3] corresponds to
# y_t = 0.6*y_{t-1} - 0.3*y_{t-2} + 0.3*y_{t-3} + e_t.
plt.subplot(2,1,1)
ar3 = np.array([1, -0.6, 0.3, -0.3])
ma3 = np.array([1])
AR_object3 = ArmaProcess(ar3, ma3)
# Draw the innovations from a seeded generator so the series (and the ACF/PACF
# plots built from it) are reproducible across kernel restarts.
simulated_data_3 = AR_object3.generate_sample(
    nsample=1000,
    distrvs=np.random.default_rng(3).standard_normal,
)
plt.plot(simulated_data_3)

In [None]:
# Sample ACF and PACF of the simulated AR(3) series for the first 20 lags.
# alpha is the significance level of the confidence band, not an opacity:
# alpha=1 collapses the band to zero width, so use 0.05 (95% band) on both
# plots to make them comparable.
plot_acf(simulated_data_3, alpha=0.05, lags=20)
plt.title("AR(3) ACF")
plt.show()
plot_pacf(simulated_data_3, alpha=0.05, lags=20)
plt.title("AR(3) PACF")
plt.show()

In [None]:
# MA(1)
# Lag polynomials include the zero lag: ma = [1, 0.6] corresponds to
# y_t = e_t + 0.6*e_{t-1}; ar = [1] means there is no autoregressive part.
plt.subplot(2,1,1)
ar1 = np.array([1])
ma1 = np.array([1, 0.6])
MA_object1 = ArmaProcess(ar1, ma1)
# Draw the innovations from a seeded generator so the series (and the ACF/PACF
# plots built from it) are reproducible across kernel restarts.
simulated_data_ma1 = MA_object1.generate_sample(
    nsample=1000,
    distrvs=np.random.default_rng(4).standard_normal,
)
# Plot the MA(1) series generated above (the original plotted the AR(1) series
# simulated_data_1 by mistake).
plt.plot(simulated_data_ma1)

In [None]:
# Sample ACF and PACF of the simulated MA(1) series for the first 20 lags.
# alpha is the significance level of the confidence band, not an opacity:
# alpha=1 collapses the band to zero width, so use 0.05 (95% band) on both
# plots to make them comparable.
plot_acf(simulated_data_ma1, alpha=0.05, lags=20)
plt.title("MA(1) ACF")
plt.show()
plot_pacf(simulated_data_ma1, alpha=0.05, lags=20)
plt.title("MA(1) PACF")
plt.show()

In [None]:
# MA(2)
# Lag polynomials include the zero lag: ma = [1, 0.6, -0.3] corresponds to
# y_t = e_t + 0.6*e_{t-1} - 0.3*e_{t-2}; ar = [1] means there is no autoregressive part.
plt.subplot(2,1,1)
ar2 = np.array([1])
ma2 = np.array([1, 0.6, -0.3])
MA_object2 = ArmaProcess(ar2, ma2)
# Draw the innovations from a seeded generator so the series (and the ACF/PACF
# plots built from it) are reproducible across kernel restarts.
simulated_data_ma2 = MA_object2.generate_sample(
    nsample=1000,
    distrvs=np.random.default_rng(5).standard_normal,
)
plt.plot(simulated_data_ma2)

In [None]:
# Sample ACF and PACF of the simulated MA(2) series for the first 20 lags.
# alpha is the significance level of the confidence band, not an opacity:
# alpha=1 collapses the band to zero width, so use 0.05 (95% band) on both
# plots to make them comparable.
plot_acf(simulated_data_ma2, alpha=0.05, lags=20)
plt.title("MA(2) ACF")
plt.show()
plot_pacf(simulated_data_ma2, alpha=0.05, lags=20)
plt.title("MA(2) PACF")
plt.show()

In [None]:
# MA(3)
# Lag polynomials include the zero lag: ma = [1, 0.6, -0.3, 0.3] corresponds to
# y_t = e_t + 0.6*e_{t-1} - 0.3*e_{t-2} + 0.3*e_{t-3}; ar = [1] means there is
# no autoregressive part.
plt.subplot(2,1,1)
ar3 = np.array([1])
ma3 = np.array([1, 0.6, -0.3, 0.3])
MA_object3 = ArmaProcess(ar3, ma3)
# Draw the innovations from a seeded generator so the series (and the ACF/PACF
# plots built from it) are reproducible across kernel restarts.
simulated_data_ma3 = MA_object3.generate_sample(
    nsample=1000,
    distrvs=np.random.default_rng(6).standard_normal,
)
plt.plot(simulated_data_ma3)

In [None]:
# Sample ACF and PACF of the simulated MA(3) series for the first 20 lags.
# alpha is the significance level of the confidence band, not an opacity:
# alpha=1 collapses the band to zero width, so use 0.05 (95% band) on both
# plots to make them comparable.
plot_acf(simulated_data_ma3, alpha=0.05, lags=20)
plt.title("MA(3) ACF")
plt.show()
plot_pacf(simulated_data_ma3, alpha=0.05, lags=20)
plt.title("MA(3) PACF")
plt.show()