In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.regression.linear_model import OLS

In [None]:
def load_data(data_path):
    """Read a price CSV and return it indexed by calendar date.

    The file must provide ``YEAR``, ``MONTH`` and ``DAY`` columns. They are
    combined into a single datetime index named ``DATE`` and then removed
    from the frame; every other column is returned untouched.

    Parameters
    ----------
    data_path : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The CSV contents indexed by ``DATE``, without the date-part columns.
    """
    date_parts = ["YEAR", "MONTH", "DAY"]
    raw = pd.read_csv(data_path)
    # pd.to_datetime assembles one timestamp per row from the three columns.
    dates = pd.to_datetime(raw[date_parts])
    return raw.assign(DATE=dates).set_index("DATE").drop(columns=date_parts)

In [None]:
# Locations of the training and testing CSV files (relative paths, so they
# resolve against the directory the notebook is run from).
TRAINING_DATA_PATH = "data/PSX/raw/train/data.csv"
TESTING_DATA_PATH = "data/PSX/raw/test/data.csv"

In [None]:
# Load the training CSV into a DATE-indexed frame, then print its column
# names, dtypes and non-null counts.
training_data = load_data(TRAINING_DATA_PATH)
training_data.info()

In [None]:
# Preview the first rows of the training frame.
training_data.head()

In [None]:
# Line chart of the training closing prices against their date index.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(training_data["CLOSE"])
ax.set_title("STOCK PRICES (TRAINING - DAILY)")
ax.set_xlabel("DATE")
ax.set_ylabel("CLOSING PRICE")
plt.show()

In [None]:
# Down-sample the training frame to weekly bins ("W"), averaging every column
# within each bin; rows left with missing values are dropped.
weekly_training_data = training_data.resample("W").mean().dropna()
weekly_training_data.info()

In [None]:
# Preview the first rows of the weekly-averaged training frame.
weekly_training_data.head()

In [None]:
# Line chart of the weekly-averaged training closing prices.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(weekly_training_data["CLOSE"])
ax.set_title("STOCK PRICES (TRAINING - WEEKLY)")
ax.set_xlabel("DATE")
ax.set_ylabel("CLOSING PRICE")
plt.show()

In [None]:
# Load the testing CSV into a DATE-indexed frame, then print its column
# names, dtypes and non-null counts.
testing_data = load_data(TESTING_DATA_PATH)
testing_data.info()

In [None]:
# Preview the first rows of the testing frame.
testing_data.head()

In [None]:
# Line chart of the testing closing prices against their date index.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(testing_data["CLOSE"])
ax.set_title("STOCK PRICES (TESTING - DAILY)")
ax.set_xlabel("DATE")
ax.set_ylabel("CLOSING PRICE")
plt.show()

In [None]:
# Down-sample the testing frame to weekly bins ("W"), averaging every column
# within each bin; rows left with missing values are dropped.
weekly_testing_data = testing_data.resample("W").mean().dropna()
weekly_testing_data.info()

In [None]:
# Preview the first rows of the weekly-averaged testing frame.
weekly_testing_data.head()

In [None]:
# Line chart of the weekly-averaged testing closing prices.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(weekly_testing_data["CLOSE"])
ax.set_title("STOCK PRICES (TESTING - WEEKLY)")
ax.set_xlabel("DATE")
ax.set_ylabel("CLOSING PRICE")
plt.show()

In [None]:
# Augmented Dickey-Fuller test on the training closing prices. The first two
# entries of the returned tuple are the test statistic and its p-value.
result = adfuller(training_data["CLOSE"])
print("ADF Statistic:", result[0])
print("p-value:", result[1])

In [None]:
THRESH = 0.01  # ADF p-value at or below which the series is treated as stationary
MAX_DIFFS = 3  # cap on differencing passes so the search always terminates

# Start from the raw training closing prices and plot them as the "before" view.
X = training_data["CLOSE"].copy()
plt.plot(X)

# If the raw series is not stationary, take the log exactly once and then
# difference until the ADF test rejects a unit root. The log must not be
# re-applied on later passes: a log-differenced series contains negative
# values, for which np.log returns NaN.
p_value = adfuller(X)[1]
if p_value > THRESH:
    X = np.log(X)
    for _diff_pass in range(MAX_DIFFS):
        X = X.diff().dropna()
        p_value = adfuller(X)[1]
        if p_value <= THRESH:
            break
    else:
        print(
            f"Series still non-stationary after {MAX_DIFFS} differences "
            f"(p-value={p_value:.4f})"
        )

In [None]:
# Plot X as it stands after the stationarity transformation.
plt.plot(X)

In [None]:
class ARIMA:
    """Two-stage least-squares approximation of an ARIMA(p, d, q) model.

    The series is differenced ``d`` times, an AR(p) regression is fitted to
    the differenced values with OLS, and a second OLS regresses the AR
    residuals on their own ``q`` lags to stand in for the MA component.

    Parameters
    ----------
    data : pandas.Series
        Observed series, oldest observation first.
    p : int
        Number of autoregressive lags; ``fit`` requires ``p >= 1``.
    d : int
        Number of times the series is differenced before fitting.
    q : int
        Number of residual lags; ``fit`` requires ``q >= 1``.
    """

    def __init__(self, data, p, d, q):
        self.data = data
        self.p = p
        self.d = d
        self.q = q

    def _difference_levels(self):
        """Return ``[data, diff(data), ..., diff^d(data)]`` as pandas Series."""
        levels = [self.data]
        for _ in range(self.d):
            levels.append(levels[-1].diff().dropna())
        return levels

    def fit(self):
        """Fit the AR and residual regressions and show ACF/PACF plots.

        Sets ``model_ar``, ``model_ma`` and ``residuals`` on the instance.

        Raises
        ------
        ValueError
            If ``p`` or ``q`` is smaller than 1 (no lag columns to regress on).
        """
        if self.p < 1 or self.q < 1:
            raise ValueError(
                f"ARIMA.fit requires p >= 1 and q >= 1 (got p={self.p}, q={self.q})"
            )

        diff_data = self._difference_levels()[-1]

        # NOTE(review): the diagnostics are drawn for the raw series rather
        # than the differenced one — confirm this is intended when d > 0.
        plot_acf(self.data, lags=31)
        plot_pacf(self.data, lags=31)

        # Column i-1 of the design matrix holds lag i, so params[0] is the
        # lag-1 coefficient. The first p rows contain NaN from shifting.
        X = np.column_stack([diff_data.shift(i) for i in range(1, self.p + 1)])
        X = X[self.p :]
        y = diff_data[self.p :]
        self.model_ar = OLS(y, X).fit()

        # The residuals are trimmed by q here and by q again below, so the
        # residual regression sees 2*q fewer rows than the AR fit.
        self.residuals = y - self.model_ar.predict(X)
        self.residuals = self.residuals[self.q :]

        X_residuals = np.column_stack(
            [self.residuals.shift(i) for i in range(1, self.q + 1)]
        )
        X_residuals = X_residuals[self.q :]
        y_residuals = self.residuals[self.q :]

        self.model_ma = OLS(y_residuals, X_residuals).fit()

    def predict(self, steps):
        """Forecast ``steps`` values past the end of the data.

        Forecasting happens on the ``d``-times differenced series and the
        result is integrated back, so the returned values are on the scale of
        ``data``. Instance state is left untouched, so repeated calls return
        the same forecasts.

        Parameters
        ----------
        steps : int
            Number of future observations to forecast.

        Returns
        -------
        list of float
            The forecasts, oldest first.
        """
        levels = [
            np.asarray(level, dtype=float) for level in self._difference_levels()
        ]
        history = levels[-1]
        # Work on copies so self.residuals is not grown by forecasting.
        shocks = np.asarray(self.residuals, dtype=float)
        ar_coefs = np.asarray(self.model_ar.params, dtype=float)
        ma_coefs = np.asarray(self.model_ma.params, dtype=float)

        forecasts = []
        for _ in range(steps):
            # Coefficient k belongs to lag k+1, so the most recent value must
            # come first: reverse the chronological window before the dot.
            ar_part = np.dot(ar_coefs, history[-self.p :][::-1])
            ma_part = np.dot(ma_coefs, shocks[-self.q :][::-1])
            prediction = ar_part + ma_part
            forecasts.append(prediction)

            history = np.append(history, prediction)
            shocks = np.append(shocks, prediction - ar_part)

        # Undo the differencing one order at a time, anchoring each cumulative
        # sum on the last observed value of the next-lower order.
        forecasts = np.asarray(forecasts, dtype=float)
        for lower in reversed(levels[:-1]):
            forecasts = lower[-1] + np.cumsum(forecasts)

        return list(forecasts)

In [None]:
# Fit the hand-rolled ARIMA on the training closing prices (no differencing,
# one AR lag).
# NOTE(review): q=253 is presumably about one year of trading days — confirm.
arima = ARIMA(training_data["CLOSE"], p=1, d=0, q=253)
arima.fit()

In [None]:
# Forecast horizon: 182 observations, capped at what the test set holds so
# every forecast has a matching actual value.
steps = min(182, len(testing_data))

# Stamp each forecast with the date of the test observation it is compared
# against. pd.date_range defaults to calendar-day frequency, which drifts away
# from the test rows whenever the test index skips days (e.g. non-trading days).
pred_dates = testing_data.index[:steps]
pred_dates

In [None]:
# Forecast `steps` values and echo them as plain Python floats for display.
predictions = arima.predict(steps=steps)
[float(p) for p in predictions]

In [None]:
# Re-wrap the forecasts as a Series indexed by pred_dates so they can be
# plotted on a date axis.
predictions = pd.Series(predictions, index=pred_dates)
predictions

In [None]:
# Training history, the first `steps` test observations, and the forecasts on
# one date axis. All three series are daily, so the title says so.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(training_data["CLOSE"], label="TRAINING DATA")
# .iloc makes the "first `steps` rows" slice explicitly positional.
ax.plot(testing_data["CLOSE"].iloc[:steps], label="TESTING DATA", color="green")
ax.plot(predictions, label="PREDICTIONS", color="red")
ax.set_title("STOCK PRICES PREDICTIONS BY ARIMA (DAILY)")
ax.set_xlabel("DATE")
ax.set_ylabel("CLOSING PRICE")
ax.legend()
plt.show()

In [None]:
# Close-up of the forecast window: the first `steps` test observations against
# the forecasts. Both series are daily, so the title says so.
fig, ax = plt.subplots(figsize=(12, 6))
# .iloc makes the "first `steps` rows" slice explicitly positional.
ax.plot(testing_data["CLOSE"].iloc[:steps], label="TESTING DATA", color="green")
ax.plot(predictions, label="PREDICTIONS", color="red")
ax.set_title("STOCK PRICES PREDICTIONS BY ARIMA (DAILY)")
ax.set_xlabel("DATE")
ax.set_ylabel("CLOSING PRICE")
ax.legend()
plt.show()