In [None]:
import os
import pandas as pd
import numpy as np
import warnings
import ast
import statistics
from tqdm import tqdm
import matplotlib.pyplot as plt

# Import helper classes
from data_preprocessor import DataPreprocessor
from arima_modeler import ARIMAModeler

In [None]:
# Set directories and parameters
# NOTE: absolute, machine-specific path -- edit it to point at your local copy
# of the cleaned per-patient files before running the notebook.
directory_clean = r"D:\Thesis Project\Github Upload\Project 1\Data\Clean Data\All clean"
# os.listdir() returns entries in arbitrary order; sorting makes the row order
# of every result table below reproducible across runs and machines.
files = sorted(os.listdir(directory_clean))
# Column names handed to the stationarity/residual helpers and reused as the
# column labels of the result tables.
parameters = ["ICP", "AMP", "RAP"]

In [None]:
# Initialize classes
# dp: project preprocessing helper, constructed with the clean-data directory;
#     the cells below call dp.remove_artifacts() on every loaded file.
# am: project ARIMA helper; the cells below use it for the stationarity checks,
#     the residual computation and the ACF/PACF plot.
dp = DataPreprocessor(directory_clean)
am = ARIMAModeler()

### Stationarity Check

In [None]:
# Stationarity check for original data
#
# For every file: load it, remove artifacts, run the ADF and KPSS checks on the
# columns in `parameters`, and store one row per file in each result table.
result_columns = parameters + ['Patient']
df_ADF_stationary = pd.DataFrame(index=range(len(files)), columns=result_columns)
df_KPSS_stationary = pd.DataFrame(index=range(len(files)), columns=result_columns)

progress = tqdm(files, desc="Stationarity Check (Original)")
for row, file_name in enumerate(progress):
    raw = pd.read_csv(os.path.join(directory_clean, file_name))
    cleaned = dp.remove_artifacts(raw)

    adf_map, kpss_map = am.stationarity_check(cleaned, parameters)

    # The first seven characters of the file name are used as the patient label.
    patient_id = file_name[:7]

    # Fill the same row of both tables from the matching result mapping.
    for table, outcome in ((df_ADF_stationary, adf_map), (df_KPSS_stationary, kpss_map)):
        for col in parameters:
            table.at[row, col] = outcome[col]
        table.at[row, 'Patient'] = patient_id

# Save results (written relative to the current working directory)
df_ADF_stationary.to_csv("ADF_test_1min.csv", index=False)
df_KPSS_stationary.to_csv("KPSS_test_1min.csv", index=False)

In [None]:
# Stationarity check for first-order differenced data
#
# Same procedure as for the original data, but the ADF/KPSS checks are run on
# the first-order difference of each cleaned file.
df_ADF_diff = pd.DataFrame(index=range(len(files)), columns=parameters + ['Patient'])
df_KPSS_diff = pd.DataFrame(index=range(len(files)), columns=parameters + ['Patient'])

for file_number, file in enumerate(tqdm(files, desc="Stationarity Check (Diff)")):
    file_path = os.path.join(directory_clean, file)
    df = pd.read_csv(file_path)
    df = dp.remove_artifacts(df)

    # diff() has no previous sample for row 0, so that row is NaN in every
    # column by construction. Drop it explicitly instead of relying on how
    # stationarity_check treats missing values (not visible from this notebook).
    first_order_diff = df.diff().iloc[1:]
    adf_map, kpss_map = am.stationarity_check(first_order_diff, parameters)

    for col in parameters:
        df_ADF_diff.at[file_number, col] = adf_map[col]
        df_KPSS_diff.at[file_number, col] = kpss_map[col]

    # The first seven characters of the file name are used as the patient label.
    patient_id = file[:7]
    df_ADF_diff.at[file_number, 'Patient'] = patient_id
    df_KPSS_diff.at[file_number, 'Patient'] = patient_id

# Save results (written relative to the current working directory)
df_ADF_diff.to_csv("ADF_test_1min_diff.csv", index=False)
df_KPSS_diff.to_csv("KPSS_test_1min_diff.csv", index=False)

### ARIMA Modeling

In [None]:
# ARIMA Grid Search for all patients
# NOTE: no grid search is executed in this cell; it contains only the pointer
# string below, which refers the reader to a separate notebook.
"""
See the multiprocessing.ipynb file. 
"""

In [None]:
# Calculate Median Optimal ARIMA
#
# Reads the merged per-patient optimal orders and prints, for every column
# except the last one, the median p and the median q over all patients that
# have a usable order in that column.

directory = r"D:\Thesis Project\Github Upload\Project 1\Data\CSVs"
file = "ARIMA_optimal_merged.csv"

df_individual = pd.read_csv(os.path.join(directory, file))

# The last column is excluded, exactly as in the original loop bound
# (presumably it holds the patient identifier rather than orders -- TODO confirm).
for i in range(df_individual.shape[1] - 1):

    p_lst = []
    q_lst = []

    for cell in df_individual.iloc[:, i]:
        # A blank CSV cell is read back as NaN; there is no order to parse.
        if pd.isna(cell):
            continue

        # Each cell is a stringified sequence; index 0 is used as p, index 2 as q.
        df_col_lst = ast.literal_eval(cell)

        # Keep an order only when BOTH p and q are present. The previous `or`
        # let an order with a single missing term through, which put a None
        # into one list and made statistics.median raise TypeError.
        if df_col_lst[0] is not None and df_col_lst[2] is not None:
            p_lst.append(df_col_lst[0])
            q_lst.append(df_col_lst[2])

    # statistics.median raises StatisticsError on empty data; report the column
    # and carry on with the remaining ones instead of aborting the cell.
    if not p_lst:
        print("No valid ARIMA order found for ", df_individual.columns[i])
        continue

    median_p_lst = statistics.median(p_lst)
    median_q_lst = statistics.median(q_lst)

    print("Median Optimal model for ", df_individual.columns[i], " is p = ", median_p_lst, ", d = 1 and ",  "q = ", median_q_lst)

### Residuals Calculation

In [None]:
# Residuals computation for all patients
#
# Builds residuals_all: patient label (first seven characters of the file name)
# -> whatever am.calculate_residuals returns for that patient's cleaned data.
#
# NOTE(review): `all_optimal_orders` is not defined in any cell visible in this
# notebook; it has to be created/loaded before this cell, otherwise a fresh
# "Restart & Run All" stops here with a NameError -- confirm where it comes from.
residuals_all = {}

for file_name in tqdm(files, desc="Residuals Calculation"):
    patient_id = file_name[:7]

    raw = pd.read_csv(os.path.join(directory_clean, file_name))
    cleaned = dp.remove_artifacts(raw)

    # The order lookup happens only after the file has been loaded and cleaned.
    residuals_all[patient_id] = am.calculate_residuals(
        cleaned, parameters, all_optimal_orders[patient_id]
    )

In [None]:
# Plot ACF/PACF for RAP residuals (example)
# Example patient label; it must be a key of residuals_all, whose keys are the
# first seven characters of the processed file names.
patient = "TBI_013"
residuals_map = residuals_all[patient]
# Only the 'RAP' entry is plotted; lags and alpha are forwarded unchanged to the
# plotting helper (presumably lag count and significance level -- TODO confirm).
am.plot_residuals_ACF_PACF(residuals_map['RAP'], 'RAP', lags=40, alpha=0.05)