In [None]:
!pip install pandas numpy statsmodels matplotlib

import pandas as pd
import warnings
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
from itertools import product

# Silence every warning of category UserWarning for the rest of the run
warnings.simplefilter("ignore", UserWarning)

# Read the "data" sheet of the workbook; the "Date" column is parsed as dates,
# so the sheet is expected to contain a column with that name
file_path = "C:\\Users\\shofi\\Documents\\TA\\dataTA.xlsx"  # Update with your path
df = pd.read_excel(
    file_path,
    sheet_name="data",
    parse_dates=["Date"],
)

# Show the first rows so the load can be checked by eye
print("Loaded Data:\n", df.head(), sep="\n")

# Define a function to test stationarity
def test_stationarity(timeseries):
    """Run the Augmented Dickey-Fuller test on a series and report the verdict.

    Missing values are dropped before the test. The ADF statistic and the
    p-value are printed, followed by a one-line verdict.

    Args:
        timeseries: Object exposing ``dropna()`` (e.g. a pandas Series).

    Returns:
        True when the p-value is at most 0.05 (reported as stationary),
        False otherwise.
    """
    cleaned = timeseries.dropna()
    # Only the first two entries of the result are needed here.
    adf_stat, p_value = adfuller(cleaned)[:2]

    print("ADF Statistic:", adf_stat)
    print("p-value:", p_value)

    stationary = bool(p_value <= 0.05)
    print("Data is stationary" if stationary else "Data is not stationary")
    return stationary

# Define function to find the best ARIMA model
def find_best_arima_model(data, p_range, d_range, q_range):
    """Grid-search ARIMA orders and return the fit with the lowest AIC.

    Every (p, d, q) combination drawn from the three ranges is fitted to
    ``data``. For each successful fit the AIC and the sum of squared
    residuals are printed; a combination whose fit raises is reported and
    skipped.

    Args:
        data: Observations passed unchanged to ``ARIMA``.
        p_range: Iterable of candidate autoregressive orders.
        d_range: Iterable of candidate differencing orders.
        q_range: Iterable of candidate moving-average orders.

    Returns:
        A tuple ``(best_model, best_order)``: the fitted results object with
        the lowest AIC and its ``(p, d, q)`` order. Both are ``None`` when no
        combination could be fitted (including when a range is empty).
    """
    best_aic = float("inf")
    best_model = None
    best_order = None

    for p, d, q in product(p_range, d_range, q_range):
        try:
            model = ARIMA(data, order=(p, d, q)).fit()
            aic = model.aic
            residuals = model.resid  # in-sample errors of this fit
            sse = (residuals ** 2).sum()  # vectorised sum of squared errors
        except Exception as exc:
            # Best-effort search: one failing order must not abort the grid.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate so a long search can be stopped.
            print(f"ARIMA({p}, {d}, {q}) - skipped ({type(exc).__name__}: {exc})")
            continue

        print(f"ARIMA({p}, {d}, {q}) - AIC: {aic}, SSE: {sse}")

        # A NaN AIC never compares as smaller, so such a fit is never selected.
        if aic < best_aic:
            best_aic = aic
            best_model = model
            best_order = (p, d, q)

    print("\nBest model:")
    if best_order is None:
        print("No ARIMA model could be fitted")
    else:
        print(f"ARIMA{best_order} - AIC: {best_aic}")
    return best_model, best_order

# Main processing loop: Iterate over all columns except 'Date'
for col in df.columns:
    if col == "Date":
        continue

    print(f"\nProcessing column: {col}")

    # Stationarity test on the series in its original units.
    data = df[col]
    is_stationary = test_stationarity(data)

    # For a non-stationary series, show the first difference and re-test it.
    # This is diagnostic output only: the model below is fitted on the
    # original series and differencing is left to the ARIMA ``d`` order.
    # Fitting on manually differenced data while also searching over ``d``
    # would difference the series twice and produce forecasts of differences
    # rather than of the column's values.
    if not is_stationary:
        differenced = data.diff().dropna()
        print(f"\nDifferenced data for {col}:")
        print(differenced.head())
        test_stationarity(differenced)

    # Candidate orders. A series that failed the stationarity test is forced
    # to use at least one order of differencing.
    p_range = range(0, 4)  # p: 0, 1, 2, 3
    d_range = range(0, 3) if is_stationary else range(1, 3)  # d: up to 2
    q_range = range(0, 4)  # q: 0, 1, 2, 3

    # Find the best ARIMA model for the original series
    best_model, best_order = find_best_arima_model(data, p_range, d_range, q_range)

    # Forecast only when at least one model could be fitted
    if best_model is not None:
        print(f"\nForecast for column {col}:")
        forecast = best_model.forecast(steps=5)  # Forecast 5 future steps
        print(forecast)

