In [1]:
# Import libraries
import logging
import pandas as pd
import numpy as np
import warnings
import time
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error
from concurrent.futures import ThreadPoolExecutor

In [2]:
# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler()]
)

warnings.filterwarnings('ignore')

def price_forecast(series, order, name):
    """Fit ARIMA model and return MAE and BIC."""
    try:
        model = ARIMA(series, order=order).fit(method_kwargs={'warn_convergence': False})
        forecast_steps = min(30, len(series))  # Ensure we forecast at most 30 days
        predictions = model.forecast(steps=forecast_steps)
        actual = series[-forecast_steps:]
        mae = mean_absolute_error(actual, predictions)
        bic = model.bic

        # Log results
        logging.info(f"{name} - Order {order}: MAE = {mae:.4f}, BIC = {bic:.4f}")
        return {"mae": mae, "bic": bic}
    except Exception as e:
        logging.error(f"Error for {name} - Order {order}: {e}")
        return {"mae": None, "bic": None}

def predict_commodity(commodity, csv_path, pdq_low, pdq_high):
    """Process predictions for a given commodity."""
    logging.info(f"Processing: {commodity}")
    try:
        df = pd.read_csv(csv_path, index_col='Date', parse_dates=True)
        if 'Low' not in df or 'High' not in df:
            logging.error(f"Missing 'Low' or 'High' columns for {commodity}")
            return {}

        low_results = price_forecast(df['Low'], pdq_low, f"{commodity} Low")
        high_results = price_forecast(df['High'], pdq_high, f"{commodity} High")
        return {"commodity": commodity, "low": low_results, "high": high_results}

    except Exception as e:
        logging.error(f"Error processing {commodity}: {e}")
        return {}

def process_commodities(group, specific_commodities):
    """Process ARIMA modeling for a group of commodities."""
    results = []
    for specific_commodity, pdq_values in specific_commodities.items():
        pdq_low, pdq_high = tuple(pdq_values[1]), tuple(pdq_values[0])
        csv_path = f'../csv/{group}/{specific_commodity}.csv'
        result = predict_commodity(specific_commodity, csv_path, pdq_low, pdq_high)
        if result:
            results.append(result)
    return results

def process_all_groups(groups):
    """Process all commodity groups using parallel execution."""
    start_time = time.time()
    results = []
    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(process_commodities, group, commodities)
            for group, commodities in groups.items()
        ]
        for future in futures:
            try:
                results.extend(future.result())
            except Exception as e:
                logging.error(f"Error processing group: {e}")
    end_time = time.time()
    logging.info(f"Completed all groups in {end_time - start_time:.2f} seconds.")
    return results

In [3]:
# Commodity group data with ARIMA orders
commodity_groups = {
    'rice': {
        'regular_milled_rice': [(25, 1, 0), (22, 0, 19)],
        'well_milled_rice': [(24, 0, 4), (27, 0, 2)],
        'premium_rice': [(6, 1, 10), (7, 1, 27)],
        'special_rice': [(0, 0, 1), (15, 1, 13)]
    },
    'meat': {
        'beef_brisket': [(28, 0, 33), (30, 1, 0)],
        'beef_rump': [(4, 1, 25), (27, 1, 1)],
        'whole_chicken': [(15, 1, 28), (1, 0, 0)],
        'pork_belly': [(45, 0, 37), (35, 0, 0)],
        'pork_kasim': [(39, 0, 0), (33, 0, 29)]
    },
    'fish': {
        'alumahan': [(28, 0, 33), (42, 1, 28)],
        'bangus': [(33, 1, 38), (33, 1, 45)],
        'galunggong': [(28, 0, 0), (28, 1, 0)],
        'tilapia': [(43, 0, 39), (29, 1, 0)]
    },
    'fruits': {
        'banana_lakatan': [(7, 0, 10), (15, 1, 17)],
        'calamansi': [(29, 0, 36), (45, 0, 32)],
        'mango': [(47, 2, 0), (49, 0, 0)],
        'papaya': [(31, 1, 34), (42, 1, 0)]
    },
    'vegetables': {
        'cabbage': [(49, 0, 0), (49, 0, 49)],
        'carrots': [(31, 0, 30), (40, 0, 0)],
        'eggplant': [(31, 1, 39), (35, 0, 0)],
        'tomato': [(0, 0, 15), (40, 0, 0)],
        'white_potato': [(47, 1, 32), (28, 2, 32)]
    },
    'spices': {
        'garlic': [(29, 1, 30), (31, 2, 0)],
        'red_onion': [(1, 1, 27), (5, 1, 11)]
    }
}

In [4]:
# Process all groups and output the results
mae_bic_results = process_all_groups(commodity_groups)
logging.info("All processing completed.")

2024-12-18 14:50:15,816 - INFO - Processing: regular_milled_rice
2024-12-18 14:50:15,826 - INFO - Processing: beef_brisket
2024-12-18 14:50:15,831 - INFO - Processing: alumahan
2024-12-18 14:50:15,868 - INFO - Processing: banana_lakatan
2024-12-18 14:50:15,877 - INFO - Processing: cabbage
2024-12-18 14:50:15,884 - INFO - Processing: garlic
2024-12-18 14:50:35,295 - INFO - beef_brisket Low - Order (30, 1, 0): MAE = 10.3623, BIC = 3940.9065
2024-12-18 14:50:46,148 - INFO - garlic Low - Order (31, 2, 0): MAE = 22.0614, BIC = 5642.3400
2024-12-18 14:51:23,238 - INFO - banana_lakatan Low - Order (15, 1, 17): MAE = 4.6331, BIC = 3338.8207
2024-12-18 14:51:44,213 - INFO - regular_milled_rice Low - Order (22, 0, 19): MAE = 0.3943, BIC = 2297.3365
2024-12-18 14:51:54,621 - INFO - banana_lakatan High - Order (7, 0, 10): MAE = 2.1272, BIC = 3359.0910
2024-12-18 14:51:54,624 - INFO - Processing: calamansi
2024-12-18 14:51:57,130 - INFO - regular_milled_rice High - Order (25, 1, 0): MAE = 1.3739, B

In [None]:
mae_bic_results