In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error
from statsmodels.tsa.arima.model import ARIMA
from sklearn.base import BaseEstimator
import pandas as pd
import logging
import time

# Configure the root logger once at import time: INFO and above, every record
# prefixed with a timestamp and its level name, emitted through a stream handler.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# ARIMA Wrapper for GridSearchCV
class ARIMAWrapper(BaseEstimator):
    """Minimal scikit-learn style estimator around a statsmodels ARIMA model.

    The hyper-parameters are stored under their constructor names so that
    ``BaseEstimator.get_params``/``set_params`` (and therefore
    ``GridSearchCV``) can read, set and clone them.

    Parameters
    ----------
    p : int, default=1
        First element of the ARIMA ``order`` tuple (autoregressive order).
    d : int, default=1
        Second element of the ARIMA ``order`` tuple (degree of differencing).
    q : int, default=1
        Third element of the ARIMA ``order`` tuple (moving-average order).
    """

    def __init__(self, p=1, d=1, q=1):
        self.p = p
        self.d = d
        self.q = q
        # Fitted statsmodels results object; stays None until fit() is called.
        self.model = None

    def fit(self, series, y=None):
        """Fit the ARIMA model.

        Parameters
        ----------
        series : array-like
            Observations in time order. Modelled directly when ``y`` is not
            given.
        y : array-like, optional
            When supplied (scikit-learn's ``fit(X, y)`` calling convention),
            it is modelled instead of ``series``.

        Returns
        -------
        ARIMAWrapper
            ``self``, so that calls can be chained.
        """
        endog = series if y is None else y
        order = (self.p, self.d, self.q)
        self.model = ARIMA(endog, order=order).fit(method_kwargs={'warn_convergence': False})
        return self

    def forecast(self, steps):
        """Forecast the next 'steps' number of points.

        Raises
        ------
        ValueError
            If called before ``fit``.
        """
        if self.model is None:
            raise ValueError("Model must be fitted before making predictions.")
        return self.model.forecast(steps=steps)

    def predict(self, X):
        """Forecast one value per row of ``X``, continuing after the fitted data.

        Provided because scikit-learn's built-in scorers evaluate an
        estimator through ``estimator.predict(X)``. Only the length of ``X``
        is used; its values are ignored.
        """
        return self.forecast(steps=len(X))

    def score(self, y_true, y_pred):
        """Return the negative MAE between two already-computed arrays.

        NOTE: this takes ``(y_true, y_pred)`` rather than scikit-learn's
        usual ``score(X, y)``; the signature is kept for existing callers.
        """
        return -mean_absolute_error(y_true, y_pred)

# Function to perform grid search on ARIMA parameters
def price_forecast_with_gridsearch(series, p_range, d_range, q_range):
    """Find the ARIMA order with the lowest out-of-sample forecast MAE.

    Every (p, d, q) combination is fitted on the training part of each
    cross-validation split and asked to forecast as many steps as the
    held-out part contains. The score of a split is the negated MAE of that
    forecast, so the highest score is the lowest error.

    Args:
        series: pandas Series of observations in time order.
        p_range: Iterable of candidate ``p`` values.
        d_range: Iterable of candidate ``d`` values.
        q_range: Iterable of candidate ``q`` values.

    Returns:
        Tuple ``(best_params, best_mae)``: the winning ``{'p', 'd', 'q'}``
        dict and its mean MAE across the splits, as a positive number.
    """
    # Local import: only this function needs it, and scikit-learn is already
    # a dependency of this file.
    from sklearn.model_selection import TimeSeriesSplit

    def forecast_mae_scorer(estimator, X, y=None):
        # GridSearchCV calls scorer(estimator, X_test) when fit() received no
        # y. The held-out observations are X itself, so y is accepted only
        # for signature compatibility and is not used.
        predicted = estimator.forecast(steps=len(X))
        return -mean_absolute_error(X.ravel(), predicted)

    # Materialise the candidates so any iterable (range, generator, ...) works.
    param_grid = {
        'p': list(p_range),
        'd': list(d_range),
        'q': list(q_range)
    }

    grid_search = GridSearchCV(
        estimator=ARIMAWrapper(),
        param_grid=param_grid,
        # A string scorer such as 'neg_mean_absolute_error' would need
        # estimator.predict() and a y target; score the forecast directly.
        scoring=forecast_mae_scorer,
        # Each training window strictly precedes its test window, so the
        # forecast horizon starts right where the training data ends.
        cv=TimeSeriesSplit(n_splits=3),
        verbose=1
    )

    # GridSearchCV slices rows, so hand it a single-column 2-D array.
    grid_search.fit(series.to_numpy().reshape(-1, 1))

    best_params = grid_search.best_params_
    best_mae = -grid_search.best_score_  # Scores are negated MAE; flip the sign back
    logging.info(f"Best ARIMA parameters: {best_params} with MAE: {best_mae}")

    return best_params, best_mae

def predict_commodity_with_gridsearch(commodity, csv_path, p_range, d_range, q_range):
    """Grid-search ARIMA orders for one commodity's 'Low' and 'High' prices.

    Args:
        commodity: Display name, used in log messages and in the result.
        csv_path: CSV file with a 'Date' column (parsed as dates and used as
            the index) plus 'Low' and 'High' columns.
        p_range: Candidate ``p`` values passed to the grid search.
        d_range: Candidate ``d`` values passed to the grid search.
        q_range: Candidate ``q`` values passed to the grid search.

    Returns:
        ``{"commodity": ..., "low": {"params", "mae"}, "high": {"params",
        "mae"}}`` on success. An empty dict when a required column is
        missing or any step raises; the failure is logged, never propagated.
    """
    logging.info(f"Processing: {commodity}")
    try:
        df = pd.read_csv(csv_path, index_col='Date', parse_dates=True)
        if 'Low' not in df or 'High' not in df:
            logging.error(f"Missing 'Low' or 'High' columns for {commodity}")
            return {}

        # Same search for both price columns; 'Low' runs first, then 'High'.
        summary = {"commodity": commodity}
        for key, column in (("low", 'Low'), ("high", 'High')):
            best_params, best_mae = price_forecast_with_gridsearch(df[column], p_range, d_range, q_range)
            summary[key] = {"params": best_params, "mae": best_mae}
        return summary

    except Exception as e:
        # Deliberate best-effort boundary: one bad commodity must not stop a
        # batch. logging.exception keeps the traceback, which logging.error
        # would discard.
        logging.exception(f"Error processing {commodity}: {e}")
        return {}

def process_commodities_with_gridsearch(group, specific_commodities, p_range, d_range, q_range):
    """Run the ARIMA grid search for every commodity in one group.

    Args:
        group: Group name; used as the directory name under ``../csv/``.
        specific_commodities: Iterable of commodity names, e.g. a dict keyed
            by name (its values are not used). Each name is also the CSV
            file stem: ``../csv/{group}/{name}.csv``.
        p_range: Candidate ``p`` values passed to the grid search.
        d_range: Candidate ``d`` values passed to the grid search.
        q_range: Candidate ``q`` values passed to the grid search.

    Returns:
        List of the non-empty result dicts, in iteration order. Commodities
        whose processing returned an empty dict are left out.
    """
    outcomes = (
        predict_commodity_with_gridsearch(
            name, f'../csv/{group}/{name}.csv', p_range, d_range, q_range
        )
        for name in specific_commodities
    )
    # An empty dict signals a failed commodity; keep only real results.
    return [outcome for outcome in outcomes if outcome]

def main():
    """Grid-search ARIMA orders for each configured commodity and log the results."""
    # range() excludes its stop value: p and q cover 0..29, d covers 0..2.
    # That is 30 * 3 * 30 = 2700 candidate orders per price series.
    p_range = range(0, 30)
    d_range = range(0, 3)
    q_range = range(0, 30)

    # Commodity group -> {commodity name -> path of its CSV file}
    commodity_groups = {
        "Meat": {
            'Whole Chicken': '../csv/meat/whole_chicken.csv',
        },
        "Fruits": {
            'Mango': '../csv/fruits/mango.csv',
        },
        "Vegetables": {
            'Tomato': '../csv/vegetables/tomato.csv',
        },
        "Spices": {
            'Garlic': '../csv/spices/garlic.csv',
        }
    }

    # Process the commodities and log the results
    results = []
    for group, commodities in commodity_groups.items():
        for specific_commodity, csv_path in commodities.items():
            logging.info(f"Processing {specific_commodity} from {group}")
            result = predict_commodity_with_gridsearch(specific_commodity, csv_path, p_range, d_range, q_range)
            # A failed commodity comes back as an empty dict; keep it out of
            # the summary, as process_commodities_with_gridsearch does.
            if result:
                results.append(result)

    logging.info(f"Results: {results}")


if __name__ == "__main__":
    main()

2024-12-18 22:16:07,428 - Processing Whole Chicken from Meat
2024-12-18 22:16:07,430 - Processing: Whole Chicken


Fitting 3 folds for each of 2700 candidates, totalling 8100 fits
