In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import logging
import os
import time
import warnings

warnings.filterwarnings('ignore')

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

pd.set_option('display.max_rows', None)


def greedy_sarima(df, column, p_range=(25, 50), d_range=(0, 3), q_range=(25, 50), seasonal_range=(0, 2)):
    """Greedily search SARIMA orders for ``df[column]`` by in-sample MSE.

    The search runs in two stages. First every ``(p, 0, q)`` combination from
    ``p_range`` x ``q_range`` is fitted without a seasonal component. Then,
    keeping the best non-seasonal order, every ``(P, 0, Q, 12)`` seasonal order
    from ``seasonal_range`` is fitted. A candidate replaces the current best
    only when its MSE between the series and the model's fitted values is
    strictly lower.

    Args:
        df: DataFrame holding the series.
        column: Name of the column to model; NaN values are dropped first.
        p_range: ``range()`` arguments for the AR order ``p``.
        d_range: Accepted for interface compatibility; the differencing order
            is not searched and stays fixed at 0.
        q_range: ``range()`` arguments for the MA order ``q``.
        seasonal_range: ``range()`` arguments used for both ``P`` and ``Q``.

    Returns:
        ``(best_params, best_score)``. ``best_params`` is always the 7-tuple
        ``(p, d, q, P, D, Q, S)`` so that ``params[:3]`` / ``params[3:]`` give
        a valid ``order`` / 4-element ``seasonal_order``. When the series is
        empty or no candidate could be fitted, ``(None, float('inf'))`` is
        returned instead of raising.
    """
    start_time = time.time()  # Start timing the function
    data = df[column].dropna()
    best_score, best_params = float('inf'), None

    if data.empty:
        logging.warning(f"No non-missing values in column '{column}'; skipping SARIMA search.")
        return None, float('inf')

    # Stage 1: search the non-seasonal (p, q) orders with d fixed at 0.
    for p in range(*p_range):
        for q in range(*q_range):
            try:
                model = SARIMAX(data, order=(p, 0, q), seasonal_order=(0, 0, 0, 0)).fit(disp=False)
                mse = mean_squared_error(data, model.fittedvalues)
            except Exception as exc:
                # Best-effort search: a failed fit only disqualifies this candidate.
                logging.debug(f"SARIMAX order ({p}, 0, {q}) skipped: {exc}")
                continue
            if mse < best_score:
                # Seasonal part is (P, D, Q, S) = (0, 0, 0, 0): four elements,
                # matching what SARIMAX expects for seasonal_order.
                best_score, best_params = mse, (p, 0, q, 0, 0, 0, 0)

    if best_params is None:
        # Nothing could be fitted (or the ranges were empty), so there is no
        # base order to build the seasonal search on.
        logging.warning(f"No SARIMA model could be fitted for column '{column}'.")
        logging.info(f"Runtime for greedy_sarima: {time.time() - start_time:.2f} seconds")
        return None, float('inf')

    best_p, best_d, best_q = best_params[:3]

    # Stage 2: search the seasonal (P, Q) orders around the best non-seasonal order.
    for P in range(*seasonal_range):
        for Q in range(*seasonal_range):
            for S in [12]:  # Common seasonality period (e.g., monthly data with 12 periods)
                try:
                    model = SARIMAX(data, order=(best_p, best_d, best_q),
                                    seasonal_order=(P, 0, Q, S)).fit(disp=False)
                    predictions = model.fittedvalues
                    mse = mean_squared_error(data[best_d:], predictions[best_d:])
                except Exception as exc:
                    logging.debug(f"SARIMAX seasonal order ({P}, 0, {Q}, {S}) skipped: {exc}")
                    continue
                if mse < best_score:
                    best_score, best_params = mse, (best_p, best_d, best_q, P, 0, Q, S)

    end_time = time.time()
    logging.info(f"Best Parameters: {best_params} with MSE: {best_score}")
    logging.info(f"Runtime for greedy_sarima: {end_time - start_time:.2f} seconds")
    return best_params, best_score


def train_sarima_model(df, column, params):
    """Fit a SARIMAX model on ``df[column]`` and log how long the fit took.

    ``params`` is split positionally: the first three entries become the
    non-seasonal ``order`` and everything after them the ``seasonal_order``.
    Returns the fitted results object.
    """
    started_at = time.time()
    order, seasonal_order = params[:3], params[3:]
    unfitted = SARIMAX(df[column], order=order, seasonal_order=seasonal_order)
    fitted = unfitted.fit(disp=False)
    elapsed = time.time() - started_at
    logging.info(f"Runtime for train_sarima_model: {elapsed:.2f} seconds")
    return fitted


def main():
    """Run the SARIMA search and final fit for every configured commodity.

    Walks ``COMMODITY_GROUPS``, loads each commodity's CSV (indexed by its
    ``Date`` column), and for the ``High`` and ``Low`` columns that exist runs
    ``greedy_sarima`` followed by ``train_sarima_model``. Missing files and
    missing columns are logged and skipped. The total runtime is logged last.
    """
    script_started = time.time()

    for group_name, commodities in COMMODITY_GROUPS.values():
        logging.info(f"Processing commodity group: {group_name}")

        for commodity_name, csv_path in commodities.values():
            logging.info(f"Processing commodity: {commodity_name}")

            if not os.path.exists(csv_path):
                logging.error(f"File not found: {csv_path}")
                continue

            df = pd.read_csv(csv_path, index_col='Date', parse_dates=True)
            logging.info(f"Data loaded successfully for {commodity_name}.")

            for column in ("High", "Low"):
                if column not in df.columns:
                    logging.warning(f"Column '{column}' not found in the dataset. Skipping analysis for this column.")
                    continue

                logging.info(f"Analyzing {column} prices for {commodity_name}.")
                params, mse = greedy_sarima(df, column)
                if not params:
                    logging.warning(f"No valid parameters found for {column} in {commodity_name}.")
                    continue

                logging.info(f"Optimal parameters for {column} in {commodity_name}: {params} with MSE: {mse}")
                # The fitted model is not used afterwards; the fit is run for its logged timing.
                train_sarima_model(df, column, params)

    elapsed = time.time() - script_started
    logging.info(f"Total runtime for the script: {elapsed:.2f} seconds")


# Script entry point: define the commodity catalogue, then run the SARIMA pipeline.
if __name__ == "__main__":    
    # Maps group id -> (group name, {commodity id -> (commodity name, CSV path)}),
    # which is the shape main() unpacks. The CSV paths are relative paths.
    # The "Rice" group below is commented out, so this run skips it.
    COMMODITY_GROUPS = {
        # 1: ("Rice", {
        #     1: ('Regular-Milled Rice', '../csv/rice/regular_milled_rice.csv'),
        #     2: ('Well-Milled Rice', '../csv/rice/well_milled_rice.csv'),
        #     3: ('Premium Rice', '../csv/rice/premium_rice.csv'),
        #     4: ('Special Rice', '../csv/rice/special_rice.csv'),
        # }),
        2: ("Meat", {
            1: ('Beef Brisket', '../csv/meat/beef_brisket.csv'),
            2: ('Beef Rump', '../csv/meat/beef_rump.csv'),
            3: ('Whole Chicken', '../csv/meat/whole_chicken.csv'),
            4: ('Pork Belly', '../csv/meat/pork_belly.csv'),
            # NOTE(review): the label is 'Pork Ham' but the file is pork_kasim.csv — confirm the naming.
            5: ('Pork Ham', '../csv/meat/pork_kasim.csv'),
        }),
        3: ("Fish", {
            1: ('Alumahan', '../csv/fish/alumahan.csv'),
            2: ('Bangus', '../csv/fish/bangus.csv'),
            3: ('Galunggong', '../csv/fish/galunggong.csv'),
            4: ('Tilapia', '../csv/fish/tilapia.csv'),
        }),
        4: ("Fruits", {
            1: ('Banana (Lakatan)', '../csv/fruits/banana_lakatan.csv'),
            2: ('Calamansi', '../csv/fruits/calamansi.csv'),
            3: ('Mango', '../csv/fruits/mango.csv'),
            4: ('Papaya', '../csv/fruits/papaya.csv'),
        }),
        5: ("Vegetables", {
            1: ('Cabbage', '../csv/vegetables/cabbage.csv'),
            2: ('Carrots', '../csv/vegetables/carrots.csv'),
            3: ('Eggplant', '../csv/vegetables/eggplant.csv'),
            4: ('Tomato', '../csv/vegetables/tomato.csv'),
            5: ('Potato', '../csv/vegetables/white_potato.csv'),
        }),
        6: ("Spices", {
            1: ('Garlic', '../csv/spices/garlic.csv'),
            2: ('Red Onion', '../csv/spices/red_onion.csv'),
        })
    }
    
    # Process every group and commodity listed above.
    main()

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import logging
import os
import time
import warnings

warnings.filterwarnings('ignore')

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

pd.set_option('display.max_rows', None)


def _arima_holdout_mse(train_data, test_data, order):
    """Fit ARIMA(order) on train_data and return the MSE of its forecast over the test span."""
    model = ARIMA(train_data, order=order).fit()
    first_step = len(train_data)
    predictions = model.predict(start=first_step, end=first_step + len(test_data) - 1)
    return mean_squared_error(test_data, predictions)


def _best_arima_order(train_data, test_data, candidates, best_score, best_params):
    """Score each candidate order and return the lowest (score, order) seen, starting from the given best."""
    for order in candidates:
        try:
            mse = _arima_holdout_mse(train_data, test_data, order)
        except Exception as exc:
            # Best-effort search: a failed fit only disqualifies this candidate.
            logging.debug(f"ARIMA order {order} skipped: {exc}")
            continue
        if mse < best_score:
            best_score, best_params = mse, order
    return best_score, best_params


def greedy_arima(df, column, p_range=(0, 28), d_range=(0, 3), q_range=(0, 28), test_size=0.2):
    """Greedily search ARIMA ``(p, d, q)`` orders for ``df[column]`` by hold-out MSE.

    The series is split chronologically (no shuffling) into train and test
    parts. The search then optimises one order component at a time: ``p`` with
    ``d = q = 0``, then ``d`` with the best ``p``, then ``q`` with the best
    ``p`` and ``d``. A candidate replaces the current best only when its MSE on
    the test part is strictly lower.

    Args:
        df: DataFrame holding the series.
        column: Name of the column to model; NaN values are dropped first.
        p_range: ``range()`` arguments for the AR order ``p``.
        d_range: ``range()`` arguments for the differencing order ``d``.
        q_range: ``range()`` arguments for the MA order ``q``.
        test_size: Forwarded to ``train_test_split`` as the hold-out size.

    Returns:
        ``(best_params, best_score)`` where ``best_params`` is ``(p, d, q)``.
        When the series has fewer than two observations, or no candidate in
        the first stage could be fitted, ``(None, float('inf'))`` is returned
        instead of raising.
    """
    start_time = time.time()  # Start timing the function
    data = df[column].dropna()

    if len(data) < 2:
        # A train/test split needs at least one observation on each side.
        logging.warning(f"Not enough non-missing values in column '{column}' for a train/test split.")
        return None, float('inf')

    # Train-test split (chronological: shuffle is disabled)
    train_data, test_data = train_test_split(data, test_size=test_size, shuffle=False)

    best_score, best_params = float('inf'), None

    # Stage 1: best p with d = q = 0.
    best_score, best_params = _best_arima_order(
        train_data, test_data,
        ((p, 0, 0) for p in range(*p_range)),
        best_score, best_params,
    )

    if best_params is None:
        # Nothing could be fitted (or p_range was empty), so there is no base
        # order for the later stages to refine.
        logging.warning(f"No ARIMA model could be fitted for column '{column}'.")
        logging.info(f"Runtime for greedy_arima: {time.time() - start_time:.2f} seconds")
        return None, float('inf')

    # Stage 2: best d, holding p and q from stage 1.
    best_p, _, best_q = best_params
    best_score, best_params = _best_arima_order(
        train_data, test_data,
        ((best_p, d, best_q) for d in range(*d_range)),
        best_score, best_params,
    )

    # Stage 3: best q, holding p and d from the earlier stages.
    best_p, best_d, _ = best_params
    best_score, best_params = _best_arima_order(
        train_data, test_data,
        ((best_p, best_d, q) for q in range(*q_range)),
        best_score, best_params,
    )

    end_time = time.time()
    logging.info(f"Best Parameters: {best_params} with MSE: {best_score}")
    logging.info(f"Runtime for greedy_arima: {end_time - start_time:.2f} seconds")
    return best_params, best_score


def train_arima_model(df, column, params):
    """Fit an ARIMA model of order ``params`` on ``df[column]`` and log the fit time.

    Returns the fitted results object.
    """
    started_at = time.time()
    unfitted = ARIMA(df[column], order=params)
    fitted = unfitted.fit()
    elapsed = time.time() - started_at
    logging.info(f"Runtime for train_arima_model: {elapsed:.2f} seconds")
    return fitted


def main():
    """Run the ARIMA search and final fit for every configured commodity.

    Walks ``COMMODITY_GROUPS``, loads each commodity's CSV (indexed by its
    ``Date`` column), and for the ``High`` and ``Low`` columns that exist runs
    ``greedy_arima`` followed by ``train_arima_model``. Missing files and
    missing columns are logged and skipped. The total runtime is logged last.
    """
    script_started = time.time()

    for group_name, commodities in COMMODITY_GROUPS.values():
        logging.info(f"Processing commodity group: {group_name}")

        for commodity_name, csv_path in commodities.values():
            logging.info(f"Processing commodity: {commodity_name}")

            if not os.path.exists(csv_path):
                logging.error(f"File not found: {csv_path}")
                continue

            df = pd.read_csv(csv_path, index_col='Date', parse_dates=True)
            logging.info(f"Data loaded successfully for {commodity_name}.")

            for column in ("High", "Low"):
                if column not in df.columns:
                    logging.warning(f"Column '{column}' not found in the dataset. Skipping analysis for this column.")
                    continue

                logging.info(f"Analyzing {column} prices for {commodity_name}.")
                params, mse = greedy_arima(df, column)
                if not params:
                    logging.warning(f"No valid parameters found for {column} in {commodity_name}.")
                    continue

                logging.info(f"Optimal parameters for {column} in {commodity_name}: {params} with MSE: {mse}")
                # The fitted model is not used afterwards; the fit is run for its logged timing.
                train_arima_model(df, column, params)

    elapsed = time.time() - script_started
    logging.info(f"Total runtime for the script: {elapsed:.2f} seconds")


# Script entry point: define the commodity catalogue, then run the ARIMA pipeline.
if __name__ == "__main__":    
    # Maps group id -> (group name, {commodity id -> (commodity name, CSV path)}),
    # which is the shape main() unpacks. The CSV paths are relative paths.
    # Entries that are commented out below are skipped by this run.
    COMMODITY_GROUPS = {
        # 1: ("Rice", {
        #     1: ('Regular-Milled Rice', '../csv/rice/regular_milled_rice.csv'),
        #     2: ('Well-Milled Rice', '../csv/rice/well_milled_rice.csv'),
        #     3: ('Premium Rice', '../csv/rice/premium_rice.csv'),
        #     4: ('Special Rice', '../csv/rice/special_rice.csv'),
        # }),
        # 2: ("Meat", {
            # 1: ('Beef Brisket', '../csv/meat/beef_brisket.csv'),
            # 2: ('Beef Rump', '../csv/meat/beef_rump.csv'),
            # 3: ('Whole Chicken', '../csv/meat/whole_chicken.csv'),
            # 4: ('Pork Belly', '../csv/meat/pork_belly.csv'),
            # 5: ('Pork Ham', '../csv/meat/pork_kasim.csv'),
        # }),
        3: ("Fish", {
            1: ('Alumahan', '../csv/fish/alumahan.csv'),
            2: ('Bangus', '../csv/fish/bangus.csv'),
            3: ('Galunggong', '../csv/fish/galunggong.csv'),
            4: ('Tilapia', '../csv/fish/tilapia.csv'),
        }),
        4: ("Fruits", {
            # 1: ('Banana (Lakatan)', '../csv/fruits/banana_lakatan.csv'),
            2: ('Calamansi', '../csv/fruits/calamansi.csv'),
            3: ('Mango', '../csv/fruits/mango.csv'),
            4: ('Papaya', '../csv/fruits/papaya.csv'),
        }),
        5: ("Vegetables", {
            1: ('Cabbage', '../csv/vegetables/cabbage.csv'),
            2: ('Carrots', '../csv/vegetables/carrots.csv'),
            3: ('Eggplant', '../csv/vegetables/eggplant.csv'),
            4: ('Tomato', '../csv/vegetables/tomato.csv'),
            5: ('Potato', '../csv/vegetables/white_potato.csv'),
        }),
        6: ("Spices", {
            1: ('Garlic', '../csv/spices/garlic.csv'),
            # 2: ('Red Onion', '../csv/spices/red_onion.csv'),
        })
    }
    
    # Process every group and commodity that is still enabled above.
    main()

2024-12-16 00:34:05,331 - Processing commodity group: Fish
2024-12-16 00:34:05,332 - Processing commodity: Bangus
2024-12-16 00:34:05,363 - Data loaded successfully for Bangus.
2024-12-16 00:34:05,363 - Analyzing High prices for Bangus.
2024-12-16 00:36:47,562 - Best Parameters: (16, 1, 13) with MSE: 97.46040012432361
2024-12-16 00:36:47,563 - Runtime for greedy_arima: 162.20 seconds
2024-12-16 00:36:47,570 - Optimal parameters for High in Bangus: (16, 1, 13) with MSE: 97.46040012432361
2024-12-16 00:36:51,275 - Runtime for train_arima_model: 3.70 seconds
2024-12-16 00:36:51,276 - Analyzing Low prices for Bangus.
2024-12-16 00:39:19,427 - Best Parameters: (14, 1, 20) with MSE: 120.47269309700394
2024-12-16 00:39:19,427 - Runtime for greedy_arima: 148.15 seconds
2024-12-16 00:39:19,435 - Optimal parameters for Low in Bangus: (14, 1, 20) with MSE: 120.47269309700394
2024-12-16 00:39:25,180 - Runtime for train_arima_model: 5.74 seconds
2024-12-16 00:39:25,182 - Processing commodity: Galun