<h1> <center> METRO MANILA: COMMODITY PRICE CHECKER </center> </h1>

<h1 style="background-color: #3cb371; padding: 20px;"> <center> <span style="color:white">
<br> INSTALLATION
</span> </center> </h1>

In [None]:
pip install pmdarima

<h1 style="background-color: #00427c; padding: 20px;"> <center> <span style="color:white">
<br> CODE
</span> </center> </h1>

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import itertools
import logging
import os
import time  # Import for runtime measurement

import warnings
warnings.filterwarnings('ignore')

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

pd.set_option('display.max_rows', None)

# Helper function: Prompt user input
def prompt_choice(prompt, options):
    """Show a numbered menu and keep asking until a listed number is entered.

    Args:
        prompt: Heading printed once above the menu.
        options: Mapping of integer menu keys to the values on offer.

    Returns:
        The value stored in ``options`` under the number the user typed.
    """
    print(prompt)
    for number, label in options.items():
        print(f"[{number}] {label}")

    while True:
        try:
            selection = int(input("Choose an option: "))
        except ValueError:
            # Non-numeric text: explain and ask again.
            print("Invalid input. Please enter a number.")
            continue
        if selection not in options:
            print("Invalid choice. Please try again.")
            continue
        return options[selection]

# Define commodity mappings
# Layout: group menu number -> (group name, commodities), where commodities is
# commodity menu number -> (display name, CSV path). The integer keys are the
# numbers shown to the user by prompt_choice(); main() checks each CSV path
# with os.path.exists() before loading it.
COMMODITY_GROUPS = {
    1: ("Rice", {
        1: ('Regular-Milled Rice', '../csv/rice/regular_milled_rice.csv'),
        2: ('Well-Milled Rice', '../csv/rice/well_milled_rice.csv'),
        3: ('Premium Rice', '../csv/rice/premium_rice.csv'),
        4: ('Special Rice', '../csv/rice/special_rice.csv'),
    }),
    2: ("Meat", {
        1: ('Beef Brisket', '../csv/meat/beef_brisket.csv'),
        2: ('Beef Rump', '../csv/meat/beef_rump.csv'),
        3: ('Whole Chicken', '../csv/meat/whole_chicken.csv'),
        4: ('Pork Belly', '../csv/meat/pork_belly.csv'),
        # NOTE(review): display name says "Pork Ham" but the file is
        # pork_kasim.csv -- confirm the label matches the data.
        5: ('Pork Ham', '../csv/meat/pork_kasim.csv'),
    }),
    3: ("Fish", {
        1: ('Alumahan', '../csv/fish/alumahan.csv'),
        2: ('Bangus', '../csv/fish/bangus.csv'),
        3: ('Galunggong', '../csv/fish/galunggong.csv'),
        4: ('Tilapia', '../csv/fish/tilapia.csv'),
    }),
    4: ("Fruits", {
        1: ('Banana (Lakatan)', '../csv/fruits/banana_lakatan.csv'),
        2: ('Calamansi', '../csv/fruits/calamansi.csv'),
        3: ('Mango', '../csv/fruits/mango.csv'),
        4: ('Papaya', '../csv/fruits/papaya.csv'),
    }),
    5: ("Vegetables", {
        1: ('Cabbage', '../csv/vegetables/cabbage.csv'),
        2: ('Carrots', '../csv/vegetables/carrots.csv'),
        3: ('Eggplant', '../csv/vegetables/eggplant.csv'),
        4: ('Tomato', '../csv/vegetables/tomato.csv'),
        5: ('Potato', '../csv/vegetables/white_potato.csv'),
    }),
    6: ("Spices", {
        1: ('Garlic', '../csv/spices/garlic.csv'),
        # NOTE(review): later cells of this notebook point Red Onion at
        # '../csv/spices/red_onion.csv' -- confirm which file name exists.
        2: ('Red Onion', '../csv/spices/onion.csv'),
    })
}

# Grid search for ARIMA parameters
def grid_search_arima(df, column, p_range=(0, 10), d_range=(0, 3), q_range=(0, 10)):
    """Try every (p, d, q) combination and keep the one with the lowest in-sample MSE.

    Args:
        df: DataFrame holding the series.
        column: Column to model; missing values are dropped first.
        p_range, d_range, q_range: Argument tuples unpacked into ``range``
            for the AR, differencing and MA orders.

    Returns:
        ``(best_params, best_score)``; ``best_params`` stays ``None`` and the
        score ``inf`` when no combination could be fitted and scored.
    """
    started = time.time()  # wall-clock start of the whole search
    series = df[column].dropna()
    lowest_mse = float('inf')
    winning_order = None

    candidate_orders = itertools.product(
        range(*p_range), range(*d_range), range(*q_range)
    )
    for order in candidate_orders:
        skip = order[1]  # the first d fitted values are left out of the score
        try:
            fitted = ARIMA(series, order=order).fit()
            candidate_mse = mean_squared_error(series[skip:], fitted.fittedvalues[skip:])
        except Exception:
            # Orders that cannot be fitted or scored are simply passed over.
            continue
        if candidate_mse < lowest_mse:
            lowest_mse, winning_order = candidate_mse, order

    finished = time.time()  # wall-clock end of the whole search
    logging.info(f"Grid Search Runtime: {finished - started:.2f} seconds")
    logging.info(f"Best Parameters: {winning_order} with MSE: {lowest_mse}")
    return winning_order, lowest_mse

# Train ARIMA model
def train_arima_model(df, column, params):
    """Fit ARIMA with the given order on ``df[column]`` and log runtime and summary.

    Args:
        df: DataFrame holding the series.
        column: Column to model.
        params: ``(p, d, q)`` order passed to ``ARIMA``.

    Returns:
        The fitted model returned by ``ARIMA(...).fit()``.
    """
    started = time.time()  # only the construction and fit are timed
    fitted = ARIMA(df[column], order=params).fit()
    elapsed = time.time() - started
    logging.info(f"Training Runtime: {elapsed:.2f} seconds")
    logging.info(f"Model Summary: \n{fitted.summary()}")
    return fitted

# Main function
def main():
    """Interactive entry point: pick a commodity, then grid-search ARIMA orders.

    Prompts for a commodity group and a commodity, loads the commodity's CSV
    (indexed by its parsed ``Date`` column) and runs ``grid_search_arima`` on
    the ``High`` and ``Low`` columns that are present. Progress, results and
    the total runtime are written to the log; nothing is returned.
    """
    run_started = time.time()  # covers the whole run, prompts included

    # Ask for the group by name.
    group_menu = {key: entry[0] for key, entry in COMMODITY_GROUPS.items()}
    group_choice = prompt_choice("Select a commodity group:", group_menu)
    if not group_choice:
        logging.error("Invalid commodity group selected.")
        return

    # prompt_choice returned the group's name; find the key it belongs to.
    commodity_group_key = None
    for key, entry in COMMODITY_GROUPS.items():
        if entry[0] == group_choice:
            commodity_group_key = key
            break
    if commodity_group_key is None:
        logging.error("Invalid commodity group mapping.")
        return

    # Ask for the commodity inside that group.
    commodities = COMMODITY_GROUPS[commodity_group_key][1]
    commodity_details = prompt_choice("Select a specific commodity:", commodities)
    if not commodity_details:
        logging.error("Invalid commodity selected.")
        return

    commodity, csv_path = commodity_details
    logging.info(f"You have selected {commodity}.")

    # Bail out early when the data file is missing.
    if not os.path.exists(csv_path):
        logging.error(f"File not found: {csv_path}")
        return

    df = pd.read_csv(csv_path, index_col='Date', parse_dates=True)
    logging.info("Data loaded successfully.")

    # Search each price column that the dataset actually contains.
    for column in ("High", "Low"):
        if column in df.columns:
            logging.info(f"Analyzing {column} prices.")
            params, mse = grid_search_arima(df, column)
            if params:
                logging.info(f"Optimal parameters for {column}: {params} with MSE: {mse}")
            else:
                logging.warning(f"No valid parameters found for {column}.")
        else:
            logging.warning(f"Column '{column}' not found in the dataset. Skipping analysis for this column.")

    run_finished = time.time()
    logging.info(f"Total Runtime: {run_finished - run_started:.2f} seconds")

# Run the application
if __name__ == "__main__":
    main()

<h1 style="background-color: #00427c; padding: 20px;"> <center> <span style="color:white">
<br> PSEUDOCODE
</span> </center> </h1>

1. IMPORT required libraries for data handling, visualization, machine learning, and logging.

2. CONFIGURE logging for structured debug and status messages.

3. DEFINE a function `prompt_choice`:
   - Display a prompt and a list of options.
   - Continuously prompt the user for input until a valid option is chosen.
   - RETURN the selected option.

4. DEFINE a dictionary `COMMODITY_GROUPS`:
   - Map commodity groups to specific commodities and their respective file paths.

5. DEFINE function `greedy_arima`:
   - INPUT: DataFrame, column name, and ranges for ARIMA parameters (p, d, q).
   - Initialize `best_score` to infinity and `best_params` to None.

   - STEP 1: Greedily optimize p (AR term):
     a. FOR each p in the range:
        - Attempt to fit an ARIMA model with (p, 0, 0).
        - Calculate MSE between predictions and actual data.
        - IF MSE improves the best score, update `best_score` and `best_params`.

   - STEP 2: Optimize d (Differencing term):
     a. Use the best p from STEP 1.
     b. FOR each d in the range:
        - Fit an ARIMA model with (best_p, d, 0).
        - Calculate MSE and update `best_score` and `best_params` if MSE improves.

   - STEP 3: Optimize q (MA term):
     a. Use the best p and d from previous steps.
     b. FOR each q in the range:
        - Fit an ARIMA model with (best_p, best_d, q).
        - Calculate MSE and update `best_score` and `best_params` if MSE improves.

   - RETURN `best_params` and `best_score`.

6. DEFINE function `train_arima_model`:
   - INPUT: DataFrame, column name, ARIMA parameters.
   - Train the ARIMA model with the provided parameters.
   - RETURN the trained model.

7. DEFINE `main` function:
   - PROMPT user to select a commodity group using `prompt_choice`.
   - MAP the selected group to its corresponding key in `COMMODITY_GROUPS`.

   - PROMPT user to select a specific commodity.
   - IF file for the selected commodity does not exist, log an error and exit.

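   - LOAD the dataset and check for required columns ("High" and "Low").
   - FOR each required column (log a warning and skip any column missing from the dataset):
     - APPLY the `greedy_arima` function to identify optimal parameters.
     - IF valid parameters are found:
       - Train the ARIMA model using the identified parameters and log its training runtime.
     - OTHERWISE, log a warning that no valid parameters were found.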

8. EXECUTE the `main` function if the script is run directly.

<h1 style="background-color: #00427c; padding: 20px;"> <center> <span style="color:white">
<br> CODE WITH GREEDY ALGORITHM
</span> </center> </h1>

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import itertools
import logging
import os
import time
import warnings

warnings.filterwarnings('ignore')

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

pd.set_option('display.max_rows', None)

def prompt_choice(prompt, options):
    """Print a numbered menu and loop until the user types one of its numbers.

    Args:
        prompt: Heading printed once above the menu.
        options: Mapping of integer menu keys to the values on offer.

    Returns:
        The entry of ``options`` whose key matches the number entered.
    """
    print(prompt)
    for menu_key in options:
        print(f"[{menu_key}] {options[menu_key]}")

    while True:
        try:
            entered = int(input("Choose an option: "))
        except ValueError:
            # The text typed was not an integer.
            print("Invalid input. Please enter a number.")
        else:
            if entered in options:
                return options[entered]
            print("Invalid choice. Please try again.")


def greedy_arima(df, column, p_range=(0, 28), d_range=(0, 3), q_range=(0, 28)):
    """Greedily pick an ARIMA order (p, d, q) by in-sample MSE, one term at a time.

    The search tunes ``p`` with ``d = q = 0``, then ``d`` with the chosen
    ``p``, then ``q`` with the chosen ``p`` and ``d``. A candidate replaces
    the current best only when its MSE is strictly lower.

    Args:
        df: DataFrame holding the series.
        column: Column to model; missing values are dropped first.
        p_range, d_range, q_range: Argument tuples unpacked into ``range``
            for the AR, differencing and MA orders.

    Returns:
        ``(best_params, best_score)``. ``best_params`` is ``None`` and the
        score ``inf`` when no candidate could be fitted, so callers can test
        the result instead of handling an exception.
    """
    start_time = time.time()  # Start timing the function
    data = df[column].dropna()
    best_score, best_params = float('inf'), None

    def in_sample_mse(order):
        """Return the in-sample MSE of ARIMA(order), or None when fitting fails."""
        # Leave out the first ``d`` fitted values for every candidate. The
        # window must depend on the differencing order only (never on q),
        # otherwise step-3 candidates are scored on different data.
        skip = order[1]
        try:
            fitted = ARIMA(data, order=order).fit().fittedvalues
            return mean_squared_error(data[skip:], fitted[skip:])
        except Exception as exc:
            # Best effort: an order that cannot be fitted is skipped, but the
            # reason is kept available at debug level instead of vanishing.
            logging.debug("Skipping ARIMA order %s: %s", order, exc)
            return None

    def consider(order):
        """Score ``order`` and adopt it when it beats the current best."""
        nonlocal best_score, best_params
        if order == best_params:
            return  # already scored; refitting cannot improve on itself
        mse = in_sample_mse(order)
        if mse is not None and mse < best_score:
            best_score, best_params = mse, order

    # Step 1: AR term.
    for p in range(*p_range):
        consider((p, 0, 0))

    # Steps 2 and 3 need a starting point; without one, report "nothing found".
    if best_params is not None:
        # Step 2: differencing term, keeping the chosen p.
        best_p, _, best_q = best_params
        for d in range(*d_range):
            consider((best_p, d, best_q))

        # Step 3: MA term, keeping the chosen p and d.
        best_p, best_d, _ = best_params
        for q in range(*q_range):
            consider((best_p, best_d, q))

    end_time = time.time()
    logging.info(f"Best Parameters: {best_params} with MSE: {best_score}")
    logging.info(f"Runtime for greedy_arima: {end_time - start_time:.2f} seconds")
    return best_params, best_score


def train_arima_model(df, column, params):
    """Fit ARIMA with order ``params`` on ``df[column]`` and log how long it took.

    Args:
        df: DataFrame holding the series.
        column: Column to model.
        params: ``(p, d, q)`` order passed to ``ARIMA``.

    Returns:
        The fitted model returned by ``ARIMA(...).fit()``.
    """
    t0 = time.time()
    arima = ARIMA(df[column], order=params)
    fitted_model = arima.fit()
    elapsed = time.time() - t0
    logging.info(f"Runtime for train_arima_model: {elapsed:.2f} seconds")
    return fitted_model


def main():
    """Interactive entry point: pick a commodity, search ARIMA orders, then train.

    Prompts for a commodity group and a commodity, loads the commodity's CSV
    (indexed by its parsed ``Date`` column) and, for each of the ``High`` and
    ``Low`` columns that is present, runs ``greedy_arima`` and trains a model
    with the order it found. Results and runtimes go to the log.
    """
    began = time.time()  # covers the whole run, prompts included

    group_names = {number: group[0] for number, group in COMMODITY_GROUPS.items()}
    group_choice = prompt_choice("Select a commodity group:", group_names)
    if not group_choice:
        logging.error("Invalid commodity group selected.")
        return

    # prompt_choice returned the group's name; recover the first matching key.
    matching_keys = [number for number, group in COMMODITY_GROUPS.items() if group[0] == group_choice]
    if not matching_keys:
        logging.error("Invalid commodity group mapping.")
        return
    commodity_group_key = matching_keys[0]

    commodity_details = prompt_choice("Select a specific commodity:", COMMODITY_GROUPS[commodity_group_key][1])
    if not commodity_details:
        logging.error("Invalid commodity selected.")
        return

    commodity, csv_path = commodity_details
    logging.info(f"You have selected {commodity}.")

    if not os.path.exists(csv_path):
        logging.error(f"File not found: {csv_path}")
        return

    df = pd.read_csv(csv_path, index_col='Date', parse_dates=True)
    logging.info("Data loaded successfully.")

    for column in ("High", "Low"):
        if column not in df.columns:
            logging.warning(f"Column '{column}' not found in the dataset. Skipping analysis for this column.")
            continue

        logging.info(f"Analyzing {column} prices.")
        params, mse = greedy_arima(df, column)
        if not params:
            logging.warning(f"No valid parameters found for {column}.")
            continue
        logging.info(f"Optimal parameters for {column}: {params} with MSE: {mse}")
        fitted_model = train_arima_model(df, column, params)

    ended = time.time()
    logging.info(f"Total runtime for the script: {ended - began:.2f} seconds")


# Menu of commodities: group number -> (group name, commodities), where
# commodities is commodity number -> (display name, CSV path).
# Defined at module level rather than inside the ``__main__`` guard because
# main() reads it as a global: with the definition inside the guard, importing
# this code and calling main() raised NameError.
COMMODITY_GROUPS = {
    1: ("Rice", {
        1: ('Regular-Milled Rice', '../csv/rice/regular_milled_rice.csv'),
        2: ('Well-Milled Rice', '../csv/rice/well_milled_rice.csv'),
        3: ('Premium Rice', '../csv/rice/premium_rice.csv'),
        4: ('Special Rice', '../csv/rice/special_rice.csv'),
    }),
    2: ("Meat", {
        1: ('Beef Brisket', '../csv/meat/beef_brisket.csv'),
        2: ('Beef Rump', '../csv/meat/beef_rump.csv'),
        3: ('Whole Chicken', '../csv/meat/whole_chicken.csv'),
        4: ('Pork Belly', '../csv/meat/pork_belly.csv'),
        5: ('Pork Ham', '../csv/meat/pork_kasim.csv'),
    }),
    3: ("Fish", {
        1: ('Alumahan', '../csv/fish/alumahan.csv'),
        2: ('Bangus', '../csv/fish/bangus.csv'),
        3: ('Galunggong', '../csv/fish/galunggong.csv'),
        4: ('Tilapia', '../csv/fish/tilapia.csv'),
    }),
    4: ("Fruits", {
        1: ('Banana (Lakatan)', '../csv/fruits/banana_lakatan.csv'),
        2: ('Calamansi', '../csv/fruits/calamansi.csv'),
        3: ('Mango', '../csv/fruits/mango.csv'),
        4: ('Papaya', '../csv/fruits/papaya.csv'),
    }),
    5: ("Vegetables", {
        1: ('Cabbage', '../csv/vegetables/cabbage.csv'),
        2: ('Carrots', '../csv/vegetables/carrots.csv'),
        3: ('Eggplant', '../csv/vegetables/eggplant.csv'),
        4: ('Tomato', '../csv/vegetables/tomato.csv'),
        5: ('Potato', '../csv/vegetables/white_potato.csv'),
    }),
    6: ("Spices", {
        1: ('Garlic', '../csv/spices/garlic.csv'),
        # NOTE(review): later cells of this notebook use
        # '../csv/spices/red_onion.csv' -- confirm which file name exists.
        2: ('Red Onion', '../csv/spices/onion.csv'),
    })
}


if __name__ == "__main__":
    main()

<h1 style="background-color: #00427c; padding: 20px;"> <center> <span style="color:white">
<br> CODE WITHOUT USER PROMPT
</span> </center> </h1>

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import logging
import os
import time
import warnings

warnings.filterwarnings('ignore')

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

pd.set_option('display.max_rows', None)


def greedy_arima(df, column, p_range=(0, 28), d_range=(0, 3), q_range=(0, 28)):
    """Greedily pick an ARIMA order (p, d, q) by in-sample MSE, one term at a time.

    The search tunes ``p`` with ``d = q = 0``, then ``d`` with the chosen
    ``p``, then ``q`` with the chosen ``p`` and ``d``. A candidate replaces
    the current best only when its MSE is strictly lower.

    Args:
        df: DataFrame holding the series.
        column: Column to model; missing values are dropped first.
        p_range, d_range, q_range: Argument tuples unpacked into ``range``
            for the AR, differencing and MA orders.

    Returns:
        ``(best_params, best_score)``. ``best_params`` is ``None`` and the
        score ``inf`` when no candidate could be fitted, so callers can test
        the result instead of handling an exception.
    """
    start_time = time.time()  # Start timing the function
    data = df[column].dropna()
    best_score, best_params = float('inf'), None

    def in_sample_mse(order):
        """Return the in-sample MSE of ARIMA(order), or None when fitting fails."""
        # Leave out the first ``d`` fitted values for every candidate. The
        # window must depend on the differencing order only (never on q),
        # otherwise step-3 candidates are scored on different data.
        skip = order[1]
        try:
            fitted = ARIMA(data, order=order).fit().fittedvalues
            return mean_squared_error(data[skip:], fitted[skip:])
        except Exception as exc:
            # Best effort: an order that cannot be fitted is skipped, but the
            # reason is kept available at debug level instead of vanishing.
            logging.debug("Skipping ARIMA order %s: %s", order, exc)
            return None

    def consider(order):
        """Score ``order`` and adopt it when it beats the current best."""
        nonlocal best_score, best_params
        if order == best_params:
            return  # already scored; refitting cannot improve on itself
        mse = in_sample_mse(order)
        if mse is not None and mse < best_score:
            best_score, best_params = mse, order

    # Step 1: AR term.
    for p in range(*p_range):
        consider((p, 0, 0))

    # Steps 2 and 3 need a starting point; without one, report "nothing found".
    if best_params is not None:
        # Step 2: differencing term, keeping the chosen p.
        best_p, _, best_q = best_params
        for d in range(*d_range):
            consider((best_p, d, best_q))

        # Step 3: MA term, keeping the chosen p and d.
        best_p, best_d, _ = best_params
        for q in range(*q_range):
            consider((best_p, best_d, q))

    end_time = time.time()
    logging.info(f"Best Parameters: {best_params} with MSE: {best_score}")
    logging.info(f"Runtime for greedy_arima: {end_time - start_time:.2f} seconds")
    return best_params, best_score


def train_arima_model(df, column, params):
    """Train an ARIMA model of order ``params`` on one column and log the runtime.

    Args:
        df: DataFrame holding the series.
        column: Column to model.
        params: ``(p, d, q)`` order passed to ``ARIMA``.

    Returns:
        The fitted model returned by ``ARIMA(...).fit()``.
    """
    began = time.time()
    result = ARIMA(df[column], order=params).fit()
    finished = time.time()
    seconds = finished - began
    logging.info(f"Runtime for train_arima_model: {seconds:.2f} seconds")
    return result


def main():
    """Batch entry point: search and train ARIMA models for every listed commodity.

    Walks every group and commodity in ``COMMODITY_GROUPS``. Commodities whose
    CSV file is missing are logged and skipped. For each loaded dataset the
    ``High`` and ``Low`` columns that are present are passed to
    ``greedy_arima`` and, when an order is found, to ``train_arima_model``.
    """
    started_at = time.time()

    # The integer menu keys are not needed here, only the entries.
    for group_name, commodities in COMMODITY_GROUPS.values():
        logging.info(f"Processing commodity group: {group_name}")

        for commodity_name, csv_path in commodities.values():
            logging.info(f"Processing commodity: {commodity_name}")

            if not os.path.exists(csv_path):
                logging.error(f"File not found: {csv_path}")
                continue

            df = pd.read_csv(csv_path, index_col='Date', parse_dates=True)
            logging.info(f"Data loaded successfully for {commodity_name}.")

            for column in ("High", "Low"):
                if column in df.columns:
                    logging.info(f"Analyzing {column} prices for {commodity_name}.")
                    params, mse = greedy_arima(df, column)
                    if params:
                        logging.info(f"Optimal parameters for {column} in {commodity_name}: {params} with MSE: {mse}")
                        trained = train_arima_model(df, column, params)
                    else:
                        logging.warning(f"No valid parameters found for {column} in {commodity_name}.")
                else:
                    logging.warning(f"Column '{column}' not found in the dataset. Skipping analysis for this column.")

    finished_at = time.time()
    logging.info(f"Total runtime for the script: {finished_at - started_at:.2f} seconds")


# Menu of commodities: group number -> (group name, commodities), where
# commodities is commodity number -> (display name, CSV path).
# Defined at module level rather than inside the ``__main__`` guard because
# main() reads it as a global: with the definition inside the guard, importing
# this code and calling main() raised NameError.
COMMODITY_GROUPS = {
    1: ("Rice", {
        1: ('Regular-Milled Rice', '../csv/rice/regular_milled_rice.csv'),
        2: ('Well-Milled Rice', '../csv/rice/well_milled_rice.csv'),
        3: ('Premium Rice', '../csv/rice/premium_rice.csv'),
        4: ('Special Rice', '../csv/rice/special_rice.csv'),
    }),
    2: ("Meat", {
        1: ('Beef Brisket', '../csv/meat/beef_brisket.csv'),
        2: ('Beef Rump', '../csv/meat/beef_rump.csv'),
        3: ('Whole Chicken', '../csv/meat/whole_chicken.csv'),
        4: ('Pork Belly', '../csv/meat/pork_belly.csv'),
        5: ('Pork Ham', '../csv/meat/pork_kasim.csv'),
    }),
    3: ("Fish", {
        1: ('Alumahan', '../csv/fish/alumahan.csv'),
        2: ('Bangus', '../csv/fish/bangus.csv'),
        3: ('Galunggong', '../csv/fish/galunggong.csv'),
        4: ('Tilapia', '../csv/fish/tilapia.csv'),
    }),
    4: ("Fruits", {
        1: ('Banana (Lakatan)', '../csv/fruits/banana_lakatan.csv'),
        2: ('Calamansi', '../csv/fruits/calamansi.csv'),
        3: ('Mango', '../csv/fruits/mango.csv'),
        4: ('Papaya', '../csv/fruits/papaya.csv'),
    }),
    5: ("Vegetables", {
        1: ('Cabbage', '../csv/vegetables/cabbage.csv'),
        2: ('Carrots', '../csv/vegetables/carrots.csv'),
        3: ('Eggplant', '../csv/vegetables/eggplant.csv'),
        4: ('Tomato', '../csv/vegetables/tomato.csv'),
        5: ('Potato', '../csv/vegetables/white_potato.csv'),
    }),
    6: ("Spices", {
        1: ('Garlic', '../csv/spices/garlic.csv'),
        # NOTE(review): earlier cells of this notebook use
        # '../csv/spices/onion.csv' -- confirm which file name exists.
        2: ('Red Onion', '../csv/spices/red_onion.csv'),
    })
}


if __name__ == "__main__":
    main()

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import logging
import os
import time
import warnings

warnings.filterwarnings('ignore')

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

pd.set_option('display.max_rows', None)


def greedy_arima(df, column, p_range=(0, 28), d_range=(0, 3), q_range=(0, 28), test_size=0.2):
    """Greedily pick an ARIMA order (p, d, q) by MSE on a held-out part of the series.

    The series is split without shuffling; each candidate is fitted on the
    training part and scored on its predictions for the test part. The search
    tunes ``p`` with ``d = q = 0``, then ``d`` with the chosen ``p``, then
    ``q`` with the chosen ``p`` and ``d``. A candidate replaces the current
    best only when its MSE is strictly lower.

    Args:
        df: DataFrame holding the series.
        column: Column to model; missing values are dropped first.
        p_range, d_range, q_range: Argument tuples unpacked into ``range``
            for the AR, differencing and MA orders.
        test_size: Passed to ``train_test_split`` as the size of the test part.

    Returns:
        ``(best_params, best_score)``. ``best_params`` is ``None`` and the
        score ``inf`` when no candidate could be fitted, so callers can test
        the result instead of handling an exception.
    """
    start_time = time.time()  # Start timing the function
    data = df[column].dropna()

    # Train-test split; shuffle=False keeps the observations in their order.
    train_data, test_data = train_test_split(data, test_size=test_size, shuffle=False)

    # Positions to predict: the steps right after the training data, one per
    # test observation. They are the same for every candidate.
    first_step = len(train_data)
    last_step = first_step + len(test_data) - 1

    best_score, best_params = float('inf'), None

    def holdout_mse(order):
        """Return the test-set MSE of ARIMA(order), or None when fitting fails."""
        try:
            model = ARIMA(train_data, order=order).fit()
            predictions = model.predict(start=first_step, end=last_step)
            return mean_squared_error(test_data, predictions)
        except Exception as exc:
            # Best effort: an order that cannot be fitted is skipped, but the
            # reason is kept available at debug level instead of vanishing.
            logging.debug("Skipping ARIMA order %s: %s", order, exc)
            return None

    def consider(order):
        """Score ``order`` and adopt it when it beats the current best."""
        nonlocal best_score, best_params
        if order == best_params:
            return  # already scored; refitting cannot improve on itself
        mse = holdout_mse(order)
        if mse is not None and mse < best_score:
            best_score, best_params = mse, order

    # Greedy search, step 1: AR term.
    for p in range(*p_range):
        consider((p, 0, 0))

    # Steps 2 and 3 need a starting point; without one, report "nothing found".
    if best_params is not None:
        # Step 2: differencing term, keeping the chosen p.
        best_p, _, best_q = best_params
        for d in range(*d_range):
            consider((best_p, d, best_q))

        # Step 3: MA term, keeping the chosen p and d.
        best_p, best_d, _ = best_params
        for q in range(*q_range):
            consider((best_p, best_d, q))

    end_time = time.time()
    logging.info(f"Best Parameters: {best_params} with MSE: {best_score}")
    logging.info(f"Runtime for greedy_arima: {end_time - start_time:.2f} seconds")
    return best_params, best_score


def train_arima_model(df, column, params):
    """Fit the final ARIMA model for ``column`` and log the fitting time.

    Args:
        df: DataFrame holding the series.
        column: Column to model.
        params: ``(p, d, q)`` order passed to ``ARIMA``.

    Returns:
        The fitted model returned by ``ARIMA(...).fit()``.
    """
    clock_start = time.time()
    trained = ARIMA(df[column], order=params).fit()
    duration = time.time() - clock_start
    logging.info(f"Runtime for train_arima_model: {duration:.2f} seconds")
    return trained


def main():
    """Batch entry point: run the ARIMA order search and training for all commodities.

    Walks every group and commodity in ``COMMODITY_GROUPS``. Commodities whose
    CSV file is missing are logged and skipped. For each loaded dataset the
    ``High`` and ``Low`` columns that are present are passed to
    ``greedy_arima`` and, when an order is found, to ``train_arima_model``.
    """
    overall_start_time = time.time()

    def analyze_column(df, column, commodity_name):
        """Search an order for one price column and train a model with it."""
        logging.info(f"Analyzing {column} prices for {commodity_name}.")
        params, mse = greedy_arima(df, column)
        if not params:
            logging.warning(f"No valid parameters found for {column} in {commodity_name}.")
            return
        logging.info(f"Optimal parameters for {column} in {commodity_name}: {params} with MSE: {mse}")
        train_arima_model(df, column, params)

    for group_name, commodities in COMMODITY_GROUPS.values():
        logging.info(f"Processing commodity group: {group_name}")

        for commodity_name, csv_path in commodities.values():
            logging.info(f"Processing commodity: {commodity_name}")

            if not os.path.exists(csv_path):
                logging.error(f"File not found: {csv_path}")
                continue

            df = pd.read_csv(csv_path, index_col='Date', parse_dates=True)
            logging.info(f"Data loaded successfully for {commodity_name}.")

            for column in ("High", "Low"):
                if column not in df.columns:
                    logging.warning(f"Column '{column}' not found in the dataset. Skipping analysis for this column.")
                    continue
                analyze_column(df, column, commodity_name)

    elapsed = time.time() - overall_start_time
    logging.info(f"Total runtime for the script: {elapsed:.2f} seconds")


# Menu of commodities: group number -> (group name, commodities), where
# commodities is commodity number -> (display name, CSV path).
# Defined at module level rather than inside the ``__main__`` guard because
# main() reads it as a global: with the definition inside the guard, importing
# this code and calling main() raised NameError.
COMMODITY_GROUPS = {
    1: ("Rice", {
        1: ('Regular-Milled Rice', '../csv/rice/regular_milled_rice.csv'),
        2: ('Well-Milled Rice', '../csv/rice/well_milled_rice.csv'),
        3: ('Premium Rice', '../csv/rice/premium_rice.csv'),
        4: ('Special Rice', '../csv/rice/special_rice.csv'),
    }),
    2: ("Meat", {
        1: ('Beef Brisket', '../csv/meat/beef_brisket.csv'),
        2: ('Beef Rump', '../csv/meat/beef_rump.csv'),
        3: ('Whole Chicken', '../csv/meat/whole_chicken.csv'),
        4: ('Pork Belly', '../csv/meat/pork_belly.csv'),
        5: ('Pork Ham', '../csv/meat/pork_kasim.csv'),
    }),
    3: ("Fish", {
        1: ('Alumahan', '../csv/fish/alumahan.csv'),
        2: ('Bangus', '../csv/fish/bangus.csv'),
        3: ('Galunggong', '../csv/fish/galunggong.csv'),
        4: ('Tilapia', '../csv/fish/tilapia.csv'),
    }),
    4: ("Fruits", {
        1: ('Banana (Lakatan)', '../csv/fruits/banana_lakatan.csv'),
        2: ('Calamansi', '../csv/fruits/calamansi.csv'),
        3: ('Mango', '../csv/fruits/mango.csv'),
        4: ('Papaya', '../csv/fruits/papaya.csv'),
    }),
    5: ("Vegetables", {
        1: ('Cabbage', '../csv/vegetables/cabbage.csv'),
        2: ('Carrots', '../csv/vegetables/carrots.csv'),
        3: ('Eggplant', '../csv/vegetables/eggplant.csv'),
        4: ('Tomato', '../csv/vegetables/tomato.csv'),
        5: ('Potato', '../csv/vegetables/white_potato.csv'),
    }),
    6: ("Spices", {
        1: ('Garlic', '../csv/spices/garlic.csv'),
        # NOTE(review): earlier cells of this notebook use
        # '../csv/spices/onion.csv' -- confirm which file name exists.
        2: ('Red Onion', '../csv/spices/red_onion.csv'),
    })
}


if __name__ == "__main__":
    main()

2024-12-15 16:50:34,108 - Processing commodity group: Rice
2024-12-15 16:50:34,110 - Processing commodity: Regular-Milled Rice
2024-12-15 16:50:34,206 - Data loaded successfully for Regular-Milled Rice.
2024-12-15 16:50:34,207 - Analyzing High prices for Regular-Milled Rice.
2024-12-15 16:54:58,665 - Best Parameters: (25, 1, 0) with MSE: 2.5544882306968804
2024-12-15 16:54:58,666 - Runtime for greedy_arima: 264.46 seconds
2024-12-15 16:54:58,671 - Optimal parameters for High in Regular-Milled Rice: (25, 1, 0) with MSE: 2.5544882306968804
2024-12-15 16:54:59,375 - Runtime for train_arima_model: 0.70 seconds
2024-12-15 16:54:59,376 - Analyzing Low prices for Regular-Milled Rice.
2024-12-15 16:58:00,473 - Best Parameters: (22, 0, 19) with MSE: 3.659613354245944
2024-12-15 16:58:00,474 - Runtime for greedy_arima: 181.10 seconds
2024-12-15 16:58:00,479 - Optimal parameters for Low in Regular-Milled Rice: (22, 0, 19) with MSE: 3.659613354245944
2024-12-15 16:58:06,513 - Runtime for train_ari

KeyboardInterrupt: 

In [2]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import logging
import os
import time
import warnings

warnings.filterwarnings('ignore')

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

pd.set_option('display.max_rows', None)


def greedy_arima(df, column, p_range=(0, 50), d_range=(0, 3), q_range=(0, 50), test_size=0.2):
    """Greedily pick an ARIMA order (p, d, q) by MSE on a held-out part of the series.

    The series is split without shuffling; each candidate is fitted on the
    training part and scored on its predictions for the test part. The search
    tunes ``p`` with ``d = q = 0``, then ``d`` with the chosen ``p``, then
    ``q`` with the chosen ``p`` and ``d``. A candidate replaces the current
    best only when its MSE is strictly lower.

    Args:
        df: DataFrame holding the series.
        column: Column to model; missing values are dropped first.
        p_range, d_range, q_range: Argument tuples unpacked into ``range``
            for the AR, differencing and MA orders.
        test_size: Passed to ``train_test_split`` as the size of the test part.

    Returns:
        ``(best_params, best_score)``. ``best_params`` is ``None`` and the
        score ``inf`` when no candidate could be fitted, so callers can test
        the result instead of handling an exception.
    """
    start_time = time.time()  # Start timing the function
    data = df[column].dropna()

    # Train-test split; shuffle=False keeps the observations in their order.
    train_data, test_data = train_test_split(data, test_size=test_size, shuffle=False)

    # Positions to predict: the steps right after the training data, one per
    # test observation. They are the same for every candidate.
    first_step = len(train_data)
    last_step = first_step + len(test_data) - 1

    best_score, best_params = float('inf'), None

    def holdout_mse(order):
        """Return the test-set MSE of ARIMA(order), or None when fitting fails."""
        try:
            model = ARIMA(train_data, order=order).fit()
            predictions = model.predict(start=first_step, end=last_step)
            return mean_squared_error(test_data, predictions)
        except Exception as exc:
            # Best effort: an order that cannot be fitted is skipped, but the
            # reason is kept available at debug level instead of vanishing.
            logging.debug("Skipping ARIMA order %s: %s", order, exc)
            return None

    def consider(order):
        """Score ``order`` and adopt it when it beats the current best."""
        nonlocal best_score, best_params
        if order == best_params:
            return  # already scored; refitting cannot improve on itself
        mse = holdout_mse(order)
        if mse is not None and mse < best_score:
            best_score, best_params = mse, order

    # Greedy search, step 1: AR term.
    for p in range(*p_range):
        consider((p, 0, 0))

    # Steps 2 and 3 need a starting point; without one, report "nothing found".
    if best_params is not None:
        # Step 2: differencing term, keeping the chosen p.
        best_p, _, best_q = best_params
        for d in range(*d_range):
            consider((best_p, d, best_q))

        # Step 3: MA term, keeping the chosen p and d.
        best_p, best_d, _ = best_params
        for q in range(*q_range):
            consider((best_p, best_d, q))

    end_time = time.time()
    logging.info(f"Best Parameters: {best_params} with MSE: {best_score}")
    logging.info(f"Runtime for greedy_arima: {end_time - start_time:.2f} seconds")
    return best_params, best_score


def train_arima_model(df, column, params):
    """Fit ARIMA on ``df[column]`` with the selected order, logging the time taken.

    Args:
        df: DataFrame holding the series.
        column: Column to model.
        params: ``(p, d, q)`` order passed to ``ARIMA``.

    Returns:
        The fitted model returned by ``ARIMA(...).fit()``.
    """
    tic = time.time()
    estimator = ARIMA(df[column], order=params)
    fit_result = estimator.fit()
    toc = time.time()
    logging.info(f"Runtime for train_arima_model: {toc - tic:.2f} seconds")
    return fit_result


def main():
    """Run the ARIMA parameter search and final fit for every commodity.

    Walks every entry of the module-level ``COMMODITY_GROUPS`` mapping, loads
    each commodity CSV that exists on disk and, for the "High" and "Low"
    columns, runs ``greedy_arima`` followed by ``train_arima_model``.
    Progress, skipped inputs and timings are reported through ``logging``;
    nothing is returned.
    """
    run_started = time.time()

    for group_name, commodities in COMMODITY_GROUPS.values():
        logging.info(f"Processing commodity group: {group_name}")

        for commodity_name, csv_path in commodities.values():
            logging.info(f"Processing commodity: {commodity_name}")

            # A missing file is reported and skipped; the run carries on.
            if not os.path.exists(csv_path):
                logging.error(f"File not found: {csv_path}")
                continue

            df = pd.read_csv(csv_path, index_col='Date', parse_dates=True)
            logging.info(f"Data loaded successfully for {commodity_name}.")

            for column in ("High", "Low"):
                if column not in df.columns:
                    logging.warning(f"Column '{column}' not found in the dataset. Skipping analysis for this column.")
                    continue

                logging.info(f"Analyzing {column} prices for {commodity_name}.")
                params, mse = greedy_arima(df, column)

                # The search yields falsy params when nothing could be selected.
                if not params:
                    logging.warning(f"No valid parameters found for {column} in {commodity_name}.")
                    continue

                logging.info(f"Optimal parameters for {column} in {commodity_name}: {params} with MSE: {mse}")
                # The fitted model is not used further here; the call is kept
                # for its fit and the runtime it logs.
                train_arima_model(df, column, params)

    total_elapsed = time.time() - run_started
    logging.info(f"Total runtime for the script: {total_elapsed:.2f} seconds")


if __name__ == "__main__":
    # Commodity catalogue read by main():
    #   group number -> (group name, {item number -> (display name, CSV path)}).
    # Item numbers come from enumerate(..., start=1), so they follow the
    # listing order below.
    COMMODITY_GROUPS = {
        1: ("Rice", dict(enumerate([
            ('Regular-Milled Rice', '../csv/rice/regular_milled_rice.csv'),
            ('Well-Milled Rice', '../csv/rice/well_milled_rice.csv'),
            ('Premium Rice', '../csv/rice/premium_rice.csv'),
            ('Special Rice', '../csv/rice/special_rice.csv'),
        ], start=1))),
        2: ("Meat", dict(enumerate([
            ('Beef Brisket', '../csv/meat/beef_brisket.csv'),
            ('Beef Rump', '../csv/meat/beef_rump.csv'),
            ('Whole Chicken', '../csv/meat/whole_chicken.csv'),
            ('Pork Belly', '../csv/meat/pork_belly.csv'),
            ('Pork Ham', '../csv/meat/pork_kasim.csv'),
        ], start=1))),
        3: ("Fish", dict(enumerate([
            ('Alumahan', '../csv/fish/alumahan.csv'),
            ('Bangus', '../csv/fish/bangus.csv'),
            ('Galunggong', '../csv/fish/galunggong.csv'),
            ('Tilapia', '../csv/fish/tilapia.csv'),
        ], start=1))),
        4: ("Fruits", dict(enumerate([
            ('Banana (Lakatan)', '../csv/fruits/banana_lakatan.csv'),
            ('Calamansi', '../csv/fruits/calamansi.csv'),
            ('Mango', '../csv/fruits/mango.csv'),
            ('Papaya', '../csv/fruits/papaya.csv'),
        ], start=1))),
        5: ("Vegetables", dict(enumerate([
            ('Cabbage', '../csv/vegetables/cabbage.csv'),
            ('Carrots', '../csv/vegetables/carrots.csv'),
            ('Eggplant', '../csv/vegetables/eggplant.csv'),
            ('Tomato', '../csv/vegetables/tomato.csv'),
            ('Potato', '../csv/vegetables/white_potato.csv'),
        ], start=1))),
        6: ("Spices", dict(enumerate([
            ('Garlic', '../csv/spices/garlic.csv'),
            ('Red Onion', '../csv/spices/red_onion.csv'),
        ], start=1))),
    }

    main()

2024-12-15 17:48:22,445 - Processing commodity group: Rice
2024-12-15 17:48:22,447 - Processing commodity: Regular-Milled Rice
2024-12-15 17:48:22,451 - Data loaded successfully for Regular-Milled Rice.
2024-12-15 17:48:22,452 - Analyzing High prices for Regular-Milled Rice.
2024-12-15 18:21:25,347 - Best Parameters: (42, 1, 48) with MSE: 2.635675875855916
2024-12-15 18:21:25,348 - Runtime for greedy_arima: 1982.89 seconds
2024-12-15 18:21:25,365 - Optimal parameters for High in Regular-Milled Rice: (42, 1, 48) with MSE: 2.635675875855916
2024-12-15 18:22:34,802 - Runtime for train_arima_model: 69.43 seconds
2024-12-15 18:22:34,802 - Analyzing Low prices for Regular-Milled Rice.
2024-12-15 18:37:10,623 - Best Parameters: (22, 0, 19) with MSE: 3.659613354245944
2024-12-15 18:37:10,624 - Runtime for greedy_arima: 875.82 seconds
2024-12-15 18:37:10,638 - Optimal parameters for Low in Regular-Milled Rice: (22, 0, 19) with MSE: 3.659613354245944
2024-12-15 18:37:16,756 - Runtime for train_a

KeyboardInterrupt: 

# SARIMA MODELS

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import logging
import os
import time
import warnings

# Suppress every warning raised through the `warnings` module for the rest of
# the session.
warnings.filterwarnings('ignore')

# Emit log records at INFO level and above, formatted as "<timestamp> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

# Lift pandas' row limit when displaying frames/series (None = no limit).
pd.set_option('display.max_rows', None)


def greedy_sarima(df, column, p_range=(0, 28), d_range=(0, 3), q_range=(0, 28), seasonal_range=(0, 2)):
    """Greedily pick SARIMA orders for ``df[column]`` by in-sample MSE.

    The search runs in two stages:

    1. Every ``(p, q)`` pair from ``p_range`` x ``q_range`` is fitted with
       ``d = 0`` and no seasonal component.
    2. With the best ``(p, d, q)`` from stage 1 held fixed, every ``(P, Q)``
       pair from ``seasonal_range`` is fitted with ``D = 0`` and a seasonal
       period of 12.

    A candidate replaces the current best only when its MSE (fitted values
    versus the observed series) is strictly lower. Candidates whose fit
    raises are skipped.

    Args:
        df: DataFrame holding the series.
        column: Name of the column to model; NaN entries are dropped first.
        p_range: ``range()`` arguments for the non-seasonal AR order ``p``.
        d_range: Accepted for interface compatibility but currently unused;
            the differencing order is always 0.
        q_range: ``range()`` arguments for the non-seasonal MA order ``q``.
        seasonal_range: ``range()`` arguments used for both seasonal orders
            ``P`` and ``Q``.

    Returns:
        ``(best_params, best_score)``. ``best_params`` is always a 7-tuple
        ``(p, d, q, P, D, Q, s)`` so it can be split as ``params[:3]`` and
        ``params[3:]``. When no candidate could be fitted, ``best_params``
        is ``None`` and ``best_score`` is ``inf``.
    """
    start_time = time.time()  # Start timing the function
    data = df[column].dropna()
    # Seasonal period is fixed at 12 (e.g. monthly data with yearly seasonality).
    seasonal_period = 12

    def _in_sample_mse(order, seasonal_order):
        # Fit one candidate and score its fitted values against the data,
        # leaving out the first `d` observations.
        fitted = SARIMAX(data, order=order, seasonal_order=seasonal_order).fit(disp=False)
        skip = order[1]
        return mean_squared_error(data.iloc[skip:], fitted.fittedvalues.iloc[skip:])

    best_score, best_params = float('inf'), None

    # Stage 1: search the non-seasonal (p, q) grid with d fixed at 0.
    for p in range(*p_range):
        for q in range(*q_range):
            order = (p, 0, q)
            try:
                mse = _in_sample_mse(order, (0, 0, 0, 0))
            except Exception as exc:
                # Best-effort search: a candidate that cannot be fitted is skipped.
                logging.debug("Skipping SARIMA order=%s: %s", order, exc)
                continue
            if mse < best_score:
                # Store the full 7-item form so the result has one shape
                # whether or not a seasonal candidate wins later.
                best_score, best_params = mse, order + (0, 0, 0, 0)

    # Stage 2 only makes sense once stage 1 produced a baseline; otherwise
    # (None, inf) is returned so the caller can report "no valid parameters".
    if best_params is not None:
        best_order = best_params[:3]
        for P in range(*seasonal_range):
            for Q in range(*seasonal_range):
                seasonal_order = (P, 0, Q, seasonal_period)
                try:
                    mse = _in_sample_mse(best_order, seasonal_order)
                except Exception as exc:
                    logging.debug(
                        "Skipping SARIMA order=%s seasonal_order=%s: %s",
                        best_order, seasonal_order, exc,
                    )
                    continue
                if mse < best_score:
                    best_score, best_params = mse, best_order + seasonal_order

    end_time = time.time()
    logging.info(f"Best Parameters: {best_params} with MSE: {best_score}")
    logging.info(f"Runtime for greedy_sarima: {end_time - start_time:.2f} seconds")
    return best_params, best_score


def train_sarima_model(df, column, params):
    """Fit a SARIMAX model on ``df[column]`` and log how long the fit took.

    Args:
        df: DataFrame containing the series to model.
        column: Name of the column in ``df`` to fit.
        params: Sequence whose first three items are the non-seasonal order
            ``(p, d, q)``. The remaining items are the seasonal order: all
            four of ``(P, D, Q, s)``, only ``(P, D, Q)``, or nothing at all.
            Missing seasonal items are filled with zeros.

    Returns:
        The fitted results object returned by ``SARIMAX(...).fit(disp=False)``.

    Raises:
        ValueError: If ``params`` does not have 3, 6 or 7 items.
    """
    params = tuple(params)
    if len(params) not in (3, 6, 7):
        raise ValueError(
            "params must have 3, 6 or 7 items (p, d, q[, P, D, Q[, s]]); "
            f"got {len(params)}"
        )

    order = params[:3]
    seasonal = params[3:]
    # SARIMAX needs all four seasonal items; a parameter tuple that omits the
    # period (or the whole seasonal part) is padded with zeros instead of
    # failing inside the model constructor.
    seasonal_order = seasonal + (0,) * (4 - len(seasonal))

    start_time = time.time()
    model = SARIMAX(df[column], order=order, seasonal_order=seasonal_order).fit(disp=False)
    end_time = time.time()
    logging.info(f"Runtime for train_sarima_model: {end_time - start_time:.2f} seconds")
    return model


def main():
    """Run the SARIMA parameter search and final fit for every commodity.

    Walks every entry of the module-level ``COMMODITY_GROUPS`` mapping, loads
    each commodity CSV that exists on disk and, for the "High" and "Low"
    columns, runs ``greedy_sarima`` followed by ``train_sarima_model``.
    Progress, skipped inputs and timings are reported through ``logging``;
    nothing is returned.
    """
    run_started = time.time()

    for group_name, commodities in COMMODITY_GROUPS.values():
        logging.info(f"Processing commodity group: {group_name}")

        for commodity_name, csv_path in commodities.values():
            logging.info(f"Processing commodity: {commodity_name}")

            # A missing file is reported and skipped; the run carries on.
            if not os.path.exists(csv_path):
                logging.error(f"File not found: {csv_path}")
                continue

            df = pd.read_csv(csv_path, index_col='Date', parse_dates=True)
            logging.info(f"Data loaded successfully for {commodity_name}.")

            for column in ("High", "Low"):
                if column not in df.columns:
                    logging.warning(f"Column '{column}' not found in the dataset. Skipping analysis for this column.")
                    continue

                logging.info(f"Analyzing {column} prices for {commodity_name}.")
                params, mse = greedy_sarima(df, column)

                # The search yields falsy params when nothing could be selected.
                if not params:
                    logging.warning(f"No valid parameters found for {column} in {commodity_name}.")
                    continue

                logging.info(f"Optimal parameters for {column} in {commodity_name}: {params} with MSE: {mse}")
                # The fitted model is not used further here; the call is kept
                # for its fit and the runtime it logs.
                train_sarima_model(df, column, params)

    total_elapsed = time.time() - run_started
    logging.info(f"Total runtime for the script: {total_elapsed:.2f} seconds")


if __name__ == "__main__":
    # Commodity catalogue read by main():
    #   group number -> (group name, {item number -> (display name, CSV path)}).
    # Item numbers come from enumerate(..., start=1), so they follow the
    # listing order below.
    COMMODITY_GROUPS = {
        1: ("Rice", dict(enumerate([
            ('Regular-Milled Rice', '../csv/rice/regular_milled_rice.csv'),
            ('Well-Milled Rice', '../csv/rice/well_milled_rice.csv'),
            ('Premium Rice', '../csv/rice/premium_rice.csv'),
            ('Special Rice', '../csv/rice/special_rice.csv'),
        ], start=1))),
        2: ("Meat", dict(enumerate([
            ('Beef Brisket', '../csv/meat/beef_brisket.csv'),
            ('Beef Rump', '../csv/meat/beef_rump.csv'),
            ('Whole Chicken', '../csv/meat/whole_chicken.csv'),
            ('Pork Belly', '../csv/meat/pork_belly.csv'),
            ('Pork Ham', '../csv/meat/pork_kasim.csv'),
        ], start=1))),
        3: ("Fish", dict(enumerate([
            ('Alumahan', '../csv/fish/alumahan.csv'),
            ('Bangus', '../csv/fish/bangus.csv'),
            ('Galunggong', '../csv/fish/galunggong.csv'),
            ('Tilapia', '../csv/fish/tilapia.csv'),
        ], start=1))),
        4: ("Fruits", dict(enumerate([
            ('Banana (Lakatan)', '../csv/fruits/banana_lakatan.csv'),
            ('Calamansi', '../csv/fruits/calamansi.csv'),
            ('Mango', '../csv/fruits/mango.csv'),
            ('Papaya', '../csv/fruits/papaya.csv'),
        ], start=1))),
        5: ("Vegetables", dict(enumerate([
            ('Cabbage', '../csv/vegetables/cabbage.csv'),
            ('Carrots', '../csv/vegetables/carrots.csv'),
            ('Eggplant', '../csv/vegetables/eggplant.csv'),
            ('Tomato', '../csv/vegetables/tomato.csv'),
            ('Potato', '../csv/vegetables/white_potato.csv'),
        ], start=1))),
        6: ("Spices", dict(enumerate([
            ('Garlic', '../csv/spices/garlic.csv'),
            ('Red Onion', '../csv/spices/red_onion.csv'),
        ], start=1))),
    }

    main()