In [1]:
import math
import os


os.environ["KERAS_BACKEND"] = "torch"


import numpy as np
import pandas as pd
from keras.layers import LSTM, Dense, Dropout, Normalization, Input
from keras.models import Sequential
from matplotlib import pyplot as plt


def is_prime(n):
    """Return True when ``n`` is a prime number, False otherwise.

    Parameters
    ----------
    n : int
        The number to test. Anything below 2 (including 0 and negatives)
        is reported as not prime.

    Returns
    -------
    bool
    """
    if n < 2:
        return False
    if n < 4:
        # 2 and 3 are prime.
        return True
    if n % 2 == 0:
        # The trial-division loop below only visits odd candidates, so even
        # numbers have to be rejected here.
        return False
    # The upper bound must include floor(sqrt(n)) itself, otherwise perfect
    # squares of primes (9, 25, 49, ...) slip through as "prime".
    for ith in range(3, math.floor(math.sqrt(n)) + 1, 2):
        if n % ith == 0:
            return False

    return True

In [2]:
# Load the low/high price table, discard the "Unnamed: 0" column, then keep
# only rows whose "low" is not the "NOT AVAILABLE" placeholder and that have
# no missing values.
df = pd.read_csv("../../.data/lowhigh_2023_2025.csv").drop(columns="Unnamed: 0")
df = df.loc[df["low"] != "NOT AVAILABLE"].dropna()

In [6]:
import re


def treat_doubles(s):
    """Collapse a "number number" string to its first number.

    If ``s`` contains two numeric tokens separated by a single space, the
    first token is returned. Any other string (a single number, or a number
    followed by non-numeric text) is returned unchanged.
    """
    found = re.search(r"(\d+\.*\d*) (\d+\.*\d*)", s)
    return s if found is None else found.group(1)


# Spot checks on the raw "low" column. Only the final expression of the cell
# is rendered; the two lookups before it are evaluated and discarded.
murphy = df.loc[df["market"] == "Murphy Public Market"]
murphy.loc[murphy["commodity"] == "galunggong", "low"].apply(treat_doubles).values
df.loc[df["low"] == "45.00 NOT AVAILABLE"]

# Distinct commodity names present in the table.
set(df["commodity"])

{'ampalaya',
 'cabbage',
 'cabbage (rareball)',
 'cabbage (scorpio)',
 'cabbage (wonderball)',
 'corn (white)',
 'corn (yellow)',
 'egg (medium)',
 'eggplant',
 'fresh pork kasim/pigue',
 'fresh pork liempo',
 'fresh whole chicken',
 'frozen pork kasim/pigue',
 'frozen pork liempo',
 'galunggong',
 'galunggong (local)',
 'pechay baguio',
 'red onion (local)',
 'sugar (refined)',
 'sugar (washed)',
 'tilapia',
 'tomato',
 'well-milled rice (local)'}

In [None]:
# Map each distinct commodity name to the rows of `df` for that commodity.
data_by_commodities = {
    name: df.loc[df["commodity"] == name] for name in set(df["commodity"])
}

data_by_commodities

In [None]:
# Map each distinct market name to the rows of `df` recorded at that market.
data_by_market = {
    name: df.loc[df["market"] == name] for name in set(df["market"])
}

data_by_market

In [None]:
# Series under study: one commodity at one market.
# NOTE: "eggplant" was tried earlier and has too little data; do not include it.
df_market = data_by_market["Pateros Market"]
df_commodity_market = df_market.loc[df_market["commodity"] == "egg (medium)"]

# Parsed columns used for plotting and for the normalisation statistics.
x = pd.to_datetime(df_commodity_market["date"])
low, high = (pd.to_numeric(df_commodity_market[col]) for col in ("low", "high"))

df_commodity_market

In [None]:
# Low (blue) and high (red) prices over time for the selected series.
plt.figure(figsize=(30, 6))
plt.xticks(rotation=45)
plt.plot(x, low, "o-b", label="low")
plt.plot(x, high, "o-r", label="high")
plt.legend()

In [None]:
# Training frame: "high" first, then "low" (the windowing cell reads the
# target from column 0).
df_train = df_commodity_market.loc[:, ["high", "low"]]

# Drop the first row when the row count is prime, then reverse the row order.
df_train = df_train.iloc[1:] if is_prime(len(df_train)) else df_train
df_train = df_train.iloc[::-1]

# Variance pooled over both price columns, computed in float32.
np.var(df_train[["low", "high"]].to_numpy(dtype="float32"))

In [None]:
train_x = []
train_y = []
train_n_future = 1
train_n_past = 5

# Each sample consumes `train_n_past` rows of input followed by
# `train_n_future` rows of target; consecutive samples do not overlap.
train_window = train_n_past + train_n_future

# Bound the loop so every sample has a complete input AND a complete target.
# Previously a trailing slice with a full input but a short or empty target
# was still appended, which left train_y ragged so it could not be stacked
# into a single array in the next cell.
for ith in range(0, df_train.shape[0] - train_window + 1, train_window):
    tail = ith + train_n_past
    past = df_train.iloc[ith:tail].to_numpy(dtype="float32")
    # Target is column 0 of df_train only; the 0:1 slice keeps it 2-D.
    future = df_train.iloc[tail:tail + train_n_future, 0:1].to_numpy(dtype="float32")
    train_x.append(past)
    train_y.append(future)

train_x, train_y

In [None]:
# Stack the per-sample windows into arrays and show their shapes.
train_x, train_y = (np.array(samples) for samples in (train_x, train_y))
train_x.shape, train_y.shape

In [None]:
# Scalar statistics pooled over both the low and high price series; they
# parameterise the input Normalization layer and the inverted one at the output.
mean = np.mean([low, high])
var = np.var([low, high])

model = Sequential(
    [
        Input(shape=(train_x.shape[1], train_x.shape[2])),
        Normalization(mean=mean, variance=var),
        LSTM(128, activation="relu", return_sequences=True),
        LSTM(64, activation="relu", return_sequences=False),
        Dropout(0.3),
        Dense(train_y.shape[1]),
        # invert=True: this layer applies the inverse of the normalisation above.
        Normalization(mean=mean, variance=var, invert=True),
    ]
)

model.compile(optimizer="nadam", loss="mse")
model.summary()

mean, var

In [None]:
# Train with the last 10% of samples held out for validation, then plot the
# training and validation loss per epoch.
hist = model.fit(
    train_x,
    train_y,
    epochs=200,
    batch_size=16,
    validation_split=0.1,
    verbose=1,
)
for history_key, curve_label in (("loss", "Loss"), ("val_loss", "Validation loss")):
    plt.plot(hist.history[history_key], label=curve_label)
plt.legend()

In [None]:
# Run the model on the training windows (in reversed order) and plot one
# prediction per window.
days_to_predict = len(train_x)
train_x_rev = np.flip(train_x, axis=0)

# NOTE(review): the start date is read from the last row of the unfiltered
# `df`, not from the selected market/commodity series - confirm this is intended.
date = pd.to_datetime(df["date"].iloc[-1])
# NOTE(review): the x-axis advances one day per prediction, whereas the windows
# are built train_n_past + train_n_future rows apart - verify the spacing.
dates = [date + pd.Timedelta(days=offset) for offset in range(days_to_predict)]

prices = model.predict(train_x_rev[-days_to_predict:])
plt.figure(figsize=(10, 6))
plt.plot(dates, prices, "o-")

In [None]:
# Display the array returned by model.predict in the previous cell.
prices