In [None]:
import re
import polars as pl

# --- Parsing functions ---
def parse_items(block):
    """Return the entries listed one per line in ``block``.

    Leading/trailing whitespace of the whole block is removed first, blank
    lines are skipped, and single quotes and spaces are stripped from both
    ends of every remaining line.
    """
    entries = []
    for raw_line in block.strip().splitlines():
        if not raw_line.strip():
            continue
        entries.append(raw_line.strip("' "))
    return entries

def parse_tuples(block):
    """Return one tuple of single-quoted strings per non-blank line of ``block``.

    Every ``'...'`` token on a line becomes one element of that line's tuple,
    in order of appearance. Blank lines are skipped (as ``parse_items`` does)
    so they do not produce empty tuples that break DataFrame construction.
    """
    quoted = re.compile(r"'([^']+)'")
    return [
        tuple(quoted.findall(line))
        for line in block.strip().splitlines()
        if line.strip()
    ]

def parse_param_triples(block):
    """Parse indexed parameter lines into flat tuples.

    For every non-blank line, the result holds all single-quoted strings on
    that line followed by the integer that comes right after ``"] "``.

    Raises:
        ValueError: if a non-blank line has no ``] <integer>`` part.
    """
    rows = []
    for line in block.strip().splitlines():
        if not line.strip():
            # Blank separator lines carry no data.
            continue
        value = re.search(r"\] (\d+)", line)
        if value is None:
            # Fail with the offending line instead of an AttributeError on None.
            raise ValueError(f"cannot parse parameter line: {line!r}")
        rows.append((*re.findall(r"'([^']+)'", line), int(value.group(1))))
    return rows

def parse_param_pairs(block):
    """Parse indexed parameter lines into flat tuples.

    For every non-blank line, the result holds all single-quoted strings on
    that line followed by the integer that comes right after ``"] "``.

    Raises:
        ValueError: if a non-blank line has no ``] <integer>`` part.
    """
    rows = []
    for line in block.strip().splitlines():
        if not line.strip():
            # Blank separator lines carry no data.
            continue
        value = re.search(r"\] (\d+)", line)
        if value is None:
            # Fail with the offending line instead of an AttributeError on None.
            raise ValueError(f"cannot parse parameter line: {line!r}")
        rows.append((*re.findall(r"'([^']+)'", line), int(value.group(1))))
    return rows

# --- Load data from file ---
# The encoding is stated explicitly so the read does not depend on the
# platform's default locale encoding.
with open('../../data/prodopt.dat', encoding="utf-8") as f:
    raw = f.read()


def _extract_section(text, keyword, name):
    """Return the text between ``<keyword> <name> :=`` and the next ``;``.

    Raises:
        ValueError: if ``text`` contains no such section.
    """
    found = re.search(rf"{keyword} {name} :=(.*?)\s*;", text, re.S)
    if found is None:
        raise ValueError(f"section '{keyword} {name}' not found in data file")
    return found.group(1)


# --- Extract using regex ---
products = _extract_section(raw, "set", "PRODUCTS")
stores = _extract_section(raw, "set", "STORES")
workshops = _extract_section(raw, "set", "WORKSHOPS")
procurements = _extract_section(raw, "param", "PROCUREMENTS")
transport_links = _extract_section(raw, "param", "TRANSPORT_LINKS")
periods = _extract_section(raw, "set", "PERIODS")
demand = _extract_section(raw, "param", "Demand")
inventory = _extract_section(raw, "param", "InitialInventory")
# Scalar parameters written on one line as `param Name := <digits>;`,
# collected as {name: digit-string}.
penalties = dict(re.findall(r"param (\w+) := (\d+);", raw))

# --- Create DataFrames ---
# Date strings taken from the data file are parsed as year-day-month.
# NOTE(review): this differs from the ISO literal used for the inventory
# period below — confirm the order against the data file before changing it.
DAT_DATE_FORMAT = "%Y-%d-%m"

df_products = pl.DataFrame({"p_name": parse_items(products)})
df_stores = pl.DataFrame({"s_name": parse_items(stores)})
df_workshops = pl.DataFrame({"w_name": parse_items(workshops)})
df_periods = pl.DataFrame({"period": parse_items(periods)}).with_columns(
    pl.col("period").str.strptime(pl.Date, DAT_DATE_FORMAT)
)

# The parsers return one tuple per record, so the orientation is stated
# explicitly rather than inferred. Types are not enforced at creation;
# columns are cast afterwards.
df_demand_forecast = pl.DataFrame(
    parse_param_triples(demand),
    schema=["p_name", "l_name", "period", "mean"],
    orient="row",
).with_columns(
    pl.col("period").str.strptime(pl.Date, DAT_DATE_FORMAT),
    pl.col("mean").cast(pl.UInt32),
)

df_procurements = pl.DataFrame(
    parse_tuples(procurements),
    schema=["pc_p_name", "pc_s_name", "pc_active_from", "pc_active_upto"],
    orient="row",
).with_columns(
    pl.col("pc_active_from").str.strptime(pl.Date, DAT_DATE_FORMAT),
    pl.col("pc_active_upto").str.strptime(pl.Date, DAT_DATE_FORMAT),
)

df_trankport_links = pl.DataFrame(
    parse_param_pairs(transport_links),
    schema=["tl_w_name", "tl_s_name", "tl_cost"],
    orient="row",
)

# The initial inventory is recorded as both starting and ending stock of a
# single fixed period (2023-12-31).
df_inventory = pl.DataFrame(
    parse_param_pairs(inventory),
    schema=["p_name", "l_name", "initial_inventory"],
    orient="row",
).with_columns(
    pl.col("initial_inventory").cast(pl.UInt32),
    ending_inventory=pl.col("initial_inventory").cast(pl.UInt32),
    period=pl.lit("2023-12-31").str.strptime(pl.Date, "%Y-%m-%d"),
)

# --- Penalty values ---
# A penalty parameter that was not found in the data file falls back to 0.
unmet_penalty, ending_penalty = (
    int(penalties.get(key, 0))
    for key in ("UnmetDemandPenalty", "EndingInventoryPenalty")
)

# Optional: inspect one DataFrame
print(df_demand_forecast.head())


shape: (5, 4)
┌─────────┬─────────┬────────────┬──────┐
│ p_name  ┆ l_name  ┆ period     ┆ mean │
│ ---     ┆ ---     ┆ ---        ┆ ---  │
│ str     ┆ str     ┆ date       ┆ u32  │
╞═════════╪═════════╪════════════╪══════╡
│ Biscuit ┆ Market2 ┆ 2024-01-01 ┆ 10   │
│ Biscuit ┆ Market2 ┆ 2024-01-02 ┆ 10   │
│ Biscuit ┆ Market2 ┆ 2024-01-03 ┆ 10   │
│ Biscuit ┆ Market2 ┆ 2024-01-04 ┆ 5    │
│ Biscuit ┆ Market2 ┆ 2024-01-05 ┆ 5    │
└─────────┴─────────┴────────────┴──────┘


In [4]:
# Display the store-name frame.
# NOTE(review): the saved output lists the column as `l_name`, while the
# parsing cell builds this frame with `s_name` — the output looks stale;
# re-run to confirm.
df_stores

l_name
str
"""Market1"""
"""Market2"""
"""Market3"""


In [23]:
# Display the parsed product names (single `p_name` column at this point).
df_products

p_name
str
"""Biscuit"""
"""Bread"""
"""Cookie"""
"""Croissant"""
"""Donut"""


In [24]:
# Display the parsed workshop names (single `w_name` column at this point).
df_workshops

w_name
str
"""Bakery1"""
"""Bakery2"""


In [25]:
# Display the parsed procurement rows: product name, store name and the
# active-from / active-upto dates, before names are resolved to ids.
df_procurements

pc_p_name,pc_s_name,pc_active_from,pc_active_upto
str,str,date,date
"""Biscuit""","""Market2""",2024-01-01,2024-07-01
"""Biscuit""","""Market3""",2024-01-01,2024-07-01
"""Bread""","""Market1""",2024-01-01,2024-07-01
"""Bread""","""Market2""",2024-01-01,2024-07-01
"""Bread""","""Market3""",2024-01-01,2024-07-01
…,…,…,…
"""Croissant""","""Market1""",2024-01-01,2024-07-01
"""Croissant""","""Market2""",2024-01-01,2024-07-01
"""Croissant""","""Market3""",2024-01-01,2024-07-01
"""Donut""","""Market2""",2024-01-01,2024-07-01


In [7]:
import duckdb

In [45]:
# Create the sequences and tables used by the rest of the notebook. Every
# statement uses IF NOT EXISTS, so the cell can be re-run against an existing
# database file.
# NOTE(review): the database path is relative to the kernel's working
# directory, while the input file is read from `../../data/` — confirm both
# locations are intended.
with duckdb.connect(database="./data/duckdb.db", read_only=False) as con:
    # Each table gets an integer primary key drawn from its own sequence.
    # transport_links, procurements, demand_predictions and stocks reference
    # products / stores / workshops through foreign keys and carry a UNIQUE
    # constraint on their natural key.
    # NOTE(review): the name columns of products / stores / workshops have no
    # UNIQUE constraint, so inserting the same names twice creates duplicates.
    con.execute(
    """
        CREATE SEQUENCE IF NOT EXISTS products_id_seq START 1;
        CREATE SEQUENCE IF NOT EXISTS stores_id_seq START 1;
        CREATE SEQUENCE IF NOT EXISTS workshops_id_seq START 1;
        CREATE SEQUENCE IF NOT EXISTS transport_links_id_seq START 1;
        CREATE SEQUENCE IF NOT EXISTS procurements_id_seq START 1;
        CREATE SEQUENCE IF NOT EXISTS demand_predictions_id_seq START 1;
        CREATE SEQUENCE IF NOT EXISTS stocks_id_seq START 1;

        CREATE TABLE IF NOT EXISTS products (
            p_id INTEGER PRIMARY KEY DEFAULT nextval('products_id_seq'),
            p_name VARCHAR
        );
    
        CREATE TABLE IF NOT EXISTS stores (
            s_id INTEGER PRIMARY KEY DEFAULT nextval('stores_id_seq'),
            s_name VARCHAR
        );

        CREATE TABLE IF NOT EXISTS workshops (
            w_id INTEGER PRIMARY KEY DEFAULT nextval('workshops_id_seq'),
            w_name VARCHAR
        );

        CREATE TABLE IF NOT EXISTS transport_links (
            tl_id INTEGER PRIMARY KEY DEFAULT nextval('transport_links_id_seq'),
            tl_w_id INTEGER,
            tl_s_id INTEGER,
            tl_cost DOUBLE DEFAULT 0.0 CHECK (tl_cost >= 0),
            UNIQUE (tl_w_id, tl_s_id),
            FOREIGN KEY (tl_w_id) REFERENCES workshops(w_id),
            FOREIGN KEY (tl_s_id) REFERENCES stores(s_id)
        );
                
        CREATE TABLE IF NOT EXISTS procurements (
            pc_id INTEGER PRIMARY KEY DEFAULT nextval('procurements_id_seq'),
            pc_p_id INTEGER,
            pc_s_id INTEGER,
            pc_active_from DATE,
            pc_active_upto DATE CHECK (pc_active_from < pc_active_upto),
            UNIQUE (pc_p_id, pc_s_id, pc_active_from, pc_active_upto),
            FOREIGN KEY (pc_p_id) REFERENCES products(p_id),
            FOREIGN KEY (pc_s_id) REFERENCES stores(s_id) 
        );

        CREATE TABLE IF NOT EXISTS demand_predictions (
            dp_id INTEGER PRIMARY KEY DEFAULT nextval('demand_predictions_id_seq'),
            dp_p_id INTEGER,
            dp_s_id INTEGER,
            dp_period DATE,
            dp_mean INTEGER CHECK (dp_mean >= 0),
            UNIQUE (dp_p_id, dp_s_id, dp_period),
            FOREIGN KEY (dp_p_id) REFERENCES products(p_id),
            FOREIGN KEY (dp_s_id) REFERENCES stores(s_id) 
        );

        CREATE TABLE IF NOT EXISTS stocks (
            sk_id INTEGER PRIMARY KEY DEFAULT nextval('stocks_id_seq'),
            sk_p_id INTEGER,
            sk_s_id INTEGER,
            sk_period DATE,
            sk_starting_inventory INTEGER CHECK (sk_starting_inventory >= 0),
            sk_ending_inventory INTEGER CHECK (sk_ending_inventory >= 0),
            UNIQUE (sk_p_id, sk_s_id, sk_period),
            FOREIGN KEY (sk_p_id) REFERENCES products(p_id),
            FOREIGN KEY (sk_s_id) REFERENCES stores(s_id) 
        );
""")
# con.execute(f"""
#     CREATE TABLE IF NOT EXISTS 'products' AS SELECT * FROM df_products;
# """)

In [10]:
with duckdb.connect(database="./data/duckdb.db", read_only=False) as con:
    # Insert the parsed names into the three base tables.
    # - The name column is selected explicitly, so the statement still works
    #   after the frames have been reloaded from the database with id columns.
    # - Names already stored are skipped, so re-running the cell does not
    #   create duplicate rows (the name columns have no UNIQUE constraint).
    con.execute(
        """
        INSERT INTO products (p_name)
        (SELECT src.p_name
         FROM df_products AS src
         WHERE NOT EXISTS (SELECT 1 FROM products AS dst WHERE dst.p_name = src.p_name))
        """
    )
    con.execute(
        """
        INSERT INTO stores (s_name)
        (SELECT src.s_name
         FROM df_stores AS src
         WHERE NOT EXISTS (SELECT 1 FROM stores AS dst WHERE dst.s_name = src.s_name))
        """
    )
    con.execute(
        """
        INSERT INTO workshops (w_name)
        (SELECT src.w_name
         FROM df_workshops AS src
         WHERE NOT EXISTS (SELECT 1 FROM workshops AS dst WHERE dst.w_name = src.w_name))
        """
    )

In [26]:
# Replace the name-only frames with the stored tables, which also carry the
# generated ids needed by the joins in later cells.
with duckdb.connect(database="./data/duckdb.db", read_only=False) as con:
    df_products, df_stores, df_workshops = (
        con.execute(query).pl()
        for query in (
            """SELECT * FROM products""",
            """SELECT * FROM stores""",
            """SELECT * FROM workshops""",
        )
    )

In [17]:
# Swap product / store names for their database ids. The column order matches
# the column list of the procurements insert.
# NOTE: this rebinds `df_procurements`; the name columns are gone afterwards,
# so the cell cannot be run a second time without rebuilding the frame.
df_procurements = (
    df_procurements
    .join(df_products, left_on="pc_p_name", right_on="p_name")
    .join(df_stores, left_on="pc_s_name", right_on="s_name")
    .select("p_id", "s_id", "pc_active_from", "pc_active_upto")
    .rename({"p_id": "pc_p_id", "s_id": "pc_s_id"})
)

In [18]:
with duckdb.connect(database="./data/duckdb.db", read_only=False) as con:
    # Source columns are named explicitly so the insert does not depend on the
    # frame's column order or on extra columns (e.g. `pc_id` after a read-back).
    # Rows whose UNIQUE key is already stored are skipped, which makes the cell
    # safe to re-run.
    con.execute(
        """
        INSERT INTO procurements (pc_p_id, pc_s_id, pc_active_from, pc_active_upto)
        (SELECT pc_p_id, pc_s_id, pc_active_from, pc_active_upto FROM df_procurements)
        ON CONFLICT (pc_p_id, pc_s_id, pc_active_from, pc_active_upto) DO NOTHING
        """
    )

In [27]:
# Read the procurements table back from DuckDB to verify what was stored.
# NOTE: this rebinds `df_procurements` to the stored table, which includes the
# generated `pc_id` column.
with duckdb.connect(database="./data/duckdb.db", read_only=False) as con:
    df_procurements = con.execute("""SELECT * FROM procurements""").pl()
df_procurements

pc_id,pc_p_id,pc_s_id,pc_active_from,pc_active_upto
i32,i32,i32,date,date
1,1,2,2024-01-01,2024-07-01
2,1,3,2024-01-01,2024-07-01
3,2,1,2024-01-01,2024-07-01
4,2,2,2024-01-01,2024-07-01
5,2,3,2024-01-01,2024-07-01
…,…,…,…,…
8,4,1,2024-01-01,2024-07-01
9,4,2,2024-01-01,2024-07-01
10,4,3,2024-01-01,2024-07-01
11,5,2,2024-01-01,2024-07-01


In [32]:
# Display the workshops as reloaded from the database (now with `w_id`).
df_workshops

w_id,w_name
i32,str
1,"""Bakery1"""
2,"""Bakery2"""


In [34]:
# Swap workshop / store names for their database ids. The column order matches
# the column list of the transport_links insert.
# NOTE: this rebinds `df_trankport_links`; the name columns are gone
# afterwards, so the cell cannot be run a second time without rebuilding it.
df_trankport_links = (
    df_trankport_links
    .join(df_workshops, left_on="tl_w_name", right_on="w_name")
    .join(df_stores, left_on="tl_s_name", right_on="s_name")
    .select("w_id", "s_id", "tl_cost")
    .rename({"w_id": "tl_w_id", "s_id": "tl_s_id"})
)

In [None]:
with duckdb.connect(database="./data/duckdb.db", read_only=False) as con:
    # Source columns are named explicitly so the insert does not depend on the
    # frame's column order or on extra columns (e.g. `tl_id` after the
    # read-back below). Links whose (workshop, store) pair is already stored
    # are skipped, which makes the cell safe to re-run.
    con.execute(
        """
        INSERT INTO transport_links (tl_w_id, tl_s_id, tl_cost)
        (SELECT tl_w_id, tl_s_id, tl_cost FROM df_trankport_links)
        ON CONFLICT (tl_w_id, tl_s_id) DO NOTHING
        """
    )
    # read transport_links table to verify in duckdb
    df_trankport_links = con.execute("""SELECT * FROM transport_links""").pl()
df_trankport_links

tl_id,tl_w_id,tl_s_id,tl_cost
i32,i32,i32,f64
1,1,1,1.0
2,1,2,3.0
3,1,3,9.0
4,2,1,3.0
5,2,2,9.0
6,2,3,1.0


In [39]:
# Swap product / store names for their database ids and rename the remaining
# columns to the demand_predictions column names, in insert order.
# NOTE: this rebinds `df_demand_forecast`; the name columns are gone
# afterwards, so the cell cannot be run a second time without rebuilding it.
df_demand_forecast = (
    df_demand_forecast
    .join(df_products, left_on="p_name", right_on="p_name")
    .join(df_stores, left_on="l_name", right_on="s_name")
    .select("p_id", "s_id", "period", "mean")
    .rename(
        {
            "p_id": "dp_p_id",
            "s_id": "dp_s_id",
            "period": "dp_period",
            "mean": "dp_mean",
        }
    )
)

In [40]:
with duckdb.connect(database="./data/duckdb.db", read_only=False) as con:
    # Insert demand predictions. Source columns are named explicitly so the
    # insert does not depend on the frame's column order or on extra columns
    # (e.g. `dp_id` after the read-back below). Rows whose
    # (product, store, period) key is already stored are skipped, which makes
    # the cell safe to re-run.
    con.execute(
        """
        INSERT INTO demand_predictions (dp_p_id, dp_s_id, dp_period, dp_mean)
        (SELECT dp_p_id, dp_s_id, dp_period, dp_mean FROM df_demand_forecast)
        ON CONFLICT (dp_p_id, dp_s_id, dp_period) DO NOTHING
        """
    )
    # read demand_predictions table to verify in duckdb
    df_demand_forecast = con.execute("""SELECT * FROM demand_predictions""").pl()
df_demand_forecast

dp_id,dp_p_id,dp_s_id,dp_period,dp_mean
i32,i32,i32,date,i32
1,1,2,2024-01-01,10
2,1,2,2024-01-02,10
3,1,2,2024-01-03,10
4,1,2,2024-01-04,5
5,1,2,2024-01-05,5
…,…,…,…,…
68,5,3,2024-01-02,10
69,5,3,2024-01-03,7
70,5,3,2024-01-04,10
71,5,3,2024-01-05,7


In [49]:
# Swap product / store names for their database ids and rename the remaining
# columns to the stocks column names, in insert order.
# NOTE: this rebinds `df_inventory`; the name columns are gone afterwards, so
# the cell cannot be run a second time without rebuilding the frame.
df_inventory = (
    df_inventory
    .join(df_products, left_on="p_name", right_on="p_name")
    .join(df_stores, left_on="l_name", right_on="s_name")
    .select("p_id", "s_id", "period", "initial_inventory", "ending_inventory")
    .rename(
        {
            "p_id": "sk_p_id",
            "s_id": "sk_s_id",
            "period": "sk_period",
            "initial_inventory": "sk_starting_inventory",
            "ending_inventory": "sk_ending_inventory",
        }
    )
)

In [50]:
with duckdb.connect(database="./data/duckdb.db", read_only=False) as con:
    # Source columns are named explicitly so the insert does not depend on the
    # frame's column order or on extra columns (e.g. `sk_id` after the
    # read-back below). Rows whose (product, store, period) key is already
    # stored are skipped, which makes the cell safe to re-run.
    con.execute(
        """
        INSERT INTO stocks (sk_p_id, sk_s_id, sk_period, sk_starting_inventory, sk_ending_inventory)
        (SELECT sk_p_id, sk_s_id, sk_period, sk_starting_inventory, sk_ending_inventory
         FROM df_inventory)
        ON CONFLICT (sk_p_id, sk_s_id, sk_period) DO NOTHING
        """
    )
    # read stocks table to verify in duckdb
    df_inventory = con.execute("""SELECT * FROM stocks""").pl()
df_inventory

sk_id,sk_p_id,sk_s_id,sk_period,sk_starting_inventory,sk_ending_inventory
i32,i32,i32,date,i32,i32
1,3,1,2023-12-31,5,5
2,3,3,2023-12-31,5,5
3,2,1,2023-12-31,10,10
4,2,2,2023-12-31,10,10
5,2,3,2023-12-31,5,5
…,…,…,…,…,…
8,4,1,2023-12-31,5,5
9,4,2,2023-12-31,5,5
10,4,3,2023-12-31,0,0
11,1,2,2023-12-31,5,5
