In [None]:
import polars as pl
from src.main import GetTanksReq, fetch_tank_data

# Build a request for a single, hard-coded primo id.
req = GetTanksReq(primo_ids={"69419"})

# Top-level `await`: this only works where the kernel runs cells inside an
# event loop (as Jupyter/IPython does), not in a plain Python script.
df = await fetch_tank_data(req)
# The guard below implies fetch_tank_data may return None; abort early with a
# clear error rather than failing on `.lazy()` further down.
if df is None:
    raise ValueError("df is None")
# Switch to the lazy API; later cells keep extending this query.
lf = df.lazy()
# Materialize so the cell shows the fetched frame as its output.
lf.collect()

In [None]:
# Remove the "unique_id" column from the lazy frame.
# NOTE(review): `lf` is rebound in place, so running this cell twice applies the
# drop to a frame that no longer has the column — confirm a re-run is not needed.
lf = lf.drop("unique_id")
# Materialize to display the result.
lf.collect()

In [None]:
# Metric suffixes and tank kinds the regex below is allowed to recognise.
tank_metrics = ["Level", "Volume", "InchesUntilAlarm", "InchesToESD", "TimeUntilESD", "Capacity", "ID"]
tank_types = ["Water", "Oil"]

# Regex alternations ("A|B|C") built from the two lists.
tank_types_str = "|".join(tank_types)
tank_metrics_str = "|".join(tank_metrics)

# Named capture groups, left to right:
#   is_ESD      - optional literal "ESD-" prefix
#   tank_type   - one of tank_types
#   tank_number - zero or more digits after the literal "Tank" (may be empty)
#   tank_metric - one of tank_metrics
# The pattern is anchored at the start only; text after the metric is ignored.
pattern = (
    "^(?<is_ESD>ESD-)?"
    f"(?<tank_type>{tank_types_str})"
    "Tank(?<tank_number>[0-9]*)"
    f"(?<tank_metric>{tank_metrics_str})"
)

# Store the captured groups as a single struct column named "separated_metrics".
lf = lf.with_columns(
    pl.col("metric_nice_name").str.extract_groups(pattern).alias("separated_metrics")
)
lf.collect()

In [None]:
# Expand the "separated_metrics" struct into top-level columns, one per struct
# field (the named groups captured by extract_groups).
lf = lf.unnest("separated_metrics")
# Materialize to display the result.
lf.collect()

In [None]:
# Convert the captured tank number to UInt8. With strict=False, values that
# cannot be converted (e.g. the empty match allowed by `[0-9]*`, or anything
# outside the UInt8 range) become null instead of raising.
lf = lf.with_columns(pl.col("tank_number").cast(pl.UInt8, strict=False))
# Materialize to display the result.
lf.collect()

In [None]:
# Pivot from long to wide: one output row per
# (primo_id, tank_type, tank_number, scada_id) and one column per entry in
# tank_metrics. Each metric column holds the first "value" within the group
# whose "tank_metric" equals that metric (null when the group has none).
values = pl.col("value")
columns = pl.col("tank_metric")

pivoted_lf = lf.group_by(
    ["primo_id", "tank_type", "tank_number", "scada_id"]
).agg(
    [values.filter(columns == metric).first().alias(metric) for metric in tank_metrics]
)

pivoted_lf.collect()

In [None]:
# Partition the pivoted rows by whether a tank number was parsed from the name.
null_condition = pl.col("tank_number").is_null()
not_null_condition = pl.col("tank_number").is_not_null()

numbered_tanks = pivoted_lf.filter(not_null_condition)

# For rows without a parsed number, take the tank number from the "ID" metric:
# overwrite tank_number with ID converted to UInt8 (unconvertible values become
# null because strict=False), then remove the now-redundant ID column.
null_tanks = (
    pivoted_lf.filter(null_condition)
    .with_columns(pl.col("ID").cast(pl.UInt8, strict=False).alias("tank_number"))
    .drop("ID")
)

null_tanks.collect()

In [None]:
# Left join: keep every numbered tank and attach the null_tanks row that shares
# its primo_id, tank_type and tank_number (nulls where there is no match).
# Non-key columns that exist on both sides arrive from null_tanks with the
# "_right" suffix (e.g. "Level_right"), which later cells rely on.
joined_lf = numbered_tanks.join(null_tanks, on=["primo_id", "tank_type", "tank_number"], how="left")
# Materialize to display the result.
joined_lf.collect()

In [None]:
# Bring the long-format columns of `lf` back in by scada_id, then reduce to a
# single row per (primo_id, tank_type, tank_number) by keeping the first value
# of every remaining column within each group.
final_lf = (
    joined_lf
    .join(lf, on="scada_id", how="left")
    .group_by(["primo_id", "tank_type", "tank_number"])
    .agg(pl.all().first())
)
final_lf.collect()

In [None]:
# Columns kept in the final table, in display order.
required_columns = ["primo_id", "tank_type", "tank_number", "Level", "Volume", "InchesToESD", "TimeUntilESD", "Capacity"]

# For each metric, prefer the numbered tank's own value and fall back to the
# second column when it is null; then keep only the required columns.
# NOTE(review): InchesToESD falls back to "InchesUntilAlarm_right" rather than
# "InchesToESD_right", and Capacity falls back to "tanksize" — presumably
# intentional cross-mappings; confirm against the data source.
final_lf = final_lf.with_columns(
    Level=pl.coalesce("Level", "Level_right"),
    Volume=pl.coalesce("Volume", "Volume_right"),
    InchesToESD=pl.coalesce("InchesToESD", "InchesUntilAlarm_right"),
    TimeUntilESD=pl.coalesce("TimeUntilESD", "TimeUntilESD_right"),
    Capacity=pl.coalesce("Capacity", "tanksize"),
).select(required_columns)
final_lf.collect()

In [None]:
# Order rows by primo_id, then tank_type, then tank_number so the output is
# stable and easy to scan (the preceding group_by does not fix row order).
final_lf = final_lf.sort("primo_id", "tank_type", "tank_number")
# Materialize to display the result.
final_lf.collect()

In [None]:
# Fill level as a whole-number percentage of capacity, stored as UInt8.
#
# Guarding the division matters: a Capacity of 0 would yield inf (or NaN for
# 0/0), and a strict cast of such values to UInt8 fails the entire collect().
# Rows with a non-positive or null Capacity therefore get a null percentage.
# strict=False (as used for the tank_number casts in this notebook) likewise
# turns any remaining value outside 0-255 into null instead of raising.
percent_tank_full = (
    pl.when(pl.col("Capacity") > 0)
    .then(pl.col("Volume") / pl.col("Capacity") * 100)
    .otherwise(None)
    .round()
    .cast(pl.UInt8, strict=False)
)
final_lf = final_lf.with_columns(percent_tank_full.alias("percent_full"))

final_lf.collect()

In [None]:
# Round Capacity to the nearest whole number (round() defaults to 0 decimals)
# and overwrite the column in place. "percent_full" was computed in an earlier
# cell, so it is based on the unrounded Capacity.
capacity_rounded = pl.col("Capacity").round()
final_lf = final_lf.with_columns(capacity_rounded.alias("Capacity"))

# Materialize to display the result.
final_lf.collect()

In [None]:
# Round Volume to the nearest whole number and store it as UInt64.
# NOTE(review): despite the name `volume_to_feet`, no unit conversion happens
# here — only rounding and an integer cast.
# NOTE(review): the cast is strict, so a negative or NaN Volume would raise at
# collect time — confirm that is the desired behavior.
volume_to_feet = pl.col("Volume").round().cast(pl.UInt64)

# Overwrite the Volume column with the rounded integer values.
final_lf = final_lf.with_columns(volume_to_feet.alias("Volume"))

# Materialize to display the result.
final_lf.collect()