In [1]:
import polars as pl
from src.main import GetTanksReq, fetch_tank_data

req = GetTanksReq(property_ids={"98840"})

df = await fetch_tank_data(req)
if df is None:
    raise ValueError("df is None")
lf = df.lazy()
lf.collect()

unique_id,property_id,source_key,tank_name,uom,timestamp,value,tanksize
object,str,str,str,str,datetime[μs],f64,f64
0d96d173-3c6a-469f-901b-5cea8aa4210e,"""98840""","""9884005""","""OilTank5Level""","""in""",2015-05-01 05:05:54.083239,74.240695,790.375905
106e51eb-4faa-4d5c-b43e-210c92016f98,"""98840""","""98840W2""","""WaterTank2Volume""","""bbl""",2023-08-16 19:52:13.787936,357.148125,653.135767
1158ad41-4c78-4d2f-83ec-2ff717c2d445,"""98840""","""9884003""","""OilTank3Volume""","""bbl""",2024-02-11 04:10:55.839466,284.213132,624.069952
2748bdd4-4b4b-4add-aa16-251efbb55639,"""98840""","""98840FAC""","""ESD-OilTankID""","""""",2024-07-05 18:19:24.387492,4.0,
29576772-cde6-4767-a478-c7337a1c8c39,"""98840""","""9884004""","""OilTank4Level""","""in""",2021-06-18 15:33:10.557032,75.002393,417.5281
…,…,…,…,…,…,…,…
b5a48faa-1f40-4887-a0e9-7a84a3a83366,"""98840""","""98840W2""","""WaterTank2Level""","""in""",2019-04-05 01:41:07.809116,76.456344,653.135767
c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,"""98840""","""98840FAC""","""ESD-WaterTankInchesUntilAlarm""","""in""",2024-08-16 04:45:27.763083,257.847738,
db762e5e-9c0c-4f77-864b-df5e66865f2e,"""98840""","""9884005""","""OilTank5Volume""","""bbl""",2023-03-17 20:34:17.444289,300.447955,790.375905
f3c718bf-58e8-488e-9445-99fae307d251,"""98840""","""98840FAC""","""ESD-OilTankInchesUntilAlarm""","""in""",2024-08-22 10:59:43.894659,102.478181,


In [4]:
# Build one summary row per tank (property, type, number) from the long-format
# readings in `df`:
#   1. split `tank_name` into ESD flag / tank type / tank number / metric,
#   2. pivot the metrics into columns,
#   3. attach the unnumbered "ESD-" rows to the tank named by their "ID" metric,
#   4. derive the display columns (rounded volume/capacity, percent full).
lf = df.lazy()

tank_metrics = ["Level", "Volume", "InchesUntilAlarm", "InchesToESD", "TimeUntilESD", "Capacity", "ID"]
tank_types = ["Water", "Oil"]

tank_metrics_str = "|".join(tank_metrics)
tank_types_str = "|".join(tank_types)

# "OilTank5Level"  -> is_ESD=null,   tank_type="Oil", tank_number="5", tank_metric="Level"
# "ESD-OilTankID"  -> is_ESD="ESD-", tank_type="Oil", tank_number="",  tank_metric="ID"
pattern = f"^(?<is_ESD>ESD-)?(?<tank_type>{tank_types_str})Tank(?<tank_number>[0-9]*)(?<tank_metric>{tank_metrics_str})"
lf = lf.with_columns(
    separated_metrics=pl.col("tank_name").str.extract_groups(pattern)
).unnest("separated_metrics")

# Non-strict cast: the empty tank number of the "ESD-" rows becomes null
# instead of raising.
lf = lf.with_columns(pl.col("tank_number").cast(pl.UInt8, strict=False))


# pivoting the data: one column per metric, one row per (tank, source_key)
values = pl.col("value")
columns = pl.col("tank_metric")
pivoted_lf = lf.group_by("property_id", "tank_type", "tank_number", "source_key").agg(
    values.filter(columns == metric).first().alias(metric) for metric in tank_metrics
)

null_condition = pl.col("tank_number").is_null()
not_null_condition = pl.col("tank_number").is_not_null()

numbered_tanks = pivoted_lf.filter(not_null_condition)

# Rows without a tank number take it from their "ID" metric so that they can
# be joined onto the tank they refer to.
null_tanks = (
    pivoted_lf.filter(null_condition)
    .with_columns(pl.col("ID").cast(pl.UInt8, strict=False).alias("tank_number"))
    .drop("ID")
)

# Columns contributed by `null_tanks` get the "_right" suffix.
joined_lf = numbered_tanks.join(
    null_tanks, on=["property_id", "tank_type", "tank_number"], how="left"
)

# Joining the raw readings back in supplies `tanksize` and `unique_id`; it also
# repeats every tank once per raw reading, hence the first()-per-tank collapse.
final_lf = joined_lf.join(lf, on=["source_key"], how="left")
final_lf = final_lf.group_by("property_id", "tank_type", "tank_number").agg(
    pl.all().first()
)

final_lf = final_lf.with_columns(
    pl.coalesce(pl.col("Level"), pl.col("Level_right")).alias("level"),
    pl.coalesce(pl.col("Volume"), pl.col("Volume_right")).alias("volume"),
    # The first two entries keep the original precedence. The last two only
    # fill values that were previously left null even though the joined frame
    # carried a reading for them.
    pl.coalesce(
        pl.col("InchesToESD"),
        pl.col("InchesUntilAlarm_right"),
        pl.col("InchesToESD_right"),
        pl.col("InchesUntilAlarm"),
    ).alias("inches_to_esd"),
    pl.coalesce(pl.col("TimeUntilESD"), pl.col("TimeUntilESD_right")).alias(
        "time_until_esd"
    ),
    pl.coalesce(pl.col("Capacity"), pl.col("tanksize")).alias("capacity"),
)

required_columns = [
    "unique_id",
    "property_id",
    "source_key",
    "tank_type",
    "tank_number",
    "level",
    "volume",
    "inches_to_esd",
    "time_until_esd",
    "capacity",
]
final_lf = final_lf.select(required_columns).sort(
    "property_id", "tank_type", "tank_number"
)

# NOTE(review): the UInt8 cast is strict, so a ratio above 255 % (or a zero
# capacity) would raise at collect time — confirm that is the desired outcome.
percent_tank_full = (
    (pl.col("volume") / pl.col("capacity") * 100).round().cast(pl.UInt8)
)
capacity_rounded = pl.col("capacity").round()
# Despite the name this only rounds and casts; no unit conversion happens here.
volume_to_feet = pl.col("volume").round().cast(pl.UInt64)

# All three expressions read the un-rounded input columns, so the percentage
# is computed before `volume` and `capacity` are rounded.
final_lf = final_lf.with_columns(
    percent_tank_full.alias("percent_full"),
    capacity_rounded.alias("capacity"),
    volume_to_feet.alias("volume"),
)

result = final_lf.collect()
result

unique_id,property_id,source_key,tank_type,tank_number,level,volume,inches_to_esd,time_until_esd,capacity,percent_full
object,str,str,str,u8,f64,u64,f64,f64,f64,u8
4ac0b7d9-55fe-4865-bc29-6c202ff6349e,"""69419""","""6941901""","""Oil""",1,62.159021,173,,,757.0,23
6467561f-e9ac-4ba0-ac6b-1f1584ee029f,"""69419""","""6941902""","""Oil""",2,88.524576,246,25.078137,,732.0,34
a5de3be7-73fe-489b-b861-2ab88c3b0029,"""69419""","""6941903""","""Oil""",3,44.370742,123,,,822.0,15
0c7822d0-97c0-4b75-b3f3-b8b17c2dabd5,"""69419""","""6941904""","""Oil""",4,139.593031,388,,,464.0,84
0f7f1002-a4d6-4707-a01b-c9c6bf440f34,"""69419""","""69419W1""","""Water""",1,49.260341,137,,,479.0,29
79a95737-e857-45fa-b887-23f4a9e85821,"""69419""","""69419W2""","""Water""",2,51.414077,143,284.342497,,870.0,16
322f605e-bf6d-4299-8c93-e58883ac65e8,"""69419""","""69419W3""","""Water""",3,136.960391,380,,,534.0,71


In [3]:
# Variant of the tank summary that pivots per raw reading (`unique_id` is part
# of the pivot key) and collapses the rows back to one per tank at the end.
lf = df.lazy()

tank_metrics = ["Level", "Volume", "InchesUntilAlarm", "InchesToESD", "TimeUntilESD", "Capacity", "ID"]
tank_types = ["Water", "Oil"]

tank_metrics_str = "|".join(tank_metrics)
tank_types_str = "|".join(tank_types)

# "OilTank5Level" -> (null, "Oil", "5", "Level"); "ESD-OilTankID" -> ("ESD-", "Oil", "", "ID")
pattern = f'^(?<is_esd>ESD-)?(?<tank_type>{tank_types_str})Tank(?<tank_number>[0-9]*)(?<tank_metric>{tank_metrics_str})'
lf = lf.with_columns(separated_metrics=pl.col("tank_name").str.extract_groups(pattern))

lf = lf.unnest("separated_metrics")

# Non-strict cast: the empty tank number of the "ESD-" rows becomes null.
lf = lf.with_columns(pl.col("tank_number").cast(pl.UInt8, strict=False))

# `unique_id` is part of the key, so every raw reading stays on its own row
# with exactly one metric column filled.
values = pl.col("value")
columns = pl.col("tank_metric")
pivoted_lf = lf.group_by("property_id", "tank_type", "tank_number", "source_key", "unique_id").agg(
    values.filter(columns == metric).first().alias(metric) for metric in tank_metrics
)

null_condition = pl.col("tank_number").is_null()
not_null_condition = pl.col("tank_number").is_not_null()

null_tanks = pivoted_lf.filter(null_condition)
numbered_tanks = pivoted_lf.filter(not_null_condition)

# Unnumbered rows take their tank number from the "ID" metric; only the row
# that actually carries "ID" gets a number at this point.
null_tanks = null_tanks.with_columns(pl.col("ID").alias("tank_number"))
null_tanks = null_tanks.drop("ID")
null_tanks = null_tanks.with_columns(pl.col("tank_number").cast(pl.UInt8, strict=False))

# Move the tank's own reading id out of the way so that, after the join below,
# `unique_id` refers to the id coming from the unnumbered rows.
numbered_tanks = numbered_tanks.with_columns(pl.col("unique_id").alias("identifier"))
numbered_tanks = numbered_tanks.drop("unique_id")

# Collapse the unnumbered rows to one per (property, type, source): max()
# skips nulls, so the tank number from the "ID" row and the metric values from
# its sibling rows end up on the same line.
null_tanks_merged = null_tanks.group_by(["property_id", "tank_type", "source_key"]).agg(
    [
        pl.col("tank_number").max(),
        pl.col("unique_id").last(),
        pl.col("Level").max(),
        pl.col("Volume").max(),
        pl.col("InchesUntilAlarm").max(),
        pl.col("InchesToESD").max(),
        pl.col("TimeUntilESD").max(),
        pl.col("Capacity").max(),
    ]
)

# Columns contributed by `null_tanks_merged` get the "_right" suffix.
joined_lf = numbered_tanks.join(null_tanks_merged, on=["property_id", "tank_type", "tank_number"], how="left")

# Brings in `tanksize`; repeats each row once per raw reading of the source.
final_lf = joined_lf.join(lf, on=["source_key"], how="left")

final_lf = final_lf.with_columns(
    # NOTE(review): this replaces the tank's own reading id (stored above) with
    # the id of the matched unnumbered row, so tanks without such a row end up
    # with a null identifier — confirm this is intended.
    pl.col("unique_id").alias("identifier"),
    pl.coalesce(pl.col("Level"), pl.col("Level_right")).alias("level"),
    pl.coalesce(pl.col("Volume"), pl.col("Volume_right")).alias("volume"),
    # The first two entries keep the original precedence. The last two only
    # fill values that were previously left null even though the joined frame
    # carried a reading for them.
    pl.coalesce(
        pl.col("InchesToESD"),
        pl.col("InchesUntilAlarm_right"),
        pl.col("InchesToESD_right"),
        pl.col("InchesUntilAlarm"),
    ).alias("inches_to_esd"),
    pl.coalesce(pl.col("TimeUntilESD"), pl.col("TimeUntilESD_right")).alias("time_until_esd"),
    pl.coalesce(pl.col("Capacity"), pl.col("tanksize")).alias("capacity"),
)

required_columns = ["identifier", "property_id", "source_key", "tank_type", "tank_number", "level", "volume", "inches_to_esd", "time_until_esd", "capacity"]
final_lf = final_lf.select(required_columns)

final_lf = final_lf.sort("property_id", "tank_type", "tank_number")

# Computed from the un-rounded volume and capacity.
# NOTE(review): the UInt8 cast is strict, so a ratio above 255 % (or a zero
# capacity) would raise at collect time — confirm that is the desired outcome.
percent_tank_full = (pl.col("volume") / pl.col("capacity") * 100).round().cast(pl.UInt8)
final_lf = final_lf.with_columns(percent_tank_full.alias("percent_full"))

capacity_rounded = pl.col("capacity").round()
final_lf = final_lf.with_columns(capacity_rounded.alias("capacity"))

# Despite the name this only rounds and casts; no unit conversion happens here.
volume_to_feet = pl.col("volume").round().cast(pl.UInt64)

final_lf = final_lf.with_columns(volume_to_feet.alias("volume"))

# Each tank still spans several rows (one metric filled per row); max() skips
# nulls and therefore merges them into a single complete row.
result = final_lf.group_by(["property_id", "source_key", "tank_type", "tank_number"]).agg(
    [
        pl.col("identifier").first(),
        pl.col("level").max(),
        pl.col("volume").max(),
        pl.col("inches_to_esd").max(),
        pl.col("time_until_esd").max(),
        pl.col("capacity").max(),
        pl.col("percent_full").max(),
    ]
)

result = result.sort("property_id", "tank_type", "tank_number")
result = result.select(["identifier", "property_id", "source_key", "tank_type", "tank_number", "level", "volume", "inches_to_esd", "time_until_esd", "capacity", "percent_full"])
# Turn the object-typed identifier into a plain string column, keeping nulls.
result = result.with_columns(pl.col("identifier").map_elements(lambda x: str(x) if x is not None else None, return_dtype=pl.Utf8).alias("identifier"))
result.collect()


identifier,property_id,source_key,tank_type,tank_number,level,volume,inches_to_esd,time_until_esd,capacity,percent_full
str,str,str,str,u8,f64,u64,f64,f64,f64,u8
,"""98840""","""9884001""","""Oil""",1,79.02198,299,,,554.0,54
,"""98840""","""9884002""","""Oil""",2,77.590071,294,,,736.0,40
,"""98840""","""9884003""","""Oil""",3,77.392763,284,,,624.0,46
"""f3c718bf-58e8-488e-9445-99fae3…","""98840""","""9884004""","""Oil""",4,75.002393,268,102.478181,,418.0,64
,"""98840""","""9884005""","""Oil""",5,74.240695,300,,,790.0,38
,"""98840""","""9884006""","""Oil""",6,82.279163,310,,,400.0,77
,"""98840""","""98840W1""","""Water""",1,79.941493,268,,,697.0,38
"""c4abef08-5ca3-43b2-a9a4-fdf5aa…","""98840""","""98840W2""","""Water""",2,76.456344,357,257.847738,,653.0,55


In [9]:
# Start the step-by-step walkthrough from a fresh lazy view of the fetched frame.
lf = df.lazy()


In [10]:
# Vocabulary that a metric name is assembled from.
tank_metrics = ["Level", "Volume", "InchesUntilAlarm", "InchesToESD", "TimeUntilESD", "Capacity", "ID"]
tank_types = ["Water", "Oil"]

# Regex alternations built from the two lists above.
tank_metrics_str = "|".join(tank_metrics)
tank_types_str = "|".join(tank_types)

# Optional "ESD-" prefix, tank type, optional digits, metric name.
pattern = f'^(?<is_esd>ESD-)?(?<tank_type>{tank_types_str})Tank(?<tank_number>[0-9]*)(?<tank_metric>{tank_metrics_str})'

# NOTE(review): the frame previewed by the loading cell exposes `tank_name`,
# not `metric_nice_name` — confirm which schema fetch_tank_data returns before
# re-running this cell.
parsed_name = pl.col("metric_nice_name").str.extract_groups(pattern)
lf = lf.with_columns(parsed_name.alias("separated_metrics"))
lf.collect()

property_id,scada_id,metric_nice_name,unique_id,uom,timestamp,value,tanksize,separated_metrics
str,str,str,object,str,datetime[μs],f64,f64,struct[4]
"""98840""","""9884005""","""OilTank5Level""",0d96d173-3c6a-469f-901b-5cea8aa4210e,"""in""",2015-05-01 05:05:54.083239,74.240695,790.375905,"{null,""Oil"",""5"",""Level""}"
"""98840""","""98840W2""","""WaterTank2Volume""",106e51eb-4faa-4d5c-b43e-210c92016f98,"""bbl""",2023-08-16 19:52:13.787936,357.148125,653.135767,"{null,""Water"",""2"",""Volume""}"
"""98840""","""9884003""","""OilTank3Volume""",1158ad41-4c78-4d2f-83ec-2ff717c2d445,"""bbl""",2024-02-11 04:10:55.839466,284.213132,624.069952,"{null,""Oil"",""3"",""Volume""}"
"""98840""","""98840FAC""","""ESD-OilTankID""",2748bdd4-4b4b-4add-aa16-251efbb55639,"""""",2024-07-05 18:19:24.387492,4.0,,"{""ESD-"",""Oil"","""",""ID""}"
"""98840""","""9884004""","""OilTank4Level""",29576772-cde6-4767-a478-c7337a1c8c39,"""in""",2021-06-18 15:33:10.557032,75.002393,417.5281,"{null,""Oil"",""4"",""Level""}"
…,…,…,…,…,…,…,…,…
"""98840""","""98840W2""","""WaterTank2Level""",b5a48faa-1f40-4887-a0e9-7a84a3a83366,"""in""",2019-04-05 01:41:07.809116,76.456344,653.135767,"{null,""Water"",""2"",""Level""}"
"""98840""","""98840FAC""","""ESD-WaterTankInchesUntilAlarm""",c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,"""in""",2024-08-16 04:45:27.763083,257.847738,,"{""ESD-"",""Water"","""",""InchesUntilAlarm""}"
"""98840""","""9884005""","""OilTank5Volume""",db762e5e-9c0c-4f77-864b-df5e66865f2e,"""bbl""",2023-03-17 20:34:17.444289,300.447955,790.375905,"{null,""Oil"",""5"",""Volume""}"
"""98840""","""98840FAC""","""ESD-OilTankInchesUntilAlarm""",f3c718bf-58e8-488e-9445-99fae307d251,"""in""",2024-08-22 10:59:43.894659,102.478181,,"{""ESD-"",""Oil"","""",""InchesUntilAlarm""}"


In [11]:
# Expand the struct produced by extract_groups into one top-level column per
# capture group. `lf` is reassigned, so this cell is not re-runnable on its
# own: after the first run `separated_metrics` is no longer a column of `lf`.
lf = lf.unnest("separated_metrics")
lf.collect()

property_id,scada_id,metric_nice_name,unique_id,uom,timestamp,value,tanksize,is_esd,tank_type,tank_number,tank_metric
str,str,str,object,str,datetime[μs],f64,f64,str,str,str,str
"""98840""","""9884005""","""OilTank5Level""",0d96d173-3c6a-469f-901b-5cea8aa4210e,"""in""",2015-05-01 05:05:54.083239,74.240695,790.375905,,"""Oil""","""5""","""Level"""
"""98840""","""98840W2""","""WaterTank2Volume""",106e51eb-4faa-4d5c-b43e-210c92016f98,"""bbl""",2023-08-16 19:52:13.787936,357.148125,653.135767,,"""Water""","""2""","""Volume"""
"""98840""","""9884003""","""OilTank3Volume""",1158ad41-4c78-4d2f-83ec-2ff717c2d445,"""bbl""",2024-02-11 04:10:55.839466,284.213132,624.069952,,"""Oil""","""3""","""Volume"""
"""98840""","""98840FAC""","""ESD-OilTankID""",2748bdd4-4b4b-4add-aa16-251efbb55639,"""""",2024-07-05 18:19:24.387492,4.0,,"""ESD-""","""Oil""","""""","""ID"""
"""98840""","""9884004""","""OilTank4Level""",29576772-cde6-4767-a478-c7337a1c8c39,"""in""",2021-06-18 15:33:10.557032,75.002393,417.5281,,"""Oil""","""4""","""Level"""
…,…,…,…,…,…,…,…,…,…,…,…
"""98840""","""98840W2""","""WaterTank2Level""",b5a48faa-1f40-4887-a0e9-7a84a3a83366,"""in""",2019-04-05 01:41:07.809116,76.456344,653.135767,,"""Water""","""2""","""Level"""
"""98840""","""98840FAC""","""ESD-WaterTankInchesUntilAlarm""",c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,"""in""",2024-08-16 04:45:27.763083,257.847738,,"""ESD-""","""Water""","""""","""InchesUntilAlarm"""
"""98840""","""9884005""","""OilTank5Volume""",db762e5e-9c0c-4f77-864b-df5e66865f2e,"""bbl""",2023-03-17 20:34:17.444289,300.447955,790.375905,,"""Oil""","""5""","""Volume"""
"""98840""","""98840FAC""","""ESD-OilTankInchesUntilAlarm""",f3c718bf-58e8-488e-9445-99fae307d251,"""in""",2024-08-22 10:59:43.894659,102.478181,,"""ESD-""","""Oil""","""""","""InchesUntilAlarm"""


In [12]:
# The regex yields tank numbers as strings. A non-strict cast turns the empty
# match of the unnumbered "ESD-" rows into null instead of raising.
lf = lf.with_columns(tank_number=pl.col("tank_number").cast(pl.UInt8, strict=False))
lf.collect()

property_id,scada_id,metric_nice_name,unique_id,uom,timestamp,value,tanksize,is_esd,tank_type,tank_number,tank_metric
str,str,str,object,str,datetime[μs],f64,f64,str,str,u8,str
"""98840""","""9884005""","""OilTank5Level""",0d96d173-3c6a-469f-901b-5cea8aa4210e,"""in""",2015-05-01 05:05:54.083239,74.240695,790.375905,,"""Oil""",5,"""Level"""
"""98840""","""98840W2""","""WaterTank2Volume""",106e51eb-4faa-4d5c-b43e-210c92016f98,"""bbl""",2023-08-16 19:52:13.787936,357.148125,653.135767,,"""Water""",2,"""Volume"""
"""98840""","""9884003""","""OilTank3Volume""",1158ad41-4c78-4d2f-83ec-2ff717c2d445,"""bbl""",2024-02-11 04:10:55.839466,284.213132,624.069952,,"""Oil""",3,"""Volume"""
"""98840""","""98840FAC""","""ESD-OilTankID""",2748bdd4-4b4b-4add-aa16-251efbb55639,"""""",2024-07-05 18:19:24.387492,4.0,,"""ESD-""","""Oil""",,"""ID"""
"""98840""","""9884004""","""OilTank4Level""",29576772-cde6-4767-a478-c7337a1c8c39,"""in""",2021-06-18 15:33:10.557032,75.002393,417.5281,,"""Oil""",4,"""Level"""
…,…,…,…,…,…,…,…,…,…,…,…
"""98840""","""98840W2""","""WaterTank2Level""",b5a48faa-1f40-4887-a0e9-7a84a3a83366,"""in""",2019-04-05 01:41:07.809116,76.456344,653.135767,,"""Water""",2,"""Level"""
"""98840""","""98840FAC""","""ESD-WaterTankInchesUntilAlarm""",c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,"""in""",2024-08-16 04:45:27.763083,257.847738,,"""ESD-""","""Water""",,"""InchesUntilAlarm"""
"""98840""","""9884005""","""OilTank5Volume""",db762e5e-9c0c-4f77-864b-df5e66865f2e,"""bbl""",2023-03-17 20:34:17.444289,300.447955,790.375905,,"""Oil""",5,"""Volume"""
"""98840""","""98840FAC""","""ESD-OilTankInchesUntilAlarm""",f3c718bf-58e8-488e-9445-99fae307d251,"""in""",2024-08-22 10:59:43.894659,102.478181,,"""ESD-""","""Oil""",,"""InchesUntilAlarm"""


In [13]:
# Pivot the long readings into one column per metric. `unique_id` is part of
# the key, so each raw reading keeps its own row with a single metric filled.
# NOTE(review): the loading cell's preview shows `source_key`, not `scada_id` —
# confirm the column name before re-running.
values = pl.col("value")
columns = pl.col("tank_metric")
metric_columns = [
    values.filter(columns == metric).first().alias(metric)
    for metric in tank_metrics
]
pivoted_lf = lf.group_by(
    ["property_id", "tank_type", "tank_number", "scada_id", "unique_id"]
).agg(metric_columns)

pivoted_lf.collect()

property_id,tank_type,tank_number,scada_id,unique_id,Level,Volume,InchesUntilAlarm,InchesToESD,TimeUntilESD,Capacity,ID
str,str,u8,str,object,f64,f64,f64,f64,f64,f64,f64
"""98840""","""Oil""",5,"""9884005""",0d96d173-3c6a-469f-901b-5cea8aa4210e,74.240695,,,,,,
"""98840""","""Oil""",5,"""9884005""",db762e5e-9c0c-4f77-864b-df5e66865f2e,,300.447955,,,,,
"""98840""","""Oil""",2,"""9884002""",686fc368-3d98-4608-aef0-5af0eb416e8c,77.590071,,,,,,
"""98840""","""Oil""",2,"""9884002""",fa6552b3-1dce-4452-a35b-b5fb35868a02,,293.977423,,,,,
"""98840""","""Oil""",3,"""9884003""",1158ad41-4c78-4d2f-83ec-2ff717c2d445,,284.213132,,,,,
…,…,…,…,…,…,…,…,…,…,…,…
"""98840""","""Water""",,"""98840FAC""",c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,,,257.847738,,,,
"""98840""","""Oil""",1,"""9884001""",8748b483-ee0b-460d-bc01-d1e37319d283,,299.173063,,,,,
"""98840""","""Oil""",1,"""9884001""",ad86132f-da25-46d0-a4d2-6ad6f29be58f,79.02198,,,,,,
"""98840""","""Oil""",,"""98840FAC""",2748bdd4-4b4b-4add-aa16-251efbb55639,,,,,,,4.0


In [14]:
# Split the pivot into rows that already know their tank number and rows that
# do not (the unnumbered "ESD-" readings).
null_condition = pl.col("tank_number").is_null()
not_null_condition = pl.col("tank_number").is_not_null()

numbered_tanks = pivoted_lf.filter(not_null_condition)

# For the unnumbered rows the "ID" metric supplies the tank number: copy it
# over (non-strict cast to UInt8) and drop the now redundant "ID" column.
tank_number_from_id = pl.col("ID").cast(pl.UInt8, strict=False).alias("tank_number")
null_tanks = (
    pivoted_lf.filter(null_condition)
    .with_columns(tank_number_from_id)
    .drop("ID")
)

null_tanks.collect()

property_id,tank_type,tank_number,scada_id,unique_id,Level,Volume,InchesUntilAlarm,InchesToESD,TimeUntilESD,Capacity
str,str,u8,str,object,f64,f64,f64,f64,f64,f64
"""98840""","""Oil""",4.0,"""98840FAC""",2748bdd4-4b4b-4add-aa16-251efbb55639,,,,,,
"""98840""","""Oil""",,"""98840FAC""",f3c718bf-58e8-488e-9445-99fae307d251,,,102.478181,,,
"""98840""","""Water""",2.0,"""98840FAC""",32771b87-74d0-475a-b213-9351f5b5b907,,,,,,
"""98840""","""Water""",,"""98840FAC""",c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,,,257.847738,,,


In [21]:
# Preview the rows that already carry a tank number.
numbered_tanks.collect()

property_id,tank_type,tank_number,scada_id,unique_id,Level,Volume,InchesUntilAlarm,InchesToESD,TimeUntilESD,Capacity,ID
str,str,u8,str,object,f64,f64,f64,f64,f64,f64,f64
"""98840""","""Oil""",6,"""9884006""",63176073-1766-4d40-8a15-72822e6c7220,82.279163,,,,,,
"""98840""","""Oil""",6,"""9884006""",a47c894d-19fc-4a95-9f7c-18e743de30c2,,309.826949,,,,,
"""98840""","""Oil""",4,"""9884004""",29576772-cde6-4767-a478-c7337a1c8c39,75.002393,,,,,,
"""98840""","""Oil""",4,"""9884004""",376c3e87-6b87-4fc0-910f-f952e11d663b,,268.335022,,,,,
"""98840""","""Oil""",2,"""9884002""",686fc368-3d98-4608-aef0-5af0eb416e8c,77.590071,,,,,,
…,…,…,…,…,…,…,…,…,…,…,…
"""98840""","""Water""",2,"""98840W2""",b5a48faa-1f40-4887-a0e9-7a84a3a83366,76.456344,,,,,,
"""98840""","""Water""",1,"""98840W1""",2cd89218-f881-41d7-a97a-7d06deacb5f5,,267.90299,,,,,
"""98840""","""Water""",1,"""98840W1""",b40ee794-21e3-4424-b0c5-b7fa205eb2f8,79.941493,,,,,,
"""98840""","""Oil""",1,"""9884001""",8748b483-ee0b-460d-bc01-d1e37319d283,,299.173063,,,,,


In [23]:
# Keep each tank reading's id under the name `identifier` and remove the
# original `unique_id` column. `numbered_tanks` is reassigned, so a second run
# of this cell will not find `unique_id` any more.
numbered_tanks = numbered_tanks.with_columns(identifier=pl.col("unique_id")).drop(
    "unique_id"
)
numbered_tanks.collect()

property_id,tank_type,tank_number,scada_id,Level,Volume,InchesUntilAlarm,InchesToESD,TimeUntilESD,Capacity,ID,identifier
str,str,u8,str,f64,f64,f64,f64,f64,f64,f64,object
"""98840""","""Water""",1,"""98840W1""",,267.90299,,,,,,2cd89218-f881-41d7-a97a-7d06deacb5f5
"""98840""","""Water""",1,"""98840W1""",79.941493,,,,,,,b40ee794-21e3-4424-b0c5-b7fa205eb2f8
"""98840""","""Water""",2,"""98840W2""",,357.148125,,,,,,106e51eb-4faa-4d5c-b43e-210c92016f98
"""98840""","""Water""",2,"""98840W2""",76.456344,,,,,,,b5a48faa-1f40-4887-a0e9-7a84a3a83366
"""98840""","""Oil""",4,"""9884004""",75.002393,,,,,,,29576772-cde6-4767-a478-c7337a1c8c39
…,…,…,…,…,…,…,…,…,…,…,…
"""98840""","""Oil""",5,"""9884005""",,300.447955,,,,,,db762e5e-9c0c-4f77-864b-df5e66865f2e
"""98840""","""Oil""",3,"""9884003""",,284.213132,,,,,,1158ad41-4c78-4d2f-83ec-2ff717c2d445
"""98840""","""Oil""",3,"""9884003""",77.392763,,,,,,,9db75bdf-afaf-4a54-9715-2e7fad8d2d85
"""98840""","""Oil""",6,"""9884006""",82.279163,,,,,,,63176073-1766-4d40-8a15-72822e6c7220


In [25]:
# Collapse the unnumbered rows to one per (property, type, source). max()
# skips nulls, so the tank number taken from the "ID" row and the metric values
# of its sibling rows land on the same line.
# NOTE(review): the loading cell's preview shows `source_key`, not `scada_id`.
merged_metrics = (
    "Level",
    "Volume",
    "InchesUntilAlarm",
    "InchesToESD",
    "TimeUntilESD",
    "Capacity",
)
pp = null_tanks.group_by("property_id", "tank_type", "scada_id").agg(
    pl.col("tank_number").max(),
    pl.col("unique_id").last(),
    *(pl.col(name).max() for name in merged_metrics),
)

pp.collect()

property_id,tank_type,scada_id,tank_number,unique_id,Level,Volume,InchesUntilAlarm,InchesToESD,TimeUntilESD,Capacity
str,str,str,u8,object,f64,f64,f64,f64,f64,f64
"""98840""","""Water""","""98840FAC""",2,c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,,,257.847738,,,
"""98840""","""Oil""","""98840FAC""",4,f3c718bf-58e8-488e-9445-99fae307d251,,,102.478181,,,


In [26]:
# Attach the merged unnumbered readings to the tank they point at; columns
# contributed by `pp` show up with the "_right" suffix.
tank_key = ["property_id", "tank_type", "tank_number"]
joined_lf = numbered_tanks.join(pp, how="left", on=tank_key)
joined_lf.collect()

property_id,tank_type,tank_number,scada_id,Level,Volume,InchesUntilAlarm,InchesToESD,TimeUntilESD,Capacity,ID,identifier,scada_id_right,unique_id,Level_right,Volume_right,InchesUntilAlarm_right,InchesToESD_right,TimeUntilESD_right,Capacity_right
str,str,u8,str,f64,f64,f64,f64,f64,f64,f64,object,str,object,f64,f64,f64,f64,f64,f64
"""98840""","""Oil""",6,"""9884006""",82.279163,,,,,,,63176073-1766-4d40-8a15-72822e6c7220,,,,,,,,
"""98840""","""Oil""",6,"""9884006""",,309.826949,,,,,,a47c894d-19fc-4a95-9f7c-18e743de30c2,,,,,,,,
"""98840""","""Oil""",2,"""9884002""",77.590071,,,,,,,686fc368-3d98-4608-aef0-5af0eb416e8c,,,,,,,,
"""98840""","""Oil""",2,"""9884002""",,293.977423,,,,,,fa6552b3-1dce-4452-a35b-b5fb35868a02,,,,,,,,
"""98840""","""Oil""",1,"""9884001""",,299.173063,,,,,,8748b483-ee0b-460d-bc01-d1e37319d283,,,,,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""98840""","""Water""",1,"""98840W1""",79.941493,,,,,,,b40ee794-21e3-4424-b0c5-b7fa205eb2f8,,,,,,,,
"""98840""","""Oil""",3,"""9884003""",,284.213132,,,,,,1158ad41-4c78-4d2f-83ec-2ff717c2d445,,,,,,,,
"""98840""","""Oil""",3,"""9884003""",77.392763,,,,,,,9db75bdf-afaf-4a54-9715-2e7fad8d2d85,,,,,,,,
"""98840""","""Water""",2,"""98840W2""",,357.148125,,,,,,106e51eb-4faa-4d5c-b43e-210c92016f98,"""98840FAC""",c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,,,257.847738,,,


In [30]:
# Join the raw readings back in (this is where `tanksize` comes from). Each
# tank row is repeated once per raw reading that shares its source id.
# NOTE(review): the loading cell's preview shows `source_key`, not `scada_id`.
final_lf = joined_lf.join(lf, how="left", on="scada_id")
final_lf.collect()

property_id,tank_type,tank_number,scada_id,Level,Volume,InchesUntilAlarm,InchesToESD,TimeUntilESD,Capacity,ID,identifier,scada_id_right,unique_id,Level_right,Volume_right,InchesUntilAlarm_right,InchesToESD_right,TimeUntilESD_right,Capacity_right,property_id_right,metric_nice_name,unique_id_right,uom,timestamp,value,tanksize,is_esd,tank_type_right,tank_number_right,tank_metric
str,str,u8,str,f64,f64,f64,f64,f64,f64,f64,object,str,object,f64,f64,f64,f64,f64,f64,str,str,object,str,datetime[μs],f64,f64,str,str,u8,str
"""98840""","""Oil""",3,"""9884003""",,284.213132,,,,,,1158ad41-4c78-4d2f-83ec-2ff717c2d445,,,,,,,,,"""98840""","""OilTank3Volume""",1158ad41-4c78-4d2f-83ec-2ff717c2d445,"""bbl""",2024-02-11 04:10:55.839466,284.213132,624.069952,,"""Oil""",3,"""Volume"""
"""98840""","""Oil""",3,"""9884003""",,284.213132,,,,,,1158ad41-4c78-4d2f-83ec-2ff717c2d445,,,,,,,,,"""98840""","""OilTank3Level""",9db75bdf-afaf-4a54-9715-2e7fad8d2d85,"""in""",2015-11-29 12:01:20.722028,77.392763,624.069952,,"""Oil""",3,"""Level"""
"""98840""","""Oil""",3,"""9884003""",77.392763,,,,,,,9db75bdf-afaf-4a54-9715-2e7fad8d2d85,,,,,,,,,"""98840""","""OilTank3Volume""",1158ad41-4c78-4d2f-83ec-2ff717c2d445,"""bbl""",2024-02-11 04:10:55.839466,284.213132,624.069952,,"""Oil""",3,"""Volume"""
"""98840""","""Oil""",3,"""9884003""",77.392763,,,,,,,9db75bdf-afaf-4a54-9715-2e7fad8d2d85,,,,,,,,,"""98840""","""OilTank3Level""",9db75bdf-afaf-4a54-9715-2e7fad8d2d85,"""in""",2015-11-29 12:01:20.722028,77.392763,624.069952,,"""Oil""",3,"""Level"""
"""98840""","""Oil""",1,"""9884001""",,299.173063,,,,,,8748b483-ee0b-460d-bc01-d1e37319d283,,,,,,,,,"""98840""","""OilTank1Volume""",8748b483-ee0b-460d-bc01-d1e37319d283,"""bbl""",2018-01-11 09:12:20.082822,299.173063,554.086643,,"""Oil""",1,"""Volume"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""98840""","""Oil""",5,"""9884005""",,300.447955,,,,,,db762e5e-9c0c-4f77-864b-df5e66865f2e,,,,,,,,,"""98840""","""OilTank5Volume""",db762e5e-9c0c-4f77-864b-df5e66865f2e,"""bbl""",2023-03-17 20:34:17.444289,300.447955,790.375905,,"""Oil""",5,"""Volume"""
"""98840""","""Oil""",4,"""9884004""",75.002393,,,,,,,29576772-cde6-4767-a478-c7337a1c8c39,"""98840FAC""",f3c718bf-58e8-488e-9445-99fae307d251,,,102.478181,,,,"""98840""","""OilTank4Level""",29576772-cde6-4767-a478-c7337a1c8c39,"""in""",2021-06-18 15:33:10.557032,75.002393,417.5281,,"""Oil""",4,"""Level"""
"""98840""","""Oil""",4,"""9884004""",75.002393,,,,,,,29576772-cde6-4767-a478-c7337a1c8c39,"""98840FAC""",f3c718bf-58e8-488e-9445-99fae307d251,,,102.478181,,,,"""98840""","""OilTank4Volume""",376c3e87-6b87-4fc0-910f-f952e11d663b,"""bbl""",2016-07-11 16:10:15.036230,268.335022,417.5281,,"""Oil""",4,"""Volume"""
"""98840""","""Oil""",4,"""9884004""",,268.335022,,,,,,376c3e87-6b87-4fc0-910f-f952e11d663b,"""98840FAC""",f3c718bf-58e8-488e-9445-99fae307d251,,,102.478181,,,,"""98840""","""OilTank4Level""",29576772-cde6-4767-a478-c7337a1c8c39,"""in""",2021-06-18 15:33:10.557032,75.002393,417.5281,,"""Oil""",4,"""Level"""


In [31]:
# Derive the output columns: prefer the tank's own reading and fall back to the
# value carried by the matched unnumbered ("_right") row.
final_lf = final_lf.with_columns(
    # NOTE(review): this replaces the tank's own reading id with the id of the
    # matched unnumbered row, so tanks without such a row end up with a null
    # identifier — confirm this is intended.
    pl.col("unique_id").alias("identifier"),
    pl.coalesce(pl.col("Level"), pl.col("Level_right")).alias("level"),
    pl.coalesce(pl.col("Volume"), pl.col("Volume_right")).alias("volume"),
    # The first two entries keep the original precedence. The last two only
    # fill values that were previously left null even though the joined frame
    # carried a reading for them.
    pl.coalesce(
        pl.col("InchesToESD"),
        pl.col("InchesUntilAlarm_right"),
        pl.col("InchesToESD_right"),
        pl.col("InchesUntilAlarm"),
    ).alias("inches_to_esd"),
    pl.coalesce(pl.col("TimeUntilESD"), pl.col("TimeUntilESD_right")).alias("time_until_esd"),
    pl.coalesce(pl.col("Capacity"), pl.col("tanksize")).alias("capacity"),
)

# NOTE(review): the loading cell's preview shows `source_key`, not `scada_id` —
# confirm the column name before re-running.
required_columns = ["identifier", "property_id", "scada_id", "tank_type", "tank_number", "level", "volume", "inches_to_esd", "time_until_esd", "capacity"]
final_lf = final_lf.select(required_columns)
final_lf.collect()

identifier,property_id,scada_id,tank_type,tank_number,level,volume,inches_to_esd,time_until_esd,capacity
object,str,str,str,u8,f64,f64,f64,f64,f64
f3c718bf-58e8-488e-9445-99fae307d251,"""98840""","""9884004""","""Oil""",4,75.002393,,102.478181,,417.5281
f3c718bf-58e8-488e-9445-99fae307d251,"""98840""","""9884004""","""Oil""",4,75.002393,,102.478181,,417.5281
f3c718bf-58e8-488e-9445-99fae307d251,"""98840""","""9884004""","""Oil""",4,,268.335022,102.478181,,417.5281
f3c718bf-58e8-488e-9445-99fae307d251,"""98840""","""9884004""","""Oil""",4,,268.335022,102.478181,,417.5281
,"""98840""","""9884006""","""Oil""",6,82.279163,,,,400.362155
…,…,…,…,…,…,…,…,…,…
,"""98840""","""9884005""","""Oil""",5,,300.447955,,,790.375905
,"""98840""","""9884001""","""Oil""",1,,299.173063,,,554.086643
,"""98840""","""9884001""","""Oil""",1,,299.173063,,,554.086643
,"""98840""","""9884001""","""Oil""",1,79.02198,,,,554.086643


In [32]:
# Order the rows by property, then tank type, then tank number.
sort_keys = ["property_id", "tank_type", "tank_number"]
final_lf = final_lf.sort(sort_keys)
final_lf.collect()

identifier,property_id,scada_id,tank_type,tank_number,level,volume,inches_to_esd,time_until_esd,capacity
object,str,str,str,u8,f64,f64,f64,f64,f64
,"""98840""","""9884001""","""Oil""",1,79.02198,,,,554.086643
,"""98840""","""9884001""","""Oil""",1,79.02198,,,,554.086643
,"""98840""","""9884001""","""Oil""",1,,299.173063,,,554.086643
,"""98840""","""9884001""","""Oil""",1,,299.173063,,,554.086643
,"""98840""","""9884002""","""Oil""",2,,293.977423,,,735.848157
…,…,…,…,…,…,…,…,…,…
,"""98840""","""98840W1""","""Water""",1,,267.90299,,,696.762014
c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,"""98840""","""98840W2""","""Water""",2,,357.148125,257.847738,,653.135767
c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,"""98840""","""98840W2""","""Water""",2,,357.148125,257.847738,,653.135767
c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,"""98840""","""98840W2""","""Water""",2,76.456344,,257.847738,,653.135767


In [33]:
# Fill level as a whole-number percentage of capacity. Rows whose volume is
# null stay null.
fill_fraction = pl.col("volume") / pl.col("capacity")
percent_tank_full = (fill_fraction * 100).round().cast(pl.UInt8)

final_lf = final_lf.with_columns(percent_full=percent_tank_full)
final_lf.collect()

identifier,property_id,scada_id,tank_type,tank_number,level,volume,inches_to_esd,time_until_esd,capacity,percent_full
object,str,str,str,u8,f64,f64,f64,f64,f64,u8
,"""98840""","""9884001""","""Oil""",1,79.02198,,,,554.086643,
,"""98840""","""9884001""","""Oil""",1,79.02198,,,,554.086643,
,"""98840""","""9884001""","""Oil""",1,,299.173063,,,554.086643,54
,"""98840""","""9884001""","""Oil""",1,,299.173063,,,554.086643,54
,"""98840""","""9884002""","""Oil""",2,77.590071,,,,735.848157,
…,…,…,…,…,…,…,…,…,…,…
,"""98840""","""98840W1""","""Water""",1,,267.90299,,,696.762014,38
c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,"""98840""","""98840W2""","""Water""",2,,357.148125,257.847738,,653.135767,55
c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,"""98840""","""98840W2""","""Water""",2,,357.148125,257.847738,,653.135767,55
c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,"""98840""","""98840W2""","""Water""",2,76.456344,,257.847738,,653.135767,


In [34]:
# Replace `capacity` with its rounded value (the column stays a float).
capacity_rounded = pl.col("capacity").round()
final_lf = final_lf.with_columns(capacity=capacity_rounded)
final_lf.collect()

identifier,property_id,scada_id,tank_type,tank_number,level,volume,inches_to_esd,time_until_esd,capacity,percent_full
object,str,str,str,u8,f64,f64,f64,f64,f64,u8
,"""98840""","""9884001""","""Oil""",1,79.02198,,,,554.0,
,"""98840""","""9884001""","""Oil""",1,79.02198,,,,554.0,
,"""98840""","""9884001""","""Oil""",1,,299.173063,,,554.0,54
,"""98840""","""9884001""","""Oil""",1,,299.173063,,,554.0,54
,"""98840""","""9884002""","""Oil""",2,77.590071,,,,736.0,
…,…,…,…,…,…,…,…,…,…,…
,"""98840""","""98840W1""","""Water""",1,79.941493,,,,697.0,
c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,"""98840""","""98840W2""","""Water""",2,,357.148125,257.847738,,653.0,55
c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,"""98840""","""98840W2""","""Water""",2,76.456344,,257.847738,,653.0,
c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,"""98840""","""98840W2""","""Water""",2,76.456344,,257.847738,,653.0,


In [35]:
# Replace `volume` with its rounded value as an unsigned integer. Despite the
# variable name, the expression only rounds and casts — no unit conversion.
volume_to_feet = pl.col("volume").round().cast(pl.UInt64)
final_lf = final_lf.with_columns(volume=volume_to_feet)

final_lf.collect()

identifier,property_id,scada_id,tank_type,tank_number,level,volume,inches_to_esd,time_until_esd,capacity,percent_full
object,str,str,str,u8,f64,u64,f64,f64,f64,u8
,"""98840""","""9884001""","""Oil""",1,,299,,,554.0,54
,"""98840""","""9884001""","""Oil""",1,,299,,,554.0,54
,"""98840""","""9884001""","""Oil""",1,79.02198,,,,554.0,
,"""98840""","""9884001""","""Oil""",1,79.02198,,,,554.0,
,"""98840""","""9884002""","""Oil""",2,77.590071,,,,736.0,
…,…,…,…,…,…,…,…,…,…,…
,"""98840""","""98840W1""","""Water""",1,,268,,,697.0,38
c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,"""98840""","""98840W2""","""Water""",2,,357,257.847738,,653.0,55
c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,"""98840""","""98840W2""","""Water""",2,,357,257.847738,,653.0,55
c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,"""98840""","""98840W2""","""Water""",2,76.456344,,257.847738,,653.0,


In [39]:
# Each tank still spans several rows with one metric filled per row. max()
# skips nulls, so aggregating per tank merges them into one complete row.
# NOTE(review): the loading cell's preview shows `source_key`, not `scada_id`.
collapsed_columns = (
    "level",
    "volume",
    "inches_to_esd",
    "time_until_esd",
    "capacity",
    "percent_full",
)
new_pp = final_lf.group_by("property_id", "scada_id", "tank_type", "tank_number").agg(
    pl.col("identifier").first(),
    *(pl.col(name).max() for name in collapsed_columns),
)

new_pp.collect()

property_id,scada_id,tank_type,tank_number,identifier,level,volume,inches_to_esd,time_until_esd,capacity,percent_full
str,str,str,u8,object,f64,u64,f64,f64,f64,u8
"""98840""","""9884005""","""Oil""",5,,74.240695,300,,,790.0,38
"""98840""","""9884004""","""Oil""",4,f3c718bf-58e8-488e-9445-99fae307d251,75.002393,268,102.478181,,418.0,64
"""98840""","""9884006""","""Oil""",6,,82.279163,310,,,400.0,77
"""98840""","""98840W1""","""Water""",1,,79.941493,268,,,697.0,38
"""98840""","""9884003""","""Oil""",3,,77.392763,284,,,624.0,46
"""98840""","""98840W2""","""Water""",2,c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,76.456344,357,257.847738,,653.0,55
"""98840""","""9884002""","""Oil""",2,,77.590071,294,,,736.0,40
"""98840""","""9884001""","""Oil""",1,,79.02198,299,,,554.0,54


In [41]:
# group_by does not keep a stable row order, so sort for a readable table.
sort_keys = ["property_id", "tank_type", "tank_number"]
new_pp = new_pp.sort(sort_keys)
new_pp.collect()

property_id,scada_id,tank_type,tank_number,identifier,level,volume,inches_to_esd,time_until_esd,capacity,percent_full
str,str,str,u8,object,f64,u64,f64,f64,f64,u8
"""98840""","""9884001""","""Oil""",1,,79.02198,299,,,554.0,54
"""98840""","""9884002""","""Oil""",2,,77.590071,294,,,736.0,40
"""98840""","""9884003""","""Oil""",3,,77.392763,284,,,624.0,46
"""98840""","""9884004""","""Oil""",4,f3c718bf-58e8-488e-9445-99fae307d251,75.002393,268,102.478181,,418.0,64
"""98840""","""9884005""","""Oil""",5,,74.240695,300,,,790.0,38
"""98840""","""9884006""","""Oil""",6,,82.279163,310,,,400.0,77
"""98840""","""98840W1""","""Water""",1,,79.941493,268,,,697.0,38
"""98840""","""98840W2""","""Water""",2,c4abef08-5ca3-43b2-a9a4-fdf5aa6210df,76.456344,357,257.847738,,653.0,55
