# Hack4Rail - Challenge 9

Sample notebook to access battery data on Snowflake

In [None]:
from dotenv import load_dotenv
import plotly.express as px
from charged.snowflake_utils import create_session
import snowflake.snowpark.functions as F
 
# Load environment variables from a .env file into the process environment
# before the session is created.
# NOTE(review): presumably create_session() reads its Snowflake connection
# settings from these variables - confirm in charged.snowflake_utils.
load_dotenv()

# Create the Snowpark session that every query in this notebook runs on.
session = create_session()

## Time Series

In [None]:
# Query the BATTERIELOK_DATA table (rows without a VEHICLE_ID are dropped) and
# derive two helper columns that the cells below group and filter on:
#   TIMESTAMP_TRUNC - TIMESTAMP_VEHICLE truncated to the start of its minute
#   DATE            - calendar day of TIMESTAMP_VEHICLE
table = "BATTERIELOK_DATA"
sdf = (
    session
    .table(table)
    .filter(F.col("VEHICLE_ID").isNotNull())
    .with_column(
        "TIMESTAMP_TRUNC",
        # Floor (not round) the epoch seconds to a multiple of 60: rounding
        # would move samples from the second half of a minute into the next
        # bucket, so e.g. 23:59:40 would land on the following day's 00:00 while
        # its DATE column still points at the current day.
        F.from_unixtime(
            F.floor(F.unix_timestamp(F.col("TIMESTAMP_VEHICLE")) / 60) * 60
        ).cast("TIMESTAMP"),
    )
    .with_column(
        "DATE",
        F.date_trunc("DAY", "TIMESTAMP_VEHICLE").cast("DATE"),
    )
)
# Preview the first rows of the prepared (still lazy) Snowpark DataFrame.
sdf.show()

In [None]:
# Signals to aggregate in the Time Series section
# (run `sdf.columns` to list everything that is available).
columns = [
    "VEHICLE_OUTSIDE_TEMP",
    "BATTERY_SOC",
    "BATTERY_SOH",
    "BATTERY_COOLING_TEMP",
    # BATTERY_1 .. BATTERY_4 each contribute TEMP, VOLTAGE and CURRENT
    *(
        f"BATTERY_{pack}_{signal}"
        for pack in range(1, 5)
        for signal in ("TEMP", "VOLTAGE", "CURRENT")
    ),
    # only the voltage is listed for BATTERY_5
    "BATTERY_5_VOLTAGE",
]

# Inclusive date window (YYYY-MM-DD) shared by the aggregation cells below.
from_date = '2025-06-15'
to_date = '2025-06-23'

In [None]:
# One AVG aggregation per configured signal; each result column is named
# "<signal>_AVG".
aggregations = [F.avg(F.col(name)).alias(f"{name}_AVG") for name in columns]

# Restrict to [from_date, to_date], average per vehicle and TIMESTAMP_TRUNC
# bucket, and pull the ordered result into a pandas DataFrame.
df = (
    sdf
    .filter(
        (F.to_date(F.col("TIMESTAMP_VEHICLE")) >= from_date)
        & (F.to_date(F.col("TIMESTAMP_VEHICLE")) <= to_date)
    )
    .group_by("VEHICLE_ID", "TIMESTAMP_TRUNC")
    .agg(*aggregations)
    .sort("VEHICLE_ID", "TIMESTAMP_TRUNC")  # ascending is the default
    .to_pandas()
)
df.head()


In [None]:
# Line chart of BATTERY_SOC_AVG over TIMESTAMP_TRUNC, one facet row per vehicle.
# (Use color="VEHICLE_ID" instead of facet_row to overlay all vehicles.)
fig = px.line(
    data_frame=df,
    x="TIMESTAMP_TRUNC",
    y="BATTERY_SOC_AVG",
    facet_row="VEHICLE_ID",
    markers=True,
    render_mode="svg",
)

fig.show()

## Overview

Overview of battery health (`BATTERY_SOH`) per vehicle and day

In [None]:
# Preview the prepared Snowpark DataFrame again before aggregating it.
sdf.show()

In [None]:
# Average BATTERY_SOH per vehicle, DATE and TIMESTAMP_TRUNC bucket within
# [from_date, to_date]. The rows stay at bucket level (rather than one row per
# day) so that the box plot in the next cell can show the spread within a DATE.
#
# The aggregation is written inline instead of going through `columns` /
# `aggregations`: reassigning those notebook-level names here would change what
# the Time Series aggregation cell computes if it were re-run afterwards.
df = (
    sdf
    .filter(F.to_date(F.col("TIMESTAMP_VEHICLE")) >= from_date)
    .filter(F.to_date(F.col("TIMESTAMP_VEHICLE")) <= to_date)
    .group_by(["VEHICLE_ID", "DATE", "TIMESTAMP_TRUNC"])
    .agg(F.avg(F.col("BATTERY_SOH")).alias("BATTERY_SOH_AVG"))
    .order_by(["VEHICLE_ID", "TIMESTAMP_TRUNC"], ascending=[True, True])
    .to_pandas()
)
df.head()

In [None]:
# Box plot of BATTERY_SOH_AVG for each DATE, one facet row per vehicle.
fig = px.box(data_frame=df, x="DATE", y="BATTERY_SOH_AVG", facet_row="VEHICLE_ID")
# Decouple the y-axes so every facet gets its own range and tick labels.
fig.update_yaxes(showticklabels=True, matches=None)

fig.show()

## Errors

In [None]:
# Inspect the column types of the Snowpark DataFrame. The ERRORS column is
# listed as: ('ERRORS', ArrayType(StringType()), nullable=True)
sdf.schema

In [None]:
# Number of rows with a non-empty ERRORS array per vehicle and DATE.
# The window starts at the fixed date 2025-01-01 and ends at `to_date`.
df = (
    sdf
    .filter(
        (F.to_date(F.col("TIMESTAMP_VEHICLE")) >= '2025-01-01')
        & (F.to_date(F.col("TIMESTAMP_VEHICLE")) <= to_date)
        & (F.size(F.col("ERRORS")) > 0)
    )
    .group_by("VEHICLE_ID", "DATE")
    .count()
    .sort("VEHICLE_ID", "DATE")  # ascending is the default
    .to_pandas()
)
df.head()

In [None]:
# Bar chart of the COUNT column per DATE, one facet row per vehicle.
fig = px.bar(data_frame=df, x="DATE", y="COUNT", facet_row="VEHICLE_ID")
# Decouple the y-axes so every facet gets its own range and tick labels.
fig.update_yaxes(showticklabels=True, matches=None)

fig.show()

## KPIs

In [None]:
# Mean BATTERY_SOH per vehicle and DATE for 2025-06-01 .. 2025-06-24 (inclusive).
# NOTE(review): the alias reads "SOG" although the source column is BATTERY_SOH;
# it is kept as-is because the plotting cell selects this exact column name.
df = (
    sdf
    .filter(
        (F.to_date(F.col("TIMESTAMP_VEHICLE")) >= '2025-06-01')
        & (F.to_date(F.col("TIMESTAMP_VEHICLE")) <= '2025-06-24')
    )
    .group_by("VEHICLE_ID", "DATE")
    .agg(F.avg(F.col("BATTERY_SOH")).alias("BATTERY_SOG_AVG"))
    .sort("VEHICLE_ID", "DATE")  # ascending is the default
    .to_pandas()
)
df.head()

In [None]:
# Line chart of the BATTERY_SOG_AVG column per DATE, one facet row per vehicle.
fig = px.line(data_frame=df, x="DATE", y="BATTERY_SOG_AVG", facet_row="VEHICLE_ID")
# Decouple the y-axes so every facet gets its own range and tick labels.
fig.update_yaxes(showticklabels=True, matches=None)

fig.show()