# IoT Telemetry Weekly Report (Oracle → Pandas)

Weekly report (last 7 days) from `IOT_TELEMETRY` generated by `02_generate_iot_telemetry.py`.

It covers:
- **Rentals**: start/stop timestamps, duration, distance (odometer delta)
- **Fuel (gasoil)**: fuel % at **ENGINE_START** vs **ENGINE_STOP**, plus **REFUEL** events
- Daily trends + charts


In [22]:
import os
from getpass import getpass

import pandas as pd
from sqlalchemy import create_engine, text

# Administrative (SYSTEM) connection used for the grant / synonym setup cells.
# Credentials are read from the environment so that the password is never
# stored in the notebook; when ORACLE_SYS_PASSWORD is unset the user is
# prompted interactively instead.
_sys_user = os.environ.get("ORACLE_SYS_USER", "system")
_sys_dsn = os.environ.get("ORACLE_DSN", "localhost:1521/XEPDB1")
_sys_password = os.environ.get("ORACLE_SYS_PASSWORD") or getpass(
    f"Oracle password for {_sys_user}: "
)

sys_engine = create_engine(
    "oracle+oracledb://",
    connect_args={"user": _sys_user, "password": _sys_password, "dsn": _sys_dsn},
    pool_pre_ping=True,
)
# Do not keep the clear-text password around in the kernel namespace.
del _sys_password

# Smoke test: show the connected user and the container name for the session.
with sys_engine.connect() as conn:
    print(conn.execute(text("select user, sys_context('USERENV','CON_NAME') from dual")).fetchone())

('SYSTEM', 'XEPDB1')


In [23]:
# Look up every object named IOT_TELEMETRY (any owner, any object type) in
# DBA_OBJECTS, to see which schema holds the table and which one only has a synonym.
owners_query = text("""
        SELECT owner, object_name, object_type
        FROM dba_objects
        WHERE object_name = 'IOT_TELEMETRY'
        ORDER BY owner
        """)

with sys_engine.connect() as conn:
    owners = pd.read_sql(owners_query, conn)

owners

Unnamed: 0,owner,object_name,object_type
0,CARRENTAL,IOT_TELEMETRY,SYNONYM
1,RAW_LAYER,IOT_TELEMETRY,TABLE


In [24]:
# Statements executed as SYSTEM, in order:
#   1. let CARRENTAL read the telemetry table owned by RAW_LAYER
#   2. create a synonym so CARRENTAL can query it without the schema prefix
# Optional (left disabled): "GRANT SELECT_CATALOG_ROLE TO carrental" would let
# CARRENTAL look tables up through ALL_TABLES / ALL_OBJECTS without errors.
setup_statements = (
    "GRANT SELECT ON raw_layer.iot_telemetry TO carrental",
    "CREATE OR REPLACE SYNONYM carrental.iot_telemetry FOR raw_layer.iot_telemetry",
)

with sys_engine.begin() as conn:
    for statement in setup_statements:
        conn.execute(text(statement))

print("‚úÖ Granted + synonym created")

‚úÖ Granted + synonym created


In [26]:
# Sanity check: the reported speed should roughly match the distance travelled
# between two consecutive telemetry points of the same car.
# NOTE(review): `df` (and `plt`) must already exist in the kernel; neither is
# created in the cells visible here - confirm the loading cell runs first.
df_sorted = df.sort_values(["CAR_ID", "TIMESTAMP"])

# Odometer delta between consecutive points of each car (NaN on a car's first row).
df_sorted["D_ODO"] = df_sorted.groupby("CAR_ID")["ODOMETER_KM"].diff()

# Theoretical speed over one sampling interval: km per interval -> km/h.
dt_sec = 30.0  # sampling step taken from IOT_INTERVAL_SECONDS = 30
df_sorted["SPEED_THEO"] = df_sorted["D_ODO"] * 3600.0 / dt_sec

# Draw at most 10,000 points; clamp to the rows available so that small
# datasets do not make DataFrame.sample raise ValueError.
valid_rows = df_sorted.dropna(subset=["SPEED_THEO", "SPEED_KMH"])
sample = valid_rows.sample(n=min(10_000, len(valid_rows)), random_state=42)

print(sample[["SPEED_KMH", "SPEED_THEO"]].describe())

plt.figure(figsize=(6, 6))
plt.scatter(sample["SPEED_THEO"], sample["SPEED_KMH"], s=3, alpha=0.3)
plt.xlabel("Vitesse th√©orique (km/h, √† partir ODOMETER_KM)")
plt.ylabel("SPEED_KMH mesur√©e")
plt.title("Tire 1 ‚Äì Coh√©rence SPEED_KMH vs ODOMETER_KM (√©chantillon)")
plt.show()

NameError: name 'df' is not defined

In [27]:
from sqlalchemy import text
import pandas as pd

# Number of distinct (car_id, rental_id) pairs that have telemetry in the
# current calendar month. Rows without a rental_id are excluded so that they
# are not counted as one extra "rental" per car, which also keeps this count
# consistent with the per-rental detail query that filters the same way.
# NOTE(review): `engine` is not created in the cells visible here - presumably
# a CARRENTAL connection defined elsewhere; confirm.
sql_count = text("""
SELECT COUNT(*) AS rentals_this_month
FROM (
  SELECT DISTINCT car_id, rental_id
  FROM iot_telemetry
  WHERE event_ts >= TRUNC(SYSTIMESTAMP, 'MM')
    AND event_ts <  ADD_MONTHS(TRUNC(SYSTIMESTAMP, 'MM'), 1)
    AND rental_id IS NOT NULL
)
""")

with engine.connect() as conn:
    rentals_this_month = pd.read_sql(sql_count, conn)

rentals_this_month


Unnamed: 0,rentals_this_month
0,60


In [28]:
from sqlalchemy import text
import pandas as pd

# Visibility check: list the IOT_TELEMETRY tables that the `engine` account
# can see through ALL_TABLES, one row per owning schema.
sql_can_see = text("""
SELECT owner, table_name
FROM all_tables
WHERE table_name = 'IOT_TELEMETRY'
ORDER BY owner
""")

with engine.connect() as visibility_conn:
    can_see = pd.read_sql(sql_can_see, con=visibility_conn)

can_see

Unnamed: 0,owner,table_name
0,RAW_LAYER,IOT_TELEMETRY


In [29]:
import os
from getpass import getpass

from sqlalchemy import create_engine, text

# Administrative (SYSTEM) connection. Credentials are read from the environment
# so that the password is never stored in the notebook; when
# ORACLE_SYS_PASSWORD is unset the user is prompted interactively instead.
_sys_user = os.environ.get("ORACLE_SYS_USER", "system")
_sys_dsn = os.environ.get("ORACLE_DSN", "localhost:1521/XEPDB1")
_sys_password = os.environ.get("ORACLE_SYS_PASSWORD") or getpass(
    f"Oracle password for {_sys_user}: "
)

sys_engine = create_engine(
    "oracle+oracledb://",
    connect_args={"user": _sys_user, "password": _sys_password, "dsn": _sys_dsn},
    pool_pre_ping=True,
)
# Do not keep the clear-text password around in the kernel namespace.
del _sys_password

# Give CARRENTAL read access to the RAW_LAYER tables and (re)create the synonym
# that lets it query the telemetry table without a schema prefix.
with sys_engine.begin() as conn:
    conn.execute(text("ALTER SESSION SET CONTAINER = XEPDB1"))
    conn.execute(text("GRANT SELECT ON RAW_LAYER.IOT_TELEMETRY TO CARRENTAL"))
    conn.execute(text("GRANT SELECT ON RAW_LAYER.CARS TO CARRENTAL"))
    conn.execute(text("GRANT SELECT ON RAW_LAYER.CAR_CATEGORIES TO CARRENTAL"))
    conn.execute(text("CREATE OR REPLACE SYNONYM CARRENTAL.IOT_TELEMETRY FOR RAW_LAYER.IOT_TELEMETRY"))

print("‚úÖ Grants + synonym done. Now rerun the rentals query.")

‚úÖ Grants + synonym done. Now rerun the rentals query.


In [30]:
import pandas as pd
import numpy as np
from sqlalchemy import text

# One row per (car_id, rental_id) with telemetry in the current calendar month.
# start_date = first ENGINE_START event, falling back to the first event of any type;
# end_date   = last ENGINE_STOP event, falling back to the last event of any type.
sql_rentals_details = text("""
SELECT
  car_id,
  rental_id,
  NVL(MIN(CASE WHEN event_type='ENGINE_START' THEN event_ts END), MIN(event_ts)) AS start_date,
  NVL(MAX(CASE WHEN event_type='ENGINE_STOP'  THEN event_ts END), MAX(event_ts)) AS end_date
FROM iot_telemetry
WHERE event_ts >= TRUNC(SYSTIMESTAMP, 'MM')
  AND event_ts <  ADD_MONTHS(TRUNC(SYSTIMESTAMP, 'MM'), 1)
  AND rental_id IS NOT NULL
GROUP BY car_id, rental_id
ORDER BY start_date, car_id, rental_id
""")

with engine.connect() as conn:
    rentals_details = pd.read_sql(sql_rentals_details, conn)

# Normalize the column names to upper case so the lookups below cannot raise
# KeyError because of the driver's column-name casing.
rentals_details.columns = [c.upper().strip() for c in rentals_details.columns]

# Parse both bounds as datetimes.
rentals_details["START_DATE"] = pd.to_datetime(rentals_details["START_DATE"])
rentals_details["END_DATE"]   = pd.to_datetime(rentals_details["END_DATE"])

# Exact duration in days, kept unrounded for the day count below.
duration_days = (
    rentals_details["END_DATE"] - rentals_details["START_DATE"]
).dt.total_seconds() / 86400

# Displayed duration, rounded to 4 decimals.
rentals_details["DURATION_DAYS"] = duration_days.round(4)

# Number of started days. The ceiling is taken on the exact duration: taking it
# on the rounded value would drop a day whenever the overshoot past a whole day
# is smaller than the rounding step (e.g. 2 days + 1 second).
rentals_details["DAYS_COUNT"] = np.ceil(duration_days).astype(int)

rentals_details


Unnamed: 0,CAR_ID,RENTAL_ID,START_DATE,END_DATE,DURATION_DAYS,DAYS_COUNT
0,2,1,2025-12-15 22:51:47,2025-12-17 22:51:47,2.0,2
1,3,1,2025-12-15 22:51:47,2025-12-27 09:19:30,11.4359,12
2,4,1,2025-12-15 22:51:47,2025-12-17 16:55:30,1.7526,2
3,5,1,2025-12-15 22:51:47,2025-12-15 23:58:17,0.0462,1
4,8,1,2025-12-15 22:51:47,2025-12-18 21:04:30,2.9255,3
5,10,1,2025-12-15 22:51:47,2025-12-16 13:58:30,0.6297,1
6,12,1,2025-12-15 22:51:47,2025-12-16 10:49:30,0.4984,1
7,14,1,2025-12-15 22:51:47,2025-12-18 22:51:47,3.0,3
8,7,1,2025-12-16 07:59:00,2025-12-19 10:40:30,3.1122,4
9,9,1,2025-12-17 10:43:00,2025-12-25 20:56:30,8.426,9


In [7]:
import pandas as pd

# 1) folium is required: print an install hint and re-raise when it is missing
try:
    import folium
    from folium.plugins import AntPath
except ImportError:
    print("‚ùå Le module 'folium' n'est pas install√©.")
    print("‚û° Installe-le d'abord avec :  !pip install folium")
    raise

# 2) The telemetry DataFrame `df` must already exist in the kernel
try:
    df
except NameError:
    raise NameError(
        "‚ùå Le DataFrame 'df' n'existe pas. "
        "Charge d'abord tes CSV dans df (ex: df = load_all_telemetry())."
    )

# 3) Every column used below must be present
required_cols = {"CAR_ID", "LATITUDE", "LONGITUDE", "TIMESTAMP", "EVENT_TYPE"}
missing = required_cols.difference(df.columns)
if missing:
    print("Colonnes pr√©sentes :", list(df.columns))
    raise KeyError(f"‚ùå Colonnes manquantes dans df : {missing}")

# 4) Basic cleaning: keep rows that have GPS coordinates, in chronological order
telemetry = df.dropna(subset=["LATITUDE", "LONGITUDE"]).sort_values("TIMESTAMP")

if telemetry.empty:
    raise ValueError("‚ùå Aucun point avec LATITUDE/LONGITUDE non nuls dans df.")

# 5) Automatically pick the car that has the most GPS points
cars_counts = telemetry.groupby("CAR_ID").size().sort_values(ascending=False)
car_id = int(cars_counts.index[0])
print(f"üöó CAR_ID s√©lectionn√© automatiquement : {car_id} ({cars_counts.iloc[0]} points)")

car_df = telemetry.loc[telemetry["CAR_ID"] == car_id].copy()

# (Optional) restrict to a single day for readability;
# uncomment and adapt DATE_TARGET if needed:
# DATE_TARGET = pd.to_datetime("2025-12-10").date()
# car_df = car_df[car_df["TIMESTAMP"].dt.date == DATE_TARGET]

if car_df.empty:
    raise ValueError("‚ùå car_df est vide apr√®s filtrage. V√©rifie le filtre de date √©ventuellement.")

print(f"üìä Nombre de points pour CAR_ID={car_id}: {len(car_df)}")

# 6) GPS track as a list of (lat, lon) pairs, plus the map centre (mean position)
points = list(zip(car_df["LATITUDE"].tolist(), car_df["LONGITUDE"].tolist()))

center_lat = car_df["LATITUDE"].mean()
center_lon = car_df["LONGITUDE"].mean()

# 7) Build the map
m = folium.Map(location=[center_lat, center_lon], zoom_start=12)

# Continuous track
folium.PolyLine(locations=points, weight=4, opacity=0.8).add_to(m)

# Animated variant drawn over the same points
AntPath(locations=points, dash_array=[10, 20], delay=800).add_to(m)

# 8) START / STOP markers on the first and last point of the track
start_point = car_df.iloc[0]
end_point   = car_df.iloc[-1]

for endpoint, label, color, glyph in (
    (start_point, "START", "green", "play"),
    (end_point, "STOP", "red", "stop"),
):
    folium.Marker(
        location=[endpoint["LATITUDE"], endpoint["LONGITUDE"]],
        popup=f"{label}<br>{endpoint['TIMESTAMP']}",
        icon=folium.Icon(color=color, icon=glyph),
    ).add_to(m)

print("‚úÖ Carte g√©n√©r√©e. Si tu es dans un notebook, la carte doit s'afficher juste en dessous üëá")

m


üöó CAR_ID s√©lectionn√© automatiquement : 34 (3498 points)
üìä Nombre de points pour CAR_ID=34: 3498
‚úÖ Carte g√©n√©r√©e. Si tu es dans un notebook, la carte doit s'afficher juste en dessous üëá


In [9]:
from math import radians, sin, cos, sqrt, atan2
import numpy as np

def haversine_km(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Args:
        lat1, lon1: latitude / longitude of the first point, in decimal degrees.
        lat2, lon2: latitude / longitude of the second point, in decimal degrees.

    Returns:
        The haversine distance as a float, using a spherical Earth of
        radius 6371 km.
    """
    R = 6371.0
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat/2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2
    # Rounding can push `a` marginally outside [0, 1] (e.g. for near-antipodal
    # points), which would make the square roots below raise a domain error.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1-a))
    return R * c

# Chronological order inside each (car, rental) so "previous point" is meaningful.
df = df.sort_values(["CAR_ID", "RENTAL_ID", "TIMESTAMP"]).copy()

# Coordinates of the previous point of the same rental (NaN on a rental's first row).
rental_groups = df.groupby(["CAR_ID", "RENTAL_ID"])
df["LAT_PREV"] = rental_groups["LATITUDE"].shift(1)
df["LON_PREV"] = rental_groups["LONGITUDE"].shift(1)

# GPS distance between successive points; rows without a previous point keep 0.0.
mask_has_prev = df[["LAT_PREV", "LON_PREV"]].notna().all(axis=1)
df["DIST_GPS_KM"] = 0.0
rows_with_prev = df.loc[mask_has_prev]
df.loc[mask_has_prev, "DIST_GPS_KM"] = rows_with_prev.apply(
    lambda row: haversine_km(
        row["LAT_PREV"], row["LON_PREV"], row["LATITUDE"], row["LONGITUDE"]
    ),
    axis=1,
)

# Distance expected from the reported speed over one sampling step.
# NOTE(review): assumes a fixed 30 s step between points; rows whose actual
# interval differs will skew the ratio - confirm against the timestamps.
DT_SEC = 30.0
df["DIST_SPEED_KM"] = df["SPEED_KMH"].mul(DT_SEC).div(3600.0)

# Only consider rows where the car is really moving and has a previous point.
moving = df["SPEED_KMH"].gt(5) & mask_has_prev

# GPS distance / expected distance; NaN when not moving or expected distance is 0.
expected_km = df["DIST_SPEED_KM"].replace(0, np.nan)
df["GPS_SPEED_RATIO"] = (df["DIST_GPS_KM"] / expected_km).where(moving)

# Suspect points: the car reports movement but the GPS position moved < 10 metres.
suspect_points = df.loc[moving & df["DIST_GPS_KM"].lt(0.01)]

print(f"Nombre de points 'physiquement' en mouvement mais GPS quasi fixe : {len(suspect_points)}")

suspect_points.head(2)

Nombre de points 'physiquement' en mouvement mais GPS quasi fixe : 4


Unnamed: 0,DEVICE_ID,CAR_ID,RENTAL_ID,TIMESTAMP,LATITUDE,LONGITUDE,SPEED_KMH,ACCELERATION_MS2,BRAKE_PRESSURE_BAR,FUEL_LEVEL_PCT,...,CITY,CREATED_AT,SOURCE_FILE,DT_SEC,DIST_KM_FROM_SPEED,LAT_PREV,LON_PREV,DIST_GPS_KM,DIST_SPEED_KM,GPS_SPEED_RATIO
68114,13,13,6,2026-01-21 10:16:00,33.5731,-7.5898,12.421677,-0.091631,4.567622,65.651308,...,Casablanca,2026-01-21 10:16:00,iot_telemetry_20260121.csv,0.0,0.0,33.5731,-7.5898,0.0,0.103514,0.0
50716,28,28,2,2026-01-16 13:34:00,31.6295,-7.9811,43.788651,0.19133,2.263067,70.266381,...,Marrakech,2026-01-16 13:34:00,iot_telemetry_20260116.csv,0.0,0.0,31.6295,-7.9811,0.0,0.364905,0.0


In [None]:
## Truncate 
def truncate_iot_tables():
    """Empty the IOT_TELEMETRY table.

    Tries TRUNCATE first; if that statement raises for any reason, the error
    is printed and a plain DELETE is issued on the same connection instead.
    Uses the module-level `engine` and `text`.
    """
    with engine.begin() as conn:
        try:
            conn.execute(text("TRUNCATE TABLE IOT_TELEMETRY"))
        except Exception as e:
            # Fallback path: report why TRUNCATE failed, then delete row by row.
            print(f"‚ö†Ô∏è Could not TRUNCATE IOT_TELEMETRY ({e}); trying DELETE...")
            conn.execute(text("DELETE FROM IOT_TELEMETRY"))
            print("üßΩ DELETE fallback used.")
        else:
            print("üßπ TRUNCATED IOT_TELEMETRY")