# Predictive Maintenance mit SCANIA-Daten – Data Preperation Testing

**Projekt:** Bachelorarbeit Data Science  
**Thema:** 
**Datengrundlage:** SCANIA Component X Dataset  
**Autor:** Justin Stange-Heiduk  
**Betreuung:** Dr. MArtin Prause
**Ziel:** Erstllen und testen der Daten Vorbereitung Funktionen  

---

**Erstellt:** 2025-08-05  
**Letzte Änderung:** 2025-07-25


---

In [1]:
import sys
from pathlib import Path
import importlib.util
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import pandas as pd
from kedro_datasets.pandas.csv_dataset import CSVDataset
from kedro.framework.project import configure_project
from kedro.framework.session import KedroSession

### Setze scr_path des Kedro Projets

In [3]:
# src-Pfad bestimmen (Jupyter: Path.cwd() → /workspace/scaniakedro/notebooks)
project_root = Path.cwd().parents[0]  # /workspace/scaniakedro
src_path = project_root / "src"
print("src_path:", src_path)
print("src_path exists:", src_path.exists())

# src GANZ OBEN ins sys.path einfügen
if str(src_path) != sys.path[0]:
    sys.path.insert(0, str(src_path))

print("sys.path[0]:", sys.path[0])
print("Findet scaniakedro:", importlib.util.find_spec("scaniakedro") is not None)


src_path: /workspace/scaniakedro/src
src_path exists: True
sys.path[0]: /workspace/scaniakedro/src
Findet scaniakedro: True


### raw_date_loading Pipeline ausführen

In [4]:
configure_project("scaniakedro")

with KedroSession.create() as session:
    context = session.load_context()
    run_result = session.run(pipeline_name="raw_data_loading")
    print(run_result)
    print("Catalog datasets:", context.catalog.list())
    raw_data = run_result["raw_datasets"]  

{'raw_datasets': {'train': {'spec':        vehicle_id Spec_0 Spec_1 Spec_2 Spec_3 Spec_4 Spec_5 Spec_6 Spec_7
0               0   Cat0   Cat0   Cat0   Cat0   Cat0   Cat0   Cat0   Cat0
1               2   Cat0   Cat1   Cat1   Cat0   Cat0   Cat0   Cat0   Cat1
2               3   Cat0   Cat1   Cat1   Cat1   Cat0   Cat0   Cat0   Cat1
3               4   Cat0   Cat0   Cat2   Cat1   Cat0   Cat0   Cat0   Cat1
4               5   Cat0   Cat2   Cat2   Cat0   Cat0   Cat0   Cat0   Cat1
...           ...    ...    ...    ...    ...    ...    ...    ...    ...
23545       33639   Cat0   Cat1   Cat1   Cat0   Cat0   Cat0   Cat1   Cat4
23546       33640   Cat0  Cat14   Cat1   Cat3   Cat0   Cat0   Cat1   Cat4
23547       33641   Cat0   Cat1   Cat1   Cat0   Cat0   Cat0   Cat1   Cat4
23548       33642   Cat0   Cat1   Cat1   Cat0   Cat0   Cat0   Cat1   Cat4
23549       33643   Cat0   Cat0   Cat2   Cat0   Cat0   Cat0   Cat1   Cat4

[23550 rows x 9 columns], 'readouts':          vehicle_id  time_step      1

Catalog datasets: ['train_specifications', 'train_operational_readouts', 'train_tte', 'validation_specifications', 'validation_operational_readouts', 'validation_labels', 'test_specifications', 'test_operational_readouts', 'test_labels', 'raw_datasets', 'parameters']


## Data Preperation for Survivial Analyse

### Interpolation missing Values

In [16]:
for key in raw_data.keys():
    locals()[f"readouts_{key}"] = raw_data[key]["readouts"]

In [14]:
def interpolate_readout_df(df: pd.DataFrame) -> pd.DataFrame:
    """
    Performs linear interpolation of missing values in time series sensor data per vehicle.

    The function:
    - Sorts the input DataFrame by 'vehicle_id' and 'time_step'
    - Applies linear interpolation separately for each vehicle
    - Interpolates all numeric feature columns except 'vehicle_id' and 'time_step'
    - Fills missing values in both directions (forward and backward)

    Args:
        df (pd.DataFrame): The raw readout DataFrame containing sensor values, 
            including the columns 'vehicle_id' and 'time_step'

    Returns:
        pd.DataFrame: A new DataFrame with interpolated sensor features. 
            Original index is not preserved. Interpolation is applied only to 
            feature columns; identifiers remain unchanged.
    """
    # Sort by vehicle_id and time_step
    df = df.sort_values(by=["vehicle_id", "time_step"])

    # Interpolate numeric columns, excluding 'vehicle_id' and 'time_step'
    feature_cols = df.select_dtypes(include=["number"]).columns.difference(["vehicle_id", "time_step"])
    df[feature_cols] = df.groupby("vehicle_id")[feature_cols].transform(lambda x: x.interpolate(method="linear", limit_direction="both"))

    # Reset index to avoid keeping the original index
    return df.reset_index(drop=True)

In [17]:
for key in raw_data.keys():
    locals()[f"readouts_inter_{key}"] = interpolate_readout_df(raw_data[key]["readouts"])

In [51]:
feature_cols = readouts_train.select_dtypes(include=["number"]).columns.difference(["vehicle_id", "time_step"])

original_stats_train = readouts_train[feature_cols].describe()
interp_stats_train = readouts_inter_train[feature_cols].describe()

original_stats_test = readouts_test[feature_cols].describe()
interp_stats_test = readouts_inter_test[feature_cols].describe()

original_stats_validation = readouts_val[feature_cols].describe()
interp_stats_validation = readouts_inter_validation[feature_cols].describe()

print("Stats (train):")
display(original_stats_train/interp_stats_train)
print("Stats (test):")
display(original_stats_test/interp_stats_test)
print("Stats (validation):")
display(original_stats_validation/interp_stats_validation)

Stats (train):


Unnamed: 0,100_0,158_0,158_1,158_2,158_3,158_4,158_5,158_6,158_7,158_8,158_9,167_0,167_1,167_2,167_3,167_4,167_5,167_6,167_7,167_8,167_9,171_0,272_0,272_1,272_2,272_3,272_4,272_5,272_6,272_7,272_8,272_9,291_0,291_1,291_10,291_2,291_3,291_4,291_5,291_6,291_7,291_8,291_9,309_0,370_0,397_0,397_1,397_10,397_11,397_12,397_13,397_14,397_15,397_16,397_17,397_18,397_19,397_2,397_20,397_21,397_22,397_23,397_24,397_25,397_26,397_27,397_28,397_29,397_3,397_30,397_31,397_32,397_33,397_34,397_35,397_4,397_5,397_6,397_7,397_8,397_9,427_0,459_0,459_1,459_10,459_11,459_12,459_13,459_14,459_15,459_16,459_17,459_18,459_19,459_2,459_3,459_4,459_5,459_6,459_7,459_8,459_9,666_0,835_0,837_0
count,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999964,0.999964,0.999964,0.999964,0.999964,0.999964,0.999964,0.999964,0.999964,0.999964,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999955,0.999955,0.999955,0.999955,0.999955,0.999955,0.999955,0.999955,0.999955,0.999955,0.999955,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919,0.999919
mean,1.000053,1.000068,1.000053,1.000057,1.000054,1.000032,1.000022,1.000027,1.00004,1.000035,0.999986,1.000036,1.000027,1.000031,1.000035,1.000035,1.000035,1.000036,1.000036,1.000036,1.000036,1.000043,1.000032,1.000034,1.00005,1.00007,1.000045,1.000066,1.000078,1.00007,1.000053,0.999971,1.000031,1.000042,1.00004,1.000045,1.000026,1.000025,1.000025,1.00003,1.00003,1.000037,1.000036,1.000077,1.000078,1.000042,1.000061,1.000078,1.000081,1.000073,1.000067,1.00003,1.000072,1.000073,1.000073,1.000052,1.000066,1.000062,1.000023,1.000072,1.00008,1.000081,1.000046,1.000062,1.000034,1.000069,1.000081,1.000078,1.000069,1.000066,1.000036,1.000023,1.000065,1.000073,1.000065,1.000076,1.000075,1.000067,1.000068,1.00005,1.000075,1.000045,1.000062,1.000056,1.000051,1.000053,1.000048,1.000042,0.999975,1.00005,1.000081,1.000079,1.00008,1.000081,1.000051,1.000055,1.000059,1.000059,1.000061,1.000061,1.00006,1.000056,1.000003,1.000049,1.00004
std,1.000015,1.000023,1.000001,1.000003,1.000013,1.000008,1.000001,1.000007,1.000016,1.000021,1.00002,1.000016,1.000003,1.000001,1.000004,1.000004,0.999991,1.000008,1.000015,1.000017,1.000018,1.000005,1.000019,1.000022,1.000032,1.000033,1.000011,1.000033,1.000039,1.00004,1.00004,1.000034,1.000023,1.000023,1.000023,1.000023,1.000017,1.00002,1.000022,1.000023,1.000023,1.000023,1.000023,1.00004,1.000036,1.00004,1.000039,1.000041,1.000041,1.000041,1.00004,1.00004,1.000041,1.000041,1.000041,1.000034,1.00004,1.00004,1.000008,1.000041,1.000041,1.000041,1.000039,1.000022,1.000014,1.000037,1.000041,1.000041,1.00004,1.00004,1.000028,1.000018,1.000023,1.000033,1.000039,1.000041,1.000033,1.000041,1.00004,1.000038,1.000041,1.000003,1.000028,1.000025,1.000015,1.000015,1.000017,1.000024,0.999982,1.000019,1.000036,1.000041,1.00004,1.000041,1.000015,1.000016,1.000016,1.000017,1.000016,1.000015,1.000014,1.000015,1.000005,1.000002,1.000033
min,1.0,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,,,1.0,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,1.0,,1.0,1.0,1.0,,,,,,,,,,,1.0,,,,,,,,,,
25%,1.000138,1.000264,1.000153,1.000113,1.000118,1.000061,1.000065,1.000134,1.00009,1.000138,1.000325,1.0,1.000131,1.00012,1.000108,1.000102,1.000095,1.000219,1.000388,,,1.000116,1.000125,1.000116,1.000119,1.000137,1.000185,1.000252,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.000116,1.000099,1.000165,1.0,1.000154,1.000169,1.000092,1.000121,1.000078,1.0,1.000092,1.00012,1.000142,1.000125,1.000149,1.000154,1.0,1.000136,1.00013,1.0001,1.000181,1.000185,1.0,1.000164,1.000206,1.000119,1.000131,1.000174,1.000172,1.0,1.000231,1.000247,1.000137,1.000129,1.000096,1.00009,1.000103,1.000147,1.000109,1.0001,1.000159,1.000105,1.000076,1.000093,1.00019,1.000173,1.000169,1.0,1.0,1.000118,1.00012,1.000139,1.000145,1.000107,1.000141,1.000142,1.000141,1.000135,1.000119,1.0
50%,1.000058,1.000144,1.000057,1.00007,1.000043,1.000041,1.000017,1.000016,1.000032,1.000049,0.999983,1.0,1.000038,1.000047,1.000046,1.000055,1.000046,1.000038,1.000109,1.00052,,1.000046,1.000033,1.000017,1.000059,1.000106,1.000044,1.000122,1.0,,,,1.000081,1.000106,1.0,1.000103,1.000071,1.0,1.0,1.000186,1.0,1.0,1.000158,1.0,,1.000039,1.000089,1.00007,1.0,1.000085,1.000115,1.000029,1.000084,1.000067,1.0,1.000045,1.000091,1.000072,1.000029,1.000107,1.000179,1.0,1.000041,1.000079,1.000041,1.000101,1.000139,1.0,1.000089,1.000092,1.000043,1.000031,1.000127,1.000128,1.0,1.000146,1.000165,1.000111,1.000078,1.000036,1.000102,1.000044,1.000098,1.000076,1.000058,1.000051,1.000049,1.000034,1.000009,1.000071,1.000128,1.000207,1.000698,1.0,1.00004,1.000042,1.000074,1.000067,1.000098,1.000093,1.000075,1.000068,1.000013,1.000042,1.0
75%,1.000074,1.000062,1.000051,1.000048,1.000047,1.000023,0.999997,0.999999,1.000044,1.000017,0.999911,1.000077,1.000016,1.000014,1.000039,1.000028,1.000017,1.00004,1.000044,1.000092,1.0,1.000045,1.000026,1.000021,1.000051,1.000129,1.00005,1.000106,1.000338,,,,1.0,1.0,1.0,1.000056,1.0,1.0,1.0,1.000072,1.000166,1.0,1.000089,1.000192,1.000358,1.000025,1.000042,1.000065,1.0,1.000038,1.000055,0.999997,1.000086,1.000065,1.0,1.000049,1.000044,1.000062,0.999997,1.000063,1.000069,1.0,1.000046,1.000062,1.000007,1.000076,1.000085,1.0,1.000066,1.000098,1.0,0.999995,1.000086,1.000127,1.0,1.00007,1.000099,1.000059,1.000062,1.000071,1.000063,1.000046,1.000068,1.000054,1.000075,1.000052,1.000033,1.000019,0.999961,1.000051,1.000547,1.000014,1.000132,1.0,1.000058,1.000074,1.000043,1.000063,1.000056,1.000056,1.000061,1.000043,0.999986,1.000036,1.000019
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Stats (test):


Unnamed: 0,100_0,158_0,158_1,158_2,158_3,158_4,158_5,158_6,158_7,158_8,158_9,167_0,167_1,167_2,167_3,167_4,167_5,167_6,167_7,167_8,167_9,171_0,272_0,272_1,272_2,272_3,272_4,272_5,272_6,272_7,272_8,272_9,291_0,291_1,291_10,291_2,291_3,291_4,291_5,291_6,291_7,291_8,291_9,309_0,370_0,397_0,397_1,397_10,397_11,397_12,397_13,397_14,397_15,397_16,397_17,397_18,397_19,397_2,397_20,397_21,397_22,397_23,397_24,397_25,397_26,397_27,397_28,397_29,397_3,397_30,397_31,397_32,397_33,397_34,397_35,397_4,397_5,397_6,397_7,397_8,397_9,427_0,459_0,459_1,459_10,459_11,459_12,459_13,459_14,459_15,459_16,459_17,459_18,459_19,459_2,459_3,459_4,459_5,459_6,459_7,459_8,459_9,666_0,835_0,837_0
count,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944,0.999944
mean,1.00005,1.000055,1.000051,1.000049,1.000055,1.000054,1.000054,1.000054,1.000054,1.000055,1.000055,1.000056,1.000038,1.000037,1.000044,1.000049,1.000055,1.000055,1.000056,1.000056,1.000056,1.000055,1.00004,1.000039,1.000043,1.000044,1.000055,1.000056,1.000056,1.000056,1.000056,1.000056,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.000053,0.999993,1.000053,1.000047,1.000028,1.000054,1.000045,1.000055,1.000055,1.000044,1.000017,1.000054,1.00005,1.000055,1.000054,1.000055,1.000048,1.000031,1.00005,1.000054,1.000055,1.000055,1.000051,1.000038,1.000048,1.000051,1.000055,1.000055,1.000055,1.000054,1.000044,1.000051,1.000051,1.000055,1.000043,1.000047,1.000055,1.000045,1.000054,1.000032,1.00005,1.000054,1.000055,1.000055,1.000055,1.000055,1.000055,1.000055,0.999619,0.99956,1.000055,1.00005,1.000051,1.000051,1.000052,1.000052,1.000053,1.000053,1.000054,1.000054,1.000052,1.000055
std,1.000003,1.000011,0.99999,0.999997,0.999998,0.999996,0.999999,1.0,1.000002,1.000008,1.000021,1.000022,1.000006,1.000008,1.000013,1.000012,0.99999,1.000013,1.000023,1.000027,1.000027,0.999989,1.000007,1.000012,1.00002,1.000023,0.999996,1.000021,1.000027,1.000028,1.000028,1.000028,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.000026,1.000023,1.000002,0.999998,1.000025,1.000023,1.000021,1.000011,1.000002,1.000026,1.000026,1.000025,1.000022,1.000004,0.999997,1.000002,1.000025,1.000026,1.000023,1.000006,1.0,1.0,1.000024,1.000025,1.000026,1.000016,1.000016,1.000013,1.000009,1.000012,1.000024,1.000026,1.000015,1.000023,1.000027,1.000011,1.0,1.000026,0.999988,1.00002,1.000006,1.000001,1.000001,1.000002,1.000011,1.000012,1.000005,1.000025,1.000023,1.000025,1.000028,1.000002,1.000003,1.000004,1.000004,1.000004,1.000003,1.000003,1.000002,1.000009,0.99999,1.000018
min,1.0,,1.0,1.0,1.0,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,,,1.0,1.0,1.0,,,,1.0,,1.0,,,,,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,,,,,1.0,
25%,1.000107,1.000267,1.000105,1.000066,1.000247,1.000183,1.000198,1.000201,1.000224,1.000292,1.001192,1.0,1.000062,1.000233,1.000111,1.000152,1.000172,1.000383,1.000573,,,1.000325,1.000109,1.00008,1.000091,1.00008,1.000369,1.00046,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.000243,1.000039,1.00007,1.0,1.000057,1.000172,1.000247,1.000117,1.000106,1.0,1.000122,1.00014,1.000206,1.000092,1.000213,1.0,1.0,1.000094,1.000172,1.000198,1.000076,1.0,1.0,1.000108,1.000281,1.000203,1.000089,1.000266,1.00012,1.0,1.000181,1.000511,1.000072,1.000125,1.000189,1.000089,1.000127,1.000084,1.000215,1.000299,1.000182,1.00014,1.000115,1.000318,1.000114,1.000087,1.000227,1.0,1.0,1.000216,1.000118,1.000096,1.000152,1.000191,1.000162,1.00012,1.000086,1.00027,1.00007,1.00076
50%,1.00004,1.000101,1.000069,1.000056,1.000071,1.000043,1.000052,1.000042,1.000088,1.000058,1.000136,1.0,1.000022,1.000035,1.000063,1.000083,1.000152,1.000156,1.000095,1.0,,1.000094,1.000062,1.000015,1.000085,1.000008,1.000109,1.000038,1.0006,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.000131,1.000056,1.000034,1.0,1.000032,1.000042,1.000104,1.000016,1.0,1.0,1.000074,1.000114,1.000079,1.000097,1.000056,1.00002,1.0,1.000084,1.000086,1.000053,1.000052,1.000093,1.0,1.000104,1.000061,1.000086,1.000107,1.000128,1.000049,1.00113,1.000154,1.000222,1.000133,1.000088,1.000088,1.000031,1.000068,1.000012,1.000084,1.000051,1.000067,1.000099,1.000106,1.000119,1.000146,1.000107,1.000018,1.0,1.0,1.000112,1.000129,1.000047,1.000031,1.000074,1.000085,1.00005,1.000051,1.000095,1.000041,1.000333
75%,1.000038,1.000053,1.000026,1.000025,1.000015,1.000037,1.000017,1.000012,1.000034,1.000048,1.000036,1.0,1.000039,1.000142,1.000004,1.000116,1.000025,1.000018,1.000029,1.000276,1.001794,1.000041,1.000028,1.000068,1.000027,1.000008,1.000019,1.000033,1.000052,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.999666,1.000009,1.000013,0.999997,1.0,1.000126,1.00003,1.000003,1.000043,0.999954,1.0,1.000062,1.000018,1.000043,1.000034,1.000052,0.999996,1.0,1.000007,1.000045,1.00006,1.000015,1.0,1.0,1.000034,1.000032,1.000067,1.000003,1.000024,1.000123,1.0,1.00004,1.000052,1.000026,1.000053,1.000027,1.000045,1.000013,1.000027,1.000025,1.000072,1.000035,1.000021,1.000085,1.000068,1.000097,1.000108,0.999946,0.99985,1.0,1.00002,1.000015,1.000035,1.000035,1.000024,1.000031,1.000058,1.000016,1.000057,1.000013,1.000021
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Stats (validation):


Unnamed: 0,100_0,158_0,158_1,158_2,158_3,158_4,158_5,158_6,158_7,158_8,158_9,167_0,167_1,167_2,167_3,167_4,167_5,167_6,167_7,167_8,167_9,171_0,272_0,272_1,272_2,272_3,272_4,272_5,272_6,272_7,272_8,272_9,291_0,291_1,291_10,291_2,291_3,291_4,291_5,291_6,291_7,291_8,291_9,309_0,370_0,397_0,397_1,397_10,397_11,397_12,397_13,397_14,397_15,397_16,397_17,397_18,397_19,397_2,397_20,397_21,397_22,397_23,397_24,397_25,397_26,397_27,397_28,397_29,397_3,397_30,397_31,397_32,397_33,397_34,397_35,397_4,397_5,397_6,397_7,397_8,397_9,427_0,459_0,459_1,459_10,459_11,459_12,459_13,459_14,459_15,459_16,459_17,459_18,459_19,459_2,459_3,459_4,459_5,459_6,459_7,459_8,459_9,666_0,835_0,837_0
count,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
mean,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
std,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
min,1.0,,1.0,1.0,,,,,,,,,1.0,,,,,,,,,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,,,1.0,1.0,,,,,1.0,,1.0,,,,,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,,,,1.0,1.0,1.0,,,,,,,1.0,
25%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
50%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
75%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [24]:
for split in raw_data.keys():
        df = raw_data[split]['readouts']
        locals()[f"{split}_missing_values"] = df.isnull().sum().sum()
        

readouts_inter_train_missing_values = readouts_inter_train.isnull().sum().sum()
readouts_inter_test_missing_values = readouts_inter_test.isnull().sum().sum()
readouts_inter_validation_missing_values = readouts_inter_validation.isnull().sum().sum()

print(f"Missing values in train: before={train_missing_values}, after={readouts_inter_train_missing_values}")
print(f"Missing values in test: before={test_missing_values}, after={readouts_inter_test_missing_values}")
print(f"Missing values in validation: before={validation_missing_values}, after={readouts_inter_validation_missing_values}")


Missing values in train: before=354634, after=950
Missing values in test: before=66403, after=121
Missing values in validation: before=60339, after=0


In [28]:
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [32]:
missing_columns = readouts_inter_train.loc[:, readouts_inter_train.isna().any()].columns.tolist()

display(readouts_inter_train[readouts_inter_train[missing_columns].isna().any(axis=1)])

Unnamed: 0,vehicle_id,time_step,171_0,666_0,427_0,837_0,167_0,167_1,167_2,167_3,167_4,167_5,167_6,167_7,167_8,167_9,309_0,272_0,272_1,272_2,272_3,272_4,272_5,272_6,272_7,272_8,272_9,835_0,370_0,291_0,291_1,291_2,291_3,291_4,291_5,291_6,291_7,291_8,291_9,291_10,158_0,158_1,158_2,158_3,158_4,158_5,158_6,158_7,158_8,158_9,100_0,459_0,459_1,459_2,459_3,459_4,459_5,459_6,459_7,459_8,459_9,459_10,459_11,459_12,459_13,459_14,459_15,459_16,459_17,459_18,459_19,397_0,397_1,397_2,397_3,397_4,397_5,397_6,397_7,397_8,397_9,397_10,397_11,397_12,397_13,397_14,397_15,397_16,397_17,397_18,397_19,397_20,397_21,397_22,397_23,397_24,397_25,397_26,397_27,397_28,397_29,397_30,397_31,397_32,397_33,397_34,397_35
545,22,3.2,179550.0,4460.0,6791004.0,680.0,,,,,,,,,,,70.0,1147601.0,1304433.0,667044.0,1013896.0,4156217.0,26481.0,0.0,0.0,0.0,0.0,6084900.0,0.0,876.0,564.0,456.0,669.0,288.0,153.0,292.0,222.0,138.0,459.0,111.0,58800.0,1897212.0,2739072.0,1212780.0,874956.0,415380.0,200964.0,107676.0,114313.0,54852.0,672890.0,162.90575,55.050778,51.093083,52.857722,52.513028,58.962278,70.358222,91.30425,256.714639,311.887028,171.630472,185.444028,247.545833,464.808806,3123.181944,905.518556,0.983,0.073722,0.007778,0.0,363385.0,260666.0,144542.0,4637.0,1488.0,60.0,274034.0,200645.0,550383.0,4354.0,617.0,156.0,32234.0,92052.0,372838.0,2328.0,412.0,28.0,7604.0,37016.0,96288.0,1244.0,144.0,0.0,4693.0,16592.0,38832.0,893.0,76.0,12.0,200.0,15180.0,32393.0,2325.0,321.0,20.0
546,22,7.4,355095.0,16955.0,12882265.0,3504.0,,,,,,,,,,,252.0,1380314.0,1602796.0,863331.0,1294345.0,11157251.0,132626.0,130.0,0.0,0.0,0.0,12027460.0,0.0,1236.0,705.0,552.0,1071.0,408.0,231.0,424.0,315.0,171.0,624.0,264.0,100548.0,3459252.0,4676580.0,2257741.0,2054580.0,1051932.0,541308.0,328548.0,409526.0,286872.0,1025801.0,223.204556,81.191694,86.891472,89.305889,90.093417,98.277833,118.124028,146.358861,393.662,529.896472,349.9775,383.249028,536.753639,953.066472,8062.055361,1567.517528,2.865944,0.100861,0.007778,0.0,867657.0,420206.0,310835.0,15821.0,6673.0,728.0,366594.0,253145.0,950316.0,7758.0,1522.0,184.0,39806.0,126313.0,926671.0,4952.0,1261.0,36.0,15496.0,61585.0,287544.0,3104.0,477.0,4.0,10125.0,34932.0,126917.0,2941.0,404.0,20.0,296.0,55100.0,141842.0,12377.0,1957.0,64.0
547,22,10.8,452700.0,23437.0,16294157.0,5856.0,,,,,,,,,,,337.0,1539434.0,1796314.0,1016211.0,1479076.0,15104532.0,205751.0,130.0,0.0,0.0,0.0,15476731.0,0.0,1524.0,807.0,657.0,1368.0,492.0,309.0,499.0,375.0,186.0,735.0,345.0,130800.0,4426429.0,5820492.0,2869801.0,2738772.0,1378620.0,723840.0,448440.0,552734.0,425256.0,1287086.0,265.55025,98.745278,110.289778,116.440694,115.135722,126.431278,148.609278,179.061306,473.282167,653.071972,437.278389,489.578139,685.202583,1196.777222,10353.699917,2395.600556,5.323333,0.200167,0.007778,0.0,1213509.0,518282.0,412107.0,24090.0,9418.0,912.0,415334.0,281759.0,1148168.0,9942.0,2186.0,204.0,44638.0,145489.0,1239307.0,6586.0,1802.0,52.0,20124.0,76441.0,393833.0,4245.0,735.0,20.0,13102.0,45112.0,175125.0,4102.0,593.0,28.0,340.0,73064.0,203347.0,17577.0,3289.0,248.0
548,22,12.2,483285.0,24978.0,17353810.0,6416.0,,,,,,,,,,,337.0,1588835.0,1824550.0,1050102.0,1557856.0,16356458.0,221664.0,130.0,0.0,0.0,0.0,16544601.0,0.0,1635.0,840.0,687.0,1432.0,525.0,324.0,523.0,384.0,192.0,780.0,366.0,134892.0,4705357.0,6192924.0,3123865.0,2948820.0,1465236.0,771337.0,481296.0,591038.0,446676.0,1364451.0,278.257639,103.441611,117.133917,122.919611,122.850306,133.793917,157.185333,187.51275,492.719889,689.969389,464.737556,520.339972,729.809861,1288.30375,11173.432306,2465.766139,5.828,0.210389,0.007778,0.0,1318493.0,546854.0,441327.0,26031.0,10238.0,940.0,430570.0,291351.0,1224105.0,10546.0,2384.0,208.0,46108.0,152230.0,1339704.0,7082.0,2034.0,56.0,21665.0,81226.0,422341.0,4649.0,813.0,24.0,14254.0,48629.0,187982.0,4535.0,709.0,44.0,404.0,77956.0,216100.0,18629.0,3365.0,268.0
549,22,17.2,663660.0,37683.0,23636999.0,10664.0,,,,,,,,,,,576.0,1813631.0,2295567.0,1272441.0,1941902.0,23504638.0,378457.0,221.0,0.0,0.0,0.0,22847122.0,0.0,2181.0,1074.0,823.0,1954.0,703.0,444.0,673.0,480.0,258.0,960.0,525.0,201024.0,6439033.0,8381640.0,4229713.0,4121544.0,2049636.0,1092541.0,698269.0,858206.0,734724.0,1821406.0,350.790278,132.204639,154.443861,163.75025,167.976083,186.825167,216.059083,250.570528,647.367889,913.525389,640.843639,730.338972,990.772056,1759.07425,14910.637917,4329.391583,9.250139,0.307333,0.007778,0.0,1938297.0,728190.0,649023.0,40752.0,15287.0,1452.0,503630.0,349415.0,1626405.0,14498.0,3420.0,216.0,54201.0,184054.0,1881212.0,10059.0,2955.0,80.0,29886.0,104575.0,603549.0,6976.0,1407.0,28.0,19367.0,67145.0,271407.0,6812.0,1269.0,44.0,628.0,113196.0,337733.0,29705.0,4998.0,325.0
550,22,17.4,663660.0,37683.0,23636999.0,10665.0,,,,,,,,,,,576.0,1813631.0,2295567.0,1272441.0,1941902.0,23504638.0,378457.0,221.0,0.0,0.0,0.0,22847122.0,0.0,2182.0,1074.0,823.0,1954.0,703.0,444.0,674.0,480.0,258.0,960.0,525.0,201024.0,6439034.0,8381640.0,4229713.0,4121544.0,2049636.0,1092542.0,698269.0,858206.0,734724.0,1821406.0,351.790278,132.204639,154.443861,163.75025,167.976083,186.825167,216.059083,250.570528,647.367889,913.525389,640.843639,730.338972,990.772056,1759.07425,14911.637917,4329.391583,9.250139,0.307333,0.007778,0.0,1938297.0,728190.0,649024.0,40753.0,15288.0,1453.0,503630.0,349415.0,1626406.0,14499.0,3420.0,216.0,54201.0,184054.0,1881212.0,10060.0,2955.0,80.0,29886.0,104575.0,603549.0,6976.0,1408.0,28.0,19368.0,67146.0,271407.0,6812.0,1270.0,44.0,628.0,113196.0,337733.0,29706.0,4999.0,325.0
551,22,18.6,706320.0,40315.0,25157771.0,11449.0,,,,,,,,,,,590.0,1897508.0,2400334.0,1357799.0,2088542.0,25168183.0,399075.0,222.0,0.0,0.0,0.0,24388062.0,0.0,2416.0,1138.0,898.0,2131.0,769.0,477.0,716.0,543.0,283.0,1020.0,559.0,217584.0,6902246.0,8958241.0,4503829.0,4379856.0,2182260.0,1163642.0,742897.0,907370.0,791304.0,1939851.0,372.225889,141.291861,165.958472,175.830778,180.668639,200.345194,230.524,267.714278,682.567444,979.449111,686.923444,765.810556,1041.078306,1838.764306,15433.304167,5063.614444,10.103472,0.309306,0.007778,0.0,2089926.0,778511.0,696245.0,44261.0,16009.0,1469.0,527786.0,368131.0,1737015.0,15741.0,3624.0,216.0,56797.0,195031.0,2002182.0,10970.0,3129.0,84.0,32148.0,110865.0,644846.0,7376.0,1516.0,28.0,20768.0,70506.0,288344.0,7073.0,1318.0,44.0,660.0,120728.0,359817.0,31138.0,5235.0,326.0
552,22,25.8,920055.0,53728.0,32682134.0,14225.0,,,,,,,,,,,646.0,2286014.0,2825174.0,1717145.0,2545453.0,33756750.0,493780.0,222.0,0.0,0.0,0.0,31939672.0,0.0,3007.0,1315.0,1036.0,2716.0,968.0,600.0,944.0,681.0,340.0,1272.0,730.0,291708.0,9057806.0,11414341.0,5869057.0,5889204.0,2946312.0,1563543.0,995918.0,1152302.0,1088617.0,2486921.0,463.588167,177.439417,212.107417,229.438556,234.066,260.568583,294.250806,339.412444,851.353139,1250.058139,911.494583,957.554583,1310.152889,2266.314917,20155.099667,7089.290389,12.591111,0.345667,0.007778,0.0,2791931.0,995279.0,927730.0,57933.0,20490.0,1957.0,625196.0,437883.0,2248668.0,21225.0,4776.0,232.0,68478.0,241303.0,2711563.0,14335.0,4166.0,100.0,44200.0,142555.0,869852.0,9929.0,1924.0,28.0,25085.0,91266.0,383397.0,9341.0,1662.0,44.0,968.0,156220.0,466257.0,40054.0,6720.0,326.0
553,22,33.6,1184550.0,72873.0,42004634.0,16289.0,,,,,,,,,,,702.0,2971647.0,3870452.0,2114477.0,3201576.0,43902184.0,577085.0,222.0,0.0,0.0,0.0,41469042.0,0.0,3508.0,1771.0,1348.0,3481.0,1241.0,798.0,1283.0,846.0,409.0,1506.0,953.0,405648.0,11859074.0,14351113.0,7221685.0,7611960.0,4125516.0,2204715.0,1393730.0,1489742.0,1618021.0,3253381.0,589.750111,231.267667,277.257944,294.582667,303.242556,333.328333,372.227944,434.693944,1079.134222,1596.418389,1179.712667,1266.758722,1726.583806,3058.086972,26460.395889,9545.639417,15.447944,0.400056,0.007778,0.0,3714628.0,1258268.0,1177546.0,72317.0,24026.0,2206.0,776125.0,532768.0,2774321.0,28079.0,6302.0,236.0,82451.0,301936.0,3600016.0,19051.0,5283.0,128.0,58716.0,188344.0,1199053.0,13362.0,2429.0,28.0,29417.0,127035.0,528151.0,12389.0,2119.0,52.0,1532.0,208184.0,621841.0,50650.0,7864.0,346.0
554,22,34.0,1206540.0,75099.0,42771521.0,16530.0,,,,,,,,,,,702.0,3027937.0,3904850.0,2158508.0,3257983.0,44755023.0,588655.0,223.0,0.0,0.0,0.0,42241712.0,0.0,3559.0,1808.0,1375.0,3556.0,1247.0,816.0,1316.0,852.0,415.0,1530.0,971.0,416112.0,12104858.0,14548838.0,7300490.0,7715449.0,4237308.0,2274975.0,1437194.0,1529211.0,1690813.0,3312851.0,598.99825,236.587583,283.174444,299.540194,308.754111,341.042167,379.902472,444.436639,1101.968306,1630.535278,1209.009583,1298.363972,1771.556028,3134.922139,27024.797361,9782.57725,15.804389,0.400056,0.007778,0.0,3791981.0,1278794.0,1198095.0,73465.0,24239.0,2270.0,785194.0,538632.0,2806945.0,28579.0,6399.0,240.0,83484.0,306488.0,3661553.0,19343.0,5349.0,128.0,59681.0,192124.0,1234794.0,13479.0,2475.0,28.0,29961.0,129656.0,543523.0,12647.0,2136.0,52.0,1541.0,215392.0,642770.0,52110.0,8036.0,347.0


In [34]:
missing_columns = readouts_inter_test.loc[:, readouts_inter_test.isna().any()].columns.tolist()
display(readouts_inter_test[readouts_inter_test[missing_columns].isna().any(axis=1)])

Unnamed: 0,vehicle_id,time_step,171_0,666_0,427_0,837_0,167_0,167_1,167_2,167_3,167_4,167_5,167_6,167_7,167_8,167_9,309_0,272_0,272_1,272_2,272_3,272_4,272_5,272_6,272_7,272_8,272_9,835_0,370_0,291_0,291_1,291_2,291_3,291_4,291_5,291_6,291_7,291_8,291_9,291_10,158_0,158_1,158_2,158_3,158_4,158_5,158_6,158_7,158_8,158_9,100_0,459_0,459_1,459_2,459_3,459_4,459_5,459_6,459_7,459_8,459_9,459_10,459_11,459_12,459_13,459_14,459_15,459_16,459_17,459_18,459_19,397_0,397_1,397_2,397_3,397_4,397_5,397_6,397_7,397_8,397_9,397_10,397_11,397_12,397_13,397_14,397_15,397_16,397_17,397_18,397_19,397_20,397_21,397_22,397_23,397_24,397_25,397_26,397_27,397_28,397_29,397_30,397_31,397_32,397_33,397_34,397_35
36300,4498,13.4,9720.0,2730.0,6101251.0,30.0,0.0,909369.0,2054272.0,1181105.0,2060329.0,503936.0,1504.0,0.0,0.0,0.0,5.0,1684232.0,1533939.0,1064008.0,4854674.0,96624.0,506.0,0.0,0.0,0.0,0.0,9567396.0,15650218.0,,,,,,,,,,,,25382.0,4268838.0,6834409.0,292153.0,136612.0,77182.0,52948.0,41944.0,22792.0,84.0,1458280.0,3639.394556,146.958278,108.559389,100.727944,106.324833,79.783111,82.560444,85.949889,155.282167,135.807056,55.919833,62.675944,66.621944,142.372056,601.303278,370.137722,18.857778,18.425611,2.973667,0.0095,147161.0,909938.0,56091.0,35448.0,22218.0,14.0,2613437.0,1210818.0,50183.0,73670.0,41244.0,7.0,336246.0,39452.0,26040.0,68454.0,71814.0,21.0,47572.0,8316.0,14357.0,24185.0,19244.0,14.0,3046.0,1078.0,5656.0,8925.0,7526.0,42.0,35.0,98.0,3641.0,9982.0,20140.0,70.0
36301,4498,41.8,34650.0,7293.0,6101251.0,30.0,0.0,1533658.0,4103809.0,2151922.0,3875969.0,977240.0,8944.0,0.0,0.0,0.0,5.0,2917024.0,3151984.0,1904331.0,9229803.0,232364.0,584.0,0.0,0.0,0.0,0.0,18069360.0,15650218.0,,,,,,,,,,,,55762.0,6892102.0,13775791.0,557692.0,321804.0,230216.0,176596.0,128058.0,53284.0,84.0,1458280.0,5887.010833,310.946167,258.643333,254.320667,282.177444,255.685333,273.496,278.050556,576.102111,507.447,208.635278,202.351167,207.741167,303.811111,785.042889,515.573778,94.763611,115.722167,32.033722,0.0095,1004481.0,2672391.0,195797.0,95727.0,47769.0,119.0,3962904.0,1596114.0,134080.0,128216.0,65086.0,161.0,425169.0,62980.0,75985.0,114220.0,104959.0,154.0,59136.0,21162.0,48658.0,52787.0,41079.0,465.0,5239.0,3710.0,20594.0,26383.0,24172.0,168.0,42.0,210.0,5995.0,27567.0,71534.0,518.0
36302,4498,68.4,44850.0,11804.0,6101251.0,30.0,0.0,2261962.0,6296554.0,3169674.0,5380065.0,1770584.0,16168.0,0.0,0.0,0.0,5.0,4480916.0,4874364.0,2827660.0,13661132.0,274054.0,585.0,0.0,0.0,0.0,0.0,27073260.0,15650218.0,,,,,,,,,,,,103446.0,9929360.0,20250624.0,1028975.0,653381.0,505667.0,385238.0,273924.0,111188.0,196.0,1458280.0,7858.846444,531.635278,451.246056,459.873833,519.276722,512.876444,556.127556,581.946944,1236.484333,1124.688722,487.168167,469.257,475.139778,618.916722,1190.694444,855.720889,285.103167,285.004667,75.964389,0.0095,1977286.0,4076536.0,408787.0,199838.0,94641.0,168.0,5347701.0,1966849.0,262530.0,221127.0,118489.0,288.0,515210.0,93410.0,162064.0,199592.0,173916.0,175.0,71408.0,40237.0,105442.0,107563.0,80734.0,521.0,7808.0,9850.0,50484.0,57071.0,50976.0,351.0,42.0,448.0,18350.0,57828.0,142514.0,812.0
36303,4498,75.2,57360.0,13325.0,6101251.0,30.0,0.0,2404946.0,6847338.0,3480106.0,5986441.0,1996320.0,23056.0,0.0,0.0,0.0,5.0,4767972.0,5323758.0,3048651.0,15211274.0,337865.0,585.0,0.0,0.0,0.0,0.0,29739733.0,15650218.0,,,,,,,,,,,,112140.0,10682826.0,22344562.0,1164985.0,748063.0,583815.0,437010.0,314272.0,126812.0,196.0,1458280.0,8453.072333,583.617889,503.607667,515.672,580.203444,578.748722,633.535889,666.037833,1425.416333,1295.307444,558.087056,536.800389,547.601722,708.957889,1291.007111,933.4455,352.479056,354.6475,91.932,0.0095,2318928.0,4546132.0,467846.0,227208.0,107234.0,182.0,5694439.0,2067593.0,297853.0,247461.0,130677.0,337.0,536428.0,101363.0,184002.0,222063.0,198774.0,189.0,74775.0,45488.0,121494.0,122005.0,91878.0,521.0,8410.0,11621.0,58912.0,65150.0,58095.0,393.0,49.0,504.0,22228.0,66151.0,160147.0,868.0
36304,4498,87.6,68070.0,15977.0,6101251.0,30.0,0.0,2630451.0,7796051.0,3902874.0,6638361.0,2165216.0,36264.0,0.0,0.0,0.0,5.0,5226859.0,6086179.0,3463802.0,16873869.0,444917.0,2697.0,0.0,0.0,0.0,0.0,33272701.0,15650218.0,,,,,,,,,,,,121590.0,11906594.0,25076914.0,1280807.0,826771.0,655089.0,489594.0,357631.0,137368.0,196.0,1458280.0,9295.810167,673.941278,598.714222,616.130167,682.343444,675.085222,736.511278,763.281167,1621.097389,1458.134833,613.645167,588.413556,594.489278,767.211,1353.117556,971.167833,379.071056,395.861833,101.360222,0.0095,2685826.0,5194984.0,543817.0,256461.0,116398.0,196.0,6296313.0,2226025.0,341078.0,271402.0,139729.0,365.0,569279.0,112675.0,207655.0,239185.0,208737.0,196.0,79675.0,51697.0,137580.0,135614.0,100943.0,542.0,9460.0,13035.0,65961.0,73761.0,66461.0,470.0,49.0,553.0,23923.0,73368.0,181960.0,959.0
176183,28365,13.0,8145.0,507.0,305390.0,1486.0,0.0,71920.0,52088.0,11384.0,47264.0,136800.0,0.0,0.0,0.0,0.0,181.0,122826.0,44386.0,15730.0,272581.0,2453.0,0.0,0.0,0.0,0.0,0.0,474048.0,0.0,,,,,,,,,,,,364.0,103782.0,184436.0,80893.0,126476.0,52612.0,13594.0,5852.0,8176.0,6692.0,33378.0,35.482222,13.753389,15.282333,21.411333,17.259056,16.173944,14.198722,19.572889,50.157,54.026611,24.064111,29.030778,23.372056,68.624389,216.837556,267.630833,0.0,0.0,0.0,0.0,22995.0,24171.0,10585.0,4956.0,1457.0,14.0,37424.0,12405.0,21283.0,16808.0,1659.0,14.0,17283.0,4424.0,38290.0,44506.0,2240.0,35.0,2646.0,2304.0,5432.0,5146.0,1470.0,21.0,639.0,1834.0,1862.0,1057.0,861.0,7.0,147.0,847.0,2872.0,1933.0,1800.0,28.0
176184,28365,65.6,9450.0,833.0,386571.0,1982.0,0.0,143256.0,83040.0,16865.0,55456.0,142105.0,0.0,0.0,0.0,0.0,196.0,238612.0,65748.0,21989.0,302215.0,5269.0,0.0,0.0,0.0,0.0,0.0,655609.0,0.0,,,,,,,,,,,,462.0,168952.0,290851.0,99849.0,142408.0,60102.0,17122.0,7868.0,10542.0,8708.0,56382.0,61.265556,26.1935,24.661778,32.602556,27.384944,26.941556,24.553389,31.637611,79.295556,78.923,31.141222,35.909278,30.638333,75.990889,222.589056,287.400056,0.0,0.0,0.0,0.0,32264.0,36498.0,14127.0,5740.0,1597.0,21.0,69869.0,20441.0,26184.0,17907.0,1841.0,21.0,32440.0,8401.0,42870.0,47573.0,2361.0,35.0,4508.0,4489.0,7624.0,5903.0,1491.0,63.0,1054.0,3067.0,2828.0,1352.0,904.0,7.0,245.0,1267.0,4160.0,2389.0,1884.0,28.0
176185,28365,67.0,10140.0,872.0,418191.0,2087.0,0.0,146856.0,106872.0,19561.0,58304.0,152297.0,0.0,0.0,0.0,0.0,202.0,246664.0,79322.0,35123.0,327207.0,6204.0,0.0,0.0,0.0,0.0,0.0,718382.0,0.0,,,,,,,,,,,,519.0,196756.0,314217.0,107269.0,152012.0,66094.0,18186.0,8372.0,11144.0,9366.0,64149.0,68.304222,28.484167,26.2795,34.763222,30.116778,29.237944,27.3075,36.511611,88.909278,85.516889,32.944722,37.010444,33.208667,79.120722,238.542389,306.575889,0.0,0.0,0.0,0.0,36550.0,39915.0,15373.0,6006.0,1632.0,21.0,81356.0,22598.0,27479.0,18677.0,1885.0,28.0,34183.0,9683.0,46834.0,50905.0,2410.0,35.0,4887.0,5077.0,8220.0,6219.0,1512.0,63.0,1138.0,3348.0,3082.0,1366.0,927.0,7.0,245.0,1344.0,4510.0,2536.0,1941.0,28.0
176186,28365,67.6,10350.0,872.0,425901.0,2147.0,0.0,150184.0,110360.0,20337.0,58304.0,152297.0,0.0,0.0,0.0,0.0,237.0,252417.0,80840.0,36850.0,329924.0,6754.0,0.0,0.0,0.0,0.0,0.0,731054.0,0.0,,,,,,,,,,,,519.0,198899.0,319957.0,108445.0,154588.0,68167.0,18718.0,8694.0,11984.0,9576.0,65193.0,69.387,28.902889,26.681556,35.206667,30.460278,29.904833,27.774722,37.029056,90.715444,87.426056,34.515,38.369333,34.097389,81.6565,252.474833,310.028,0.0,0.0,0.0,0.0,36858.0,40412.0,15842.0,6153.0,1814.0,28.0,82988.0,22782.0,27822.0,18803.0,2018.0,28.0,34617.0,9725.0,47968.0,51101.0,2515.0,35.0,4930.0,5112.0,8339.0,6269.0,2282.0,70.0,1146.0,3358.0,3132.0,1401.0,1033.0,7.0,252.0,1344.0,4567.0,2683.0,2361.0,42.0
176187,28365,82.4,11235.0,1068.0,472022.0,2237.0,0.0,163369.0,139352.0,29025.0,69104.0,156057.0,0.0,0.0,0.0,0.0,242.0,276936.0,111981.0,43539.0,364849.0,7304.0,0.0,0.0,0.0,0.0,0.0,832407.0,0.0,,,,,,,,,,,,519.0,244021.0,363568.0,120010.0,164319.0,75937.0,21504.0,10290.0,13553.0,10500.0,77560.0,80.261222,32.455056,29.335556,38.779056,35.0125,33.6405,32.297333,44.2715,110.356167,103.030556,38.740389,45.065556,40.309,88.876778,261.816278,326.0245,0.0,0.0,0.0,0.0,48611.0,50185.0,17517.0,6433.0,1864.0,28.0,94888.0,26443.0,31575.0,19335.0,2068.0,28.0,37998.0,11889.0,51643.0,53614.0,2550.0,35.0,5513.0,6366.0,9874.0,7181.0,2303.0,70.0,1266.0,3995.0,3735.0,1660.0,1041.0,7.0,259.0,1596.0,5302.0,2893.0,2368.0,42.0


#### Remove vehicles with nan Value

In [46]:
def remove_vehicles_with_nans_from_split(
    readouts_df: pd.DataFrame,
    raw_split_data: dict,
    split_name: str
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Removes vehicles with remaining NaN values from readouts, labels, and specs 
    for a given dataset split (train, test, validation).

    Args:
        readouts_df (pd.DataFrame): Interpolated readouts DataFrame.
        raw_split_data (dict): Dictionary containing 'tte' or 'labels' and 'spec' DataFrames.
        split_name (str): Name of the data split ("train", "test", or "validation").

    Returns:
        tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: Filtered readouts, labels/tte, and specs.
    """
    # NaN-Spalten identifizieren
    missing_cols = readouts_df.columns[readouts_df.isna().any()].tolist()

    if not missing_cols:
        print(f"[{split_name}] Keine fehlenden Werte – keine Fahrzeuge entfernt.")
        label_df = raw_split_data["tte"] if split_name == "train" else raw_split_data["labels"]
        return readouts_df.copy(), label_df.copy(), raw_split_data["spec"].copy()

    # Fahrzeuge mit NaNs finden
    vehicles_with_nans = (
        readouts_df.loc[readouts_df[missing_cols].isna().any(axis=1), "vehicle_id"]
        .unique()
    )

    print(f"[{split_name}] {len(vehicles_with_nans)} Fahrzeug(e) mit NaNs entfernt.")
    print(f"   Entfernte vehicle_ids: {list(vehicles_with_nans)}")

    # Entsprechende DataFrames bereinigen
    readouts_clean = readouts_df[~readouts_df["vehicle_id"].isin(vehicles_with_nans)].copy()

    label_key = "tte" if split_name == "train" else "labels"
    labels_clean = raw_split_data[label_key][
        ~raw_split_data[label_key]["vehicle_id"].isin(vehicles_with_nans)
    ].copy()

    specs_clean = raw_split_data["spec"][
        ~raw_split_data["spec"]["vehicle_id"].isin(vehicles_with_nans)
    ].copy()

    return readouts_clean, labels_clean, specs_clean


In [47]:
readouts_train, tte_train_df, specs_train_df = remove_vehicles_with_nans_from_split(
    readouts_inter_train, raw_data["train"], "train"
)

readouts_test, tte_test_df, specs_test_df = remove_vehicles_with_nans_from_split(
    readouts_inter_test, raw_data["test"], "test"
)

readouts_val, tte_val_df, specs_val_df = remove_vehicles_with_nans_from_split(
    readouts_inter_validation, raw_data["validation"], "validation"
)


[train] 2 Fahrzeug(e) mit NaNs entfernt.
   Entfernte vehicle_ids: [22, 28364]
[test] 2 Fahrzeug(e) mit NaNs entfernt.
   Entfernte vehicle_ids: [4498, 28365]
[validation] Keine fehlenden Werte – keine Fahrzeuge entfernt.


In [None]:
readouts_inter_train_missing_values = readouts_train.isnull().sum().sum()
readouts_inter_test_missing_values = readouts_test.isnull().sum().sum()
readouts_inter_validation_missing_values = readouts_val.isnull().sum().sum()

print(f"Missing values in train: before={train_missing_values}, after={readouts_inter_train_missing_values}")
print(f"Missing values in test: before={test_missing_values}, after={readouts_inter_test_missing_values}")
print(f"Missing values in validation: before={validation_missing_values}, after={readouts_inter_validation_missing_values}")


Missing values in train: before=354634, after=0
Missing values in test: before=66403, after=0
Missing values in validation: before=60339, after=0


In [48]:
raw_data_cleaned = {
    "train": {
        "readouts": readouts_train,
        "tte": tte_train_df,
        "spec": specs_train_df,
    },
    "test": {
        "readouts": readouts_test,
        "tte": tte_test_df,
        "spec": specs_test_df,
    },
    "validation": {
        "readouts": readouts_val,
        "tte": tte_val_df,
        "spec": specs_val_df,
    }
}