In [4]:
import pandas as pd
import numpy as np


# Versicherungsdaten (Hauptliste)
df_ins = pd.read_csv("../data/motor_data14-2018.csv")

In [5]:
# Fahrzeugdaten
df_car = pd.read_csv("../data/The_Ultimate_Cars_Dataset_2024.csv", encoding="ISO-8859-1")

In [6]:
# Nur ersten 5 Zeilen anzeigen
print("Versicherungsdaten (motor_data14-2018.csv):")
display(df_ins.head())

print("\nFahrzeugdaten (The_Ultimate_Cars_Dataset_2024.csv):")
display(df_car.head())

# Optional: Spaltennamen vergleichen
print("\nSpalten in Versicherungsdaten:")
print(df_ins.columns.tolist())

print("\nSpalten in Fahrzeugdaten:")
print(df_car.columns.tolist())


Versicherungsdaten (motor_data14-2018.csv):


Unnamed: 0,SEX,INSR_BEGIN,INSR_END,EFFECTIVE_YR,INSR_TYPE,INSURED_VALUE,PREMIUM,OBJECT_ID,PROD_YEAR,SEATS_NUM,CARRYING_CAPACITY,TYPE_VEHICLE,CCM_TON,MAKE,USAGE,CLAIM_PAID
0,0,08-AUG-17,07-AUG-18,8,1202,519755.22,5097.83,5000029885,2007.0,4.0,6.0,Pick-up,3153.0,NISSAN,Own Goods,
1,0,08-AUG-16,07-AUG-17,8,1202,519755.22,6556.52,5000029885,2007.0,4.0,6.0,Pick-up,3153.0,NISSAN,Own Goods,
2,0,08-AUG-15,07-AUG-16,8,1202,519755.22,6556.52,5000029885,2007.0,4.0,6.0,Pick-up,3153.0,NISSAN,Own Goods,
3,0,08-AUG-14,07-AUG-15,8,1202,519755.22,5102.83,5000029885,2007.0,4.0,6.0,Pick-up,3153.0,NISSAN,Own Goods,
4,0,08-AUG-17,07-AUG-18,8,1202,1400000.0,13304.87,5000029901,2010.0,4.0,7.0,Pick-up,2494.0,TOYOTA,Own Goods,



Fahrzeugdaten (The_Ultimate_Cars_Dataset_2024.csv):


Unnamed: 0,Company Names,Cars Names,Engines,CC/Battery Capacity,HorsePower,Total Speed,Performance(0 - 100 )KM/H,Cars Prices,Fuel Types,Seats,Torque
0,FERRARI,SF90 STRADALE,V8,3990 cc,963 hp,340 km/h,2.5 sec,"$1,100,000",plug in hyrbrid,2,800 Nm
1,ROLLS ROYCE,PHANTOM,V12,6749 cc,563 hp,250 km/h,5.3 sec,"$460,000",Petrol,5,900 Nm
2,Ford,KA+,1.2L Petrol,"1,200 cc",70-85 hp,165 km/h,10.5 sec,"$12,000-$15,000",Petrol,5,100 - 140 Nm
3,MERCEDES,GT 63 S,V8,"3,982 cc",630 hp,250 km/h,3.2 sec,"$161,000",Petrol,4,900 Nm
4,AUDI,AUDI R8 Gt,V10,"5,204 cc",602 hp,320 km/h,3.6 sec,"$253,290",Petrol,2,560 Nm



Spalten in Versicherungsdaten:
['SEX', 'INSR_BEGIN', 'INSR_END', 'EFFECTIVE_YR', 'INSR_TYPE', 'INSURED_VALUE', 'PREMIUM', 'OBJECT_ID', 'PROD_YEAR', 'SEATS_NUM', 'CARRYING_CAPACITY', 'TYPE_VEHICLE', 'CCM_TON', 'MAKE', 'USAGE', 'CLAIM_PAID']

Spalten in Fahrzeugdaten:
['Company Names', 'Cars Names', 'Engines', 'CC/Battery Capacity', 'HorsePower', 'Total Speed', 'Performance(0 - 100 )KM/H', 'Cars Prices', 'Fuel Types', 'Seats', 'Torque']


### sometimes seats are displayed as 2+2 for example instead of 4 in the ultimate cars dataset so here we fix it:

In [7]:
def normalize_seats(value):
    if pd.isna(value):
        return None
    if isinstance(value, str) and "+" in value:
        try:
            return sum(int(part.strip()) for part in value.split("+"))
        except:
            return None
    try:
        return int(value)
    except:
        return None

df_car["Seats_CLEAN"] = df_car["Seats"].apply(normalize_seats)


In [8]:
print(df_car[["Seats", "Seats_CLEAN"]].drop_duplicates().sort_values("Seats_CLEAN"))

     Seats  Seats_CLEAN
622      1          1.0
0        2          2.0
298      3          3.0
3        4          4.0
11     2+2          4.0
1        5          5.0
453      6          6.0
20       7          7.0
197      8          8.0
327      9          9.0
286     12         12.0
255     15         15.0
241     20         20.0
1046  215          NaN
1047   27          NaN
1048   26          NaN
1055   78          NaN
1056   29          NaN
1059  212          NaN


### prepare for join:

In [9]:
# Spalten vereinheitlichen
df_ins["MAKE_NORM"] = df_ins["MAKE"].str.strip().str.upper()
df_car["MAKE_NORM"] = df_car["Company Names"].str.strip().str.upper()

# Falls nicht schon gemacht:
df_ins["SEATS_NUM"] = pd.to_numeric(df_ins["SEATS_NUM"], errors="coerce")


In [13]:
def clean_price(price_string):
    """
    Wandelt Preisangaben wie "$1,200" oder "$12,000–$15,000" in float-Werte um.
    Gibt np.nan zurück bei ungültigen Eingaben.
    """
    import re
    import numpy as np

    try:
        if isinstance(price_string, str):
            parts = price_string.split("–")
            numbers = [float(re.sub(r"[^\d.]", "", p)) for p in parts if p.strip()]
            if len(numbers) == 2:
                return np.mean(numbers)
            elif len(numbers) == 1:
                return numbers[0]
    except:
        pass

    return np.nan


## Join the ultimate cars to the insurance data on the car brand, the number of seats and the nearest price to the insurance value:

In [16]:
# Datum vorbereiten
df_ins["INSR_BEGIN"] = pd.to_datetime(df_ins["INSR_BEGIN"], errors="coerce")

# Fahrzeugalter berechnen
df_ins["VEHICLE_AGE_AT_INSURE"] = df_ins["INSR_BEGIN"].dt.year - df_ins["PROD_YEAR"]

# Annahme: 12 % jährlicher Wertverlust
ANNUAL_LOSS_RATE = 0.12

# Zurückgerechneter Startpreis
df_ins["EST_ORIGINAL_VALUE"] = df_ins["INSURED_VALUE"] / ((1 - ANNUAL_LOSS_RATE) ** df_ins["VEHICLE_AGE_AT_INSURE"])


  df_ins["INSR_BEGIN"] = pd.to_datetime(df_ins["INSR_BEGIN"], errors="coerce")


In [17]:
# Merge auf MAKE + SEATS
merged_primary = df_ins[["OBJECT_ID", "MAKE_NORM", "SEATS_NUM", "EST_ORIGINAL_VALUE"]].merge(
    df_car,
    how="inner",
    left_on=["MAKE_NORM", "SEATS_NUM"],
    right_on=["MAKE_NORM", "Seats_CLEAN"],
    suffixes=("_ins", "_car")
)


# Preis bereinigen
merged_primary["CarPrice_CLEAN"] = merged_primary["Cars Prices"].apply(clean_price)

# Differenz berechnen
merged_primary["price_diff"] = abs(merged_primary["EST_ORIGINAL_VALUE"] - merged_primary["CarPrice_CLEAN"])

# Bestes Match pro Versicherungsfall
best_primary = merged_primary.sort_values("price_diff").groupby("OBJECT_ID").first().reset_index()


In [18]:
# 🔁 Fallback-Strategie: Join nur auf MAKE, mit Preisvergleich über EST_ORIGINAL_VALUE
unmatched_ids = df_ins.loc[~df_ins["OBJECT_ID"].isin(best_primary["OBJECT_ID"]), "OBJECT_ID"]
df_unmatched = df_ins[df_ins["OBJECT_ID"].isin(unmatched_ids)]

merged_fallback = df_unmatched.merge(
    df_car,
    how="inner",
    left_on="MAKE_NORM",
    right_on="MAKE_NORM",
    suffixes=("_ins", "_car")
)

# Fahrzeugpreis bereinigen
merged_fallback["CarPrice_CLEAN"] = merged_fallback["Cars Prices"].apply(clean_price)

# Differenz zum hochgerechneten Originalpreis berechnen
merged_fallback["price_diff"] = abs(merged_fallback["EST_ORIGINAL_VALUE"] - merged_fallback["CarPrice_CLEAN"])

# Bestes Match pro Fahrzeug
best_fallback = merged_fallback.sort_values("price_diff").groupby("OBJECT_ID").first().reset_index()


In [19]:
final_matches = pd.concat([best_primary, best_fallback], ignore_index=True)
print(final_matches)

        OBJECT_ID   MAKE_NORM  SEATS_NUM  EST_ORIGINAL_VALUE Company Names  \
0      5000017906      TOYOTA        4.0        2.993730e+06        TOYOTA   
1      5000017907  MITSUBISHI        4.0        2.319522e+06    Mitsubishi   
2      5000017908  MITSUBISHI        4.0        1.089622e+06    Mitsubishi   
3      5000017909      TOYOTA        4.0        1.816959e+06        TOYOTA   
4      5000017910  MITSUBISHI        4.0        1.285943e+06    Mitsubishi   
...           ...         ...        ...                 ...           ...   
86865  5001334142      TOYOTA       14.0        1.056000e+06        TOYOTA   
86866  5001336495      TOYOTA        8.0        1.159761e+07        TOYOTA   
86867  5001337146      TOYOTA        1.0        1.391526e+06        TOYOTA   
86868  5001337147      TOYOTA        1.0        1.391576e+06        TOYOTA   
86869  5001337149      TOYOTA        1.0        1.391576e+06        TOYOTA   

        Cars Names                     Engines CC/Battery Capac

In [20]:
matched_makes = final_matches["MAKE_NORM"].dropna().unique()
all_makes = df_ins["MAKE_NORM"].dropna().unique()
no_match_makes = sorted(set(all_makes) - set(matched_makes))

print("Marken ohne Fahrzeug-Match:", no_match_makes)


Marken ohne Fahrzeug-Match: ['*', '1982', '1984', '1985', '2 AXEL LOWBED', '2011', '3-AXEL DRAWBAR CARGO TRAILER', '3-AXLE DRAWBAR CARGO TRAILER', '330-30 TRAILER', '4WDTUAB TERES COPIC HANDLER', 'A', 'ABAY', 'ACHIVE', 'ACHIVER', 'ADDIS GEELY', 'ADDIS GELLY', 'ADGE', 'AEOLUS', 'AFRO', 'AGEL', 'AIRCARGO MOBILE TRUCK', 'ALAMI', 'ALFA', 'ALFAROMEO', 'AMBULANCE', 'AMI', 'APACH', 'APACHE', 'APACHE RTR', 'APACHERTE', 'APPACH EBS', 'APPACHE', 'ARBE EMERET', 'AREB EMERATE', 'ARSLAN', 'ARTICULATED', 'ARTICULATED DUMP TRUCK', 'ARTICULATED TRACTOR', 'ASNAKE ENGINERING', 'ASNAKE ENGNERING', 'ASTRA', 'ATOZ', 'ATZ', 'AU', 'AUMAN', 'AUTO', 'AUTOBUS', 'AUTOMOBIL', 'AUTOMOBILE', 'AWASH', 'AXION', 'B.AKEL', 'BACK LOADER', 'BAIC', 'BAIC AUTOMOBIL', 'BAJAJ', 'BAJAJI', 'BARTOLETI', 'BASHAN', 'BAYBEN FIRE FIGHTER', 'BAYBEN HIGHBAD', 'BAYBEN HIGHBAD TRAILER', 'BAYBEN TRUCK HIGHBED', 'BEBEN', 'BEBEN HIGHBAD', 'BEBEN SEMI TRAILER', 'BEBIEN TANKER', 'BEL TRACTOR', 'BELARUS', 'BELARUS TRACTOR', 'BELL', 'BELL TRA

In [21]:
# Alle Marken in Versicherungsdaten (vereinheitlicht)
makes_ins = df_ins["MAKE_NORM"].dropna().unique()

# Alle Marken in Fahrzeugdaten (vereinheitlicht)
makes_car = df_car["MAKE_NORM"].dropna().unique()

# Alle Marken aus Versicherungsdaten, die in Fahrzeugdaten fehlen
unmatched_makes = sorted(set(makes_ins) - set(makes_car))

print("Marken in Versicherungsdaten ohne Entsprechung in Fahrzeugdaten:")
for make in unmatched_makes:
    print("-", make)


Marken in Versicherungsdaten ohne Entsprechung in Fahrzeugdaten:
- *
- 1982
- 1984
- 1985
- 2 AXEL LOWBED
- 2011
- 3-AXEL DRAWBAR CARGO TRAILER
- 3-AXLE DRAWBAR CARGO TRAILER
- 330-30 TRAILER
- 4WDTUAB TERES COPIC HANDLER
- A
- ABAY
- ACHIVE
- ACHIVER
- ADDIS GEELY
- ADDIS GELLY
- ADGE
- AEOLUS
- AFRO
- AGEL
- AIRCARGO MOBILE TRUCK
- ALAMI
- ALFA
- ALFAROMEO
- AMBULANCE
- AMI
- APACH
- APACHE
- APACHE RTR
- APACHERTE
- APPACH EBS
- APPACHE
- ARBE EMERET
- AREB EMERATE
- ARSLAN
- ARTICULATED
- ARTICULATED DUMP TRUCK
- ARTICULATED TRACTOR
- ASNAKE ENGINERING
- ASNAKE ENGNERING
- ASTRA
- ATOZ
- ATZ
- AU
- AUMAN
- AUTO
- AUTOBUS
- AUTOMOBIL
- AUTOMOBILE
- AWASH
- AXION
- B.AKEL
- BACK LOADER
- BAIC
- BAIC AUTOMOBIL
- BAJAJ
- BAJAJI
- BARTOLETI
- BASHAN
- BAYBEN FIRE FIGHTER
- BAYBEN HIGHBAD
- BAYBEN HIGHBAD TRAILER
- BAYBEN TRUCK HIGHBED
- BEBEN
- BEBEN HIGHBAD
- BEBEN SEMI TRAILER
- BEBIEN TANKER
- BEL TRACTOR
- BELARUS
- BELARUS TRACTOR
- BELL
- BELL TRACTOR
- BEYBEN TRUCK
- BISHEFTU
- B

#### The insurance data is way bigger than the car data so there are some car companies that arent in the cars dataset

In [22]:
print("Alle Spalten im final_matches DataFrame:\n")
print(final_matches.columns.tolist())


Alle Spalten im final_matches DataFrame:

['OBJECT_ID', 'MAKE_NORM', 'SEATS_NUM', 'EST_ORIGINAL_VALUE', 'Company Names', 'Cars Names', 'Engines', 'CC/Battery Capacity', 'HorsePower', 'Total Speed', 'Performance(0 - 100 )KM/H', 'Cars Prices', 'Fuel Types', 'Seats', 'Torque', 'Seats_CLEAN', 'CarPrice_CLEAN', 'price_diff', 'SEX', 'INSR_BEGIN', 'INSR_END', 'EFFECTIVE_YR', 'INSR_TYPE', 'INSURED_VALUE', 'PREMIUM', 'PROD_YEAR', 'CARRYING_CAPACITY', 'TYPE_VEHICLE', 'CCM_TON', 'MAKE', 'USAGE', 'CLAIM_PAID', 'VEHICLE_AGE_AT_INSURE']


In [23]:
# Liste der Spalten, die entfernt werden sollen
cols_to_drop = [
    "MAKE", "Company Names", "Cars Names",
    "Cars Prices", "SEATS_NUM", "Seats",
    "price_diff"
]

# Neuen bereinigten DataFrame erstellen
final_df_cleaned = final_matches.drop(columns=cols_to_drop)



In [24]:
print("Alle Spalten im final_matches DataFrame:\n")
print(final_df_cleaned.columns.tolist())

Alle Spalten im final_matches DataFrame:

['OBJECT_ID', 'MAKE_NORM', 'EST_ORIGINAL_VALUE', 'Engines', 'CC/Battery Capacity', 'HorsePower', 'Total Speed', 'Performance(0 - 100 )KM/H', 'Fuel Types', 'Torque', 'Seats_CLEAN', 'CarPrice_CLEAN', 'SEX', 'INSR_BEGIN', 'INSR_END', 'EFFECTIVE_YR', 'INSR_TYPE', 'INSURED_VALUE', 'PREMIUM', 'PROD_YEAR', 'CARRYING_CAPACITY', 'TYPE_VEHICLE', 'CCM_TON', 'USAGE', 'CLAIM_PAID', 'VEHICLE_AGE_AT_INSURE']


In [25]:
# Stelle sicher, dass INSR_END als datetime erkannt wird
final_df_cleaned["INSR_END"] = pd.to_datetime(final_df_cleaned["INSR_END"], errors="coerce")

# Jetzt funktioniert der Vergleich
active_contracts = final_df_cleaned[final_df_cleaned["INSR_END"] > pd.Timestamp.today()]


  final_df_cleaned["INSR_END"] = pd.to_datetime(final_df_cleaned["INSR_END"], errors="coerce")


In [26]:
# Zeige z. B. die ersten 5 aktiven Verträge
display(active_contracts.head())
print(f"Aktive Verträge gefunden: {len(active_contracts)}")


Unnamed: 0,OBJECT_ID,MAKE_NORM,EST_ORIGINAL_VALUE,Engines,CC/Battery Capacity,HorsePower,Total Speed,Performance(0 - 100 )KM/H,Fuel Types,Torque,...,INSR_TYPE,INSURED_VALUE,PREMIUM,PROD_YEAR,CARRYING_CAPACITY,TYPE_VEHICLE,CCM_TON,USAGE,CLAIM_PAID,VEHICLE_AGE_AT_INSURE


Aktive Verträge gefunden: 0


In [27]:

# Annahme: 12 % jährlicher Wertverlust
ANNUAL_LOSS_RATE = 0.12

# Fahrzeugalter berechnen
df_ins["INSR_BEGIN"] = pd.to_datetime(df_ins["INSR_BEGIN"], errors="coerce")
df_ins["VEHICLE_AGE_AT_INSURE"] = df_ins["INSR_BEGIN"].dt.year - df_ins["PROD_YEAR"]

# Zurückgerechneter Startpreis
df_ins["EST_ORIGINAL_VALUE"] = df_ins["INSURED_VALUE"] / ((1 - ANNUAL_LOSS_RATE) ** df_ins["VEHICLE_AGE_AT_INSURE"])


In [28]:
# Durchschnittlicher Wertverlust pro Jahr (NaN-Schutz bei Alter = 0)
df_ins["avg_yearly_value"] = df_ins["EST_ORIGINAL_VALUE"] / df_ins["VEHICLE_AGE_AT_INSURE"]
df_ins["avg_yearly_value"].replace([np.inf, -np.inf], np.nan, inplace=True)
df_ins["avg_yearly_value"].fillna(0, inplace=True)


In [30]:
print(final_matches.columns)


Index(['OBJECT_ID', 'MAKE_NORM', 'SEATS_NUM', 'EST_ORIGINAL_VALUE',
       'Company Names', 'Cars Names', 'Engines', 'CC/Battery Capacity',
       'HorsePower', 'Total Speed', 'Performance(0 - 100 )KM/H', 'Cars Prices',
       'Fuel Types', 'Seats', 'Torque', 'Seats_CLEAN', 'CarPrice_CLEAN',
       'price_diff', 'SEX', 'INSR_BEGIN', 'INSR_END', 'EFFECTIVE_YR',
       'INSR_TYPE', 'INSURED_VALUE', 'PREMIUM', 'PROD_YEAR',
       'CARRYING_CAPACITY', 'TYPE_VEHICLE', 'CCM_TON', 'MAKE', 'USAGE',
       'CLAIM_PAID', 'VEHICLE_AGE_AT_INSURE'],
      dtype='object')


In [40]:
final_df_cleaned["power_to_weight"] = pd.to_numeric(final_df_cleaned["HorsePower"], errors="coerce") / pd.to_numeric(final_df_cleaned["CARRYING_CAPACITY"], errors="coerce")
final_df_cleaned["power_to_weight"].replace([np.inf, -np.inf], np.nan, inplace=True)
final_df_cleaned["power_to_weight"].fillna(0, inplace=True)


In [41]:
# Datum vorbereiten
final_df_cleaned["INSR_BEGIN"] = pd.to_datetime(final_df_cleaned["INSR_BEGIN"], errors="coerce")

# Alter des Fahrzeugs beim Versicherungsbeginn
final_df_cleaned["VEHICLE_AGE_AT_INSURE"] = final_df_cleaned["INSR_BEGIN"].dt.year - final_df_cleaned["PROD_YEAR"]

# Konstanten
ANNUAL_LOSS_RATE = 0.12

# Geschätzter Originalwert
final_df_cleaned["EST_ORIGINAL_VALUE"] = final_df_cleaned["INSURED_VALUE"] / (
    (1 - ANNUAL_LOSS_RATE) ** final_df_cleaned["VEHICLE_AGE_AT_INSURE"]
)

# Durchschnittlicher Wertverlust pro Jahr
final_df_cleaned["avg_yearly_value"] = final_df_cleaned["EST_ORIGINAL_VALUE"] / final_df_cleaned["VEHICLE_AGE_AT_INSURE"]

# Optional: Unendlichkeiten oder NaNs behandeln (bei Alter = 0)
final_df_cleaned["avg_yearly_value"].replace([np.inf, -np.inf], np.nan, inplace=True)
final_df_cleaned["avg_yearly_value"].fillna(0, inplace=True)


In [48]:
# HorsePower als float bereinigen
final_df_cleaned["HorsePower"] = final_df_cleaned["HorsePower"].astype(str).str.extract(r"(\d+\.?\d*)").astype(float)

In [49]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import joblib


# Kombiniere X und y in einen DataFrame
features = [
    "VEHICLE_AGE_AT_INSURE",
    "CarPrice_CLEAN",
    "avg_yearly_value",
    "HorsePower",
    "power_to_weight"
]

df_model = final_df_cleaned[features + ["PREMIUM"]].copy()

# Entferne Zeilen mit NaNs in Features oder Ziel
df_model = df_model.dropna()

# Split wieder herstellen
X = df_model[features]
y = df_model["PREMIUM"]

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [50]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [51]:
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)


In [52]:
from sklearn.metrics import mean_squared_error, r2_score

# Vorhersage auf Testdaten
y_pred = model.predict(X_test)

# RMSE und R² auswerten
rmse = mean_squared_error(y_test, y_pred, squared=False)
r2 = r2_score(y_test, y_pred)

print(f"RMSE: {rmse:.2f}")
print(f"R²: {r2:.3f}")


RMSE: 6479.06
R²: 0.651


In [53]:
import joblib
joblib.dump(model, "random_forest_insurance_model.pkl")


['random_forest_insurance_model.pkl']

In [54]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Modell initialisieren
lin_model = LinearRegression()

# Training
lin_model.fit(X_train, y_train)

# Vorhersage
y_pred_lin = lin_model.predict(X_test)

# Evaluation
rmse_lin = mean_squared_error(y_test, y_pred_lin, squared=False)
r2_lin = r2_score(y_test, y_pred_lin)

print("🔹 Linear Regression")
print(f"RMSE: {rmse_lin:.2f}")
print(f"R²:   {r2_lin:.3f}")


🔹 Linear Regression
RMSE: 7865.18
R²:   0.486


In [55]:
import joblib
joblib.dump(lin_model, "linear_regression_insurance_model.pkl")

['linear_regression_insurance_model.pkl']

In [56]:
from sklearn.ensemble import RandomForestRegressor
import joblib

model_rf = RandomForestRegressor(random_state=42)
model_rf.fit(X_train, y_train)

# Speichern
joblib.dump(model_rf, "random_forest_hf_compatible.pkl")


['random_forest_hf_compatible.pkl']