In [1]:
import pandas as pd

# Read X_test.csv and store its first 20 rows as new_data.csv
# (written without the index column).
df = pd.read_csv("../data/X_test.csv")
df.iloc[:20].to_csv(path_or_buf="../data/new_data.csv", index=False)

In [None]:
# predict_new_data.py
# -------------------------
# Uporabi trenirani model za napovedovanje na novi množici

import pandas as pd
import joblib
import os

# -------------------------
# 1. Locations of the model, the input data and the output file
# -------------------------
MODEL_PATH = "../models/random_forest_model.pkl"  # adjust if you use a timestamped version
NEW_DATA_PATH = "../data/new_data.csv"  # input file without the 'status' label
OUTPUT_PATH = "../data/predictions.csv"

# -------------------------
# 2. Load the model
# -------------------------
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
if os.path.exists(MODEL_PATH):
    model = joblib.load(MODEL_PATH)
else:
    # Fail early with the offending path in the message.
    raise FileNotFoundError(f"Model not found at {MODEL_PATH}")

print("✅ Model uspešno naložen.")

# -------------------------
# 3. Load and prepare the data
# -------------------------
X_new = pd.read_csv(NEW_DATA_PATH)

# Remove the 'timestamp' column when the file has one; errors='ignore' makes
# this a no-op when the column is absent.
X_new = X_new.drop(columns=['timestamp'], errors='ignore')

print(f"🔢 Naloženih primerov: {len(X_new)}")

# -------------------------
# 4. Check the columns and trim if necessary
# -------------------------
# feature_names_in_ is only defined when the estimator was fitted on input whose
# column names are all strings; fail with an explicit message instead of an
# opaque attribute lookup error.
model_features = getattr(model, "feature_names_in_", None)
if model_features is None:
    raise AttributeError(
        "Model has no 'feature_names_in_'; it must be fitted on a DataFrame "
        "with string column names so the input columns can be validated."
    )

# Hash-based membership tests instead of scanning the feature-name array
# once per column. The resulting lists keep the original ordering.
expected_columns = set(model_features)
available_columns = set(X_new.columns)
missing = [col for col in model_features if col not in available_columns]
extra = [col for col in X_new.columns if col not in expected_columns]

# Missing features make prediction impossible: report them, then abort.
if missing:
    print("⚠️ Manjkajoči stolpci v new_data:", missing)
    raise ValueError("Napoved ni mogoča zaradi manjkajočih značilnic.")

# Extra columns are only reported; the selection below discards them.
if extra:
    print("ℹ️ Odstranjujem dodatne stolpce:", extra)

# Keep exactly the columns the model expects, in the model's order
X_new = X_new[model_features]

# -------------------------
# 5. Prediction
# -------------------------
predictions = model.predict(X_new)

# -------------------------
# 6. Save the results
# -------------------------
# Single-column table holding the predictions, written without the index column.
pred_df = pd.DataFrame(dict(predicted_status=predictions))
pred_df.to_csv(path_or_buf=OUTPUT_PATH, index=False)

print(f"📄 Napovedi shranjene v: {OUTPUT_PATH}")


✅ Model uspešno naložen.
🔢 Naloženih primerov: 20
📄 Napovedi shranjene v: ../data/predictions.csv


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [3]:
# Compare the columns of the raw input file with the features of the loaded model.
# X_new is reduced to exactly the model's features by the prediction cell, so
# checking X_new here would always print two empty lists on a clean top-to-bottom
# run. Only the header of the file on disk is read (nrows=0), which shows what the
# file actually contains without loading any rows.
model_features = model.feature_names_in_
raw_columns = pd.read_csv(NEW_DATA_PATH, nrows=0).columns

expected_columns = set(model_features)
available_columns = set(raw_columns)
missing = [col for col in model_features if col not in available_columns]
extra = [col for col in raw_columns if col not in expected_columns]

print("📉 Manjkajoči stolpci:", missing)
print("🧪 Dodatni stolpci:", extra)


📉 Manjkajoči stolpci: []
🧪 Dodatni stolpci: ['max_line_loading_percent_basecase', 'min_bus_voltage_pu_basecase', 'max_bus_voltage_pu_basecase', 'max_line_loading_percent_contingency', 'min_bus_voltage_pu_contingency', 'max_bus_voltage_pu_contingency']
