In [9]:
import pandas as pd
import numpy as np
import joblib
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

In [11]:
# Read the sample telemetry CSV (path is relative to the current working directory)
# and print the first rows as a quick sanity check of the columns and values.
df = pd.read_csv('../data/sample_telemetry.csv')
print(df.head())


             timestamp  battery_temp  solar_power    gyro_x  fuel_pressure
0  2025-01-01 00:00:00     30.721154     6.954021 -0.051763     149.350656
1  2025-01-01 00:00:01     30.011417     6.903807 -0.088838     149.152782
2  2025-01-01 00:00:02     29.927526     6.927261 -0.002699     149.916516
3  2025-01-01 00:00:03     30.579043     6.951232  0.005630     150.389920
4  2025-01-01 00:00:04     30.272814     6.994982 -0.073914     150.154612


In [12]:
def preprocess(df):
    """Clean a telemetry frame and standardise its feature columns.

    Args:
        df: DataFrame containing at least the columns ``battery_temp``,
            ``solar_power``, ``gyro_x`` and ``fuel_pressure``. A ``label``
            column is optional.

    Returns:
        A 4-tuple ``(X_scaled, y, scaler, features)``:
        the output of ``StandardScaler.fit_transform`` on the feature columns,
        the ``label`` column of the cleaned frame (``None`` when the frame has
        no such column), the fitted scaler, and the list of feature names.

    Raises:
        KeyError: if any of the feature columns is missing from ``df``.
    """
    features = ["battery_temp", "solar_power", "gyro_x", "fuel_pressure"]

    # Discard every row that has a missing value in any column (not only the
    # feature columns).
    cleaned = df.dropna()
    feature_frame = cleaned[features]

    # Labels are optional; they are only needed later for evaluation.
    if "label" in cleaned.columns:
        y = cleaned["label"]
    else:
        y = None

    # NOTE(review): the scaler is fit on all cleaned rows, i.e. before
    # run_pipeline applies split_data, so held-out rows influence the scaling
    # statistics. Confirm this is acceptable.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(feature_frame)

    return X_scaled, y, scaler, features

In [13]:
def split_data(X, y=None, test_size=0.2, random_state=42):
    """Split features (and labels, when present) into train and test parts.

    The result is always ordered like scikit-learn's ``train_test_split``:
    ``(X_train, X_test, y_train, y_test)``.

    Args:
        X: Feature matrix.
        y: Optional labels aligned with ``X``. When ``None`` no split is made.
        test_size: Forwarded to ``train_test_split`` (default ``0.2``).
        random_state: Forwarded to ``train_test_split`` (default ``42``).

    Returns:
        ``(X_train, X_test, y_train, y_test)``. Without labels this is
        ``(X, X, None, None)``: the full matrix serves as both the training
        and the test features, and both label slots are ``None``.
    """
    if y is None:
        # No labels: train on everything. The slots must follow the same
        # order as train_test_split, otherwise callers that unpack
        # X_train, X_test, y_train, y_test receive X_test=None and y_train=X.
        return X, X, None, None

    return train_test_split(X, y, test_size=test_size, random_state=random_state)


In [14]:
def train_model(X_train):
    """Fit an ``IsolationForest`` on the training features and return it.

    The forest is built with ``contamination=0.01`` and ``random_state=42``.

    Args:
        X_train: Feature matrix to fit on.

    Returns:
        The fitted ``IsolationForest`` instance.
    """
    # fit() returns the estimator itself, so construction and fitting chain.
    return IsolationForest(contamination=0.01, random_state=42).fit(X_train)

In [15]:
def evaluate_model(model, X_test, y_test):
    """Print accuracy and a classification report for ``model`` on labelled data.

    Args:
        model: Object exposing ``predict(X_test)``.
        X_test: Features to predict on.
        y_test: True labels, or ``None`` when no labels are available.
            NOTE(review): the mapping below assumes labels use 1 for anomaly
            and 0 for normal; confirm against the data.

    Returns:
        None. Results are printed; when ``y_test`` is ``None`` a notice is
        printed and evaluation is skipped.
    """
    if y_test is None:
        # The arrow was previously stored as mis-decoded bytes and printed as
        # garbage characters.
        print("No labels found → Skipping evaluation.")
        return

    # Predictions equal to -1 become 1, every other value becomes 0.
    raw_preds = np.asarray(model.predict(X_test))
    preds = (raw_preds == -1).astype(int)

    print("\nAccuracy:", accuracy_score(y_test, preds))
    print("\nClassification Report:\n", classification_report(y_test, preds))

In [16]:
def save_model(model, scaler, features):
    """Write the model, scaler and feature list to ``models/model.pkl``.

    The three objects are stored together in one dictionary under the keys
    ``"model"``, ``"scaler"`` and ``"features"``. The ``models`` directory is
    created if it does not exist yet. Both paths are relative to the current
    working directory.

    Args:
        model: Trained model object to persist.
        scaler: Scaler object to persist alongside the model.
        features: Feature names to persist alongside the model.
    """
    bundle = {
        "model": model,
        "scaler": scaler,
        "features": features
    }

    # Make sure the target directory is there before dumping.
    os.makedirs("models", exist_ok=True)
    joblib.dump(bundle, "models/model.pkl")

    print("\nModel saved to models/model.pkl")

In [17]:
def run_pipeline():
    """Run the training workflow end to end.

    Steps, in order: load the data, preprocess it, split it, fit the model,
    evaluate it, and save the model together with its scaler and feature
    list. Takes no arguments and returns nothing; a completion message is
    printed at the end.
    """
    # NOTE(review): load_data() is not defined in any cell visible here.
    # Confirm it exists elsewhere in the notebook, otherwise this raises
    # NameError on a fresh kernel.
    raw_frame = load_data()

    scaled, labels, scaler, feature_names = preprocess(raw_frame)

    # Training labels are unpacked but not used by the steps below.
    train_X, test_X, _train_y, test_y = split_data(scaled, labels)

    fitted = train_model(train_X)
    evaluate_model(fitted, test_X, test_y)
    save_model(fitted, scaler, feature_names)

    print("Training Pipeline Completed Successfully!")

