# Użycie wcześniej wytrenowanego modelu do nowych danych

In [1]:
# KOMÓRKA 0 – funkcje pomocnicze (takie same jak w notebooku treningowym)

import pandas as pd
import numpy as np

# Scale factor applied to the raw X/Y/Z columns (raw counts -> g).
# NOTE(review): 0.0039 with FULL_RES=1 presumably corresponds to the ADXL345
# full-resolution mode (3.9 mg/LSB) - confirm against the sensor configuration.
LSB_TO_G = 0.0039  # FULL_RES=1

def load_xyz_from_csv(path):
    """Load a time axis and three acceleration axes from a CSV file.

    Columns are located by name, case-insensitively and ignoring whitespace
    around the header names (so a header such as ``"time, x, y, z"`` works):
      * time: the first column whose name contains "time" or "czas",
      * X / Y / Z: the first column whose name starts with "x" / "y" / "z".

    Parameters
    ----------
    path : str or path-like
        CSV file readable by ``pandas.read_csv``.

    Returns
    -------
    t : numpy.ndarray
        Values of the time column, unmodified.
    ax, ay, az : numpy.ndarray
        X/Y/Z columns multiplied by ``LSB_TO_G``.
    Fs : float
        Sampling rate estimated as ``1 / mean(diff(t))``.

    Raises
    ------
    ValueError
        If a required column cannot be found, if the file holds fewer than two
        samples, or if the mean time step is not a positive finite number.
    """
    df = pd.read_csv(path)

    # Normalised header -> original header. Stripping matters because
    # read_csv keeps the blank that follows a comma in the header line.
    cols_lower = {str(c).strip().lower(): c for c in df.columns}

    def _first_match(predicate):
        # First original column name whose normalised form satisfies predicate.
        return next((orig for key, orig in cols_lower.items() if predicate(key)), None)

    time_col = _first_match(lambda key: "time" in key or "czas" in key)
    x_col = _first_match(lambda key: key.startswith("x"))
    y_col = _first_match(lambda key: key.startswith("y"))
    z_col = _first_match(lambda key: key.startswith("z"))

    if time_col is None or x_col is None or y_col is None or z_col is None:
        raise ValueError(
            f"Nie mogę znaleźć kolumn w {path}, mam: {df.columns.tolist()}"
        )

    t  = df[time_col].to_numpy()
    ax = df[x_col].to_numpy() * LSB_TO_G
    ay = df[y_col].to_numpy() * LSB_TO_G
    az = df[z_col].to_numpy() * LSB_TO_G

    # At least two samples are needed to estimate a time step; otherwise the
    # mean of an empty diff would silently yield Fs = NaN.
    if t.size < 2:
        raise ValueError(
            f"Za mało próbek w {path}: potrzebne co najmniej 2, jest {t.size}"
        )

    dt = np.mean(np.diff(t))
    # A zero/negative/NaN step would give an infinite or meaningless Fs.
    if not np.isfinite(dt) or dt <= 0:
        raise ValueError(f"Nieprawidłowy krok czasu w {path}: dt={dt}")

    Fs = 1.0 / dt
    return t, ax, ay, az, Fs


# Detection parameters (can be copied verbatim from the previous notebook).
# All values are expressed in the units of the time axis `t`
# (seconds, judging by the _S suffix - confirm against the data).
BASELINE_LEN_S   = 0.5    # initial span used to estimate baseline mean/std
SMOOTH_LEN_S     = 0.05   # moving-average length applied to the magnitude
STD_THRESHOLD    = 3.0    # threshold = baseline mean + STD_THRESHOLD * baseline std
DELAY_AFTER_JUMP = 1.0    # gap between the detected jump and the window start
WINDOW_LEN_S     = 12.0   # length of the extracted window

def detect_steady_window(t, ax, ay, az,
                         baseline_len_s=BASELINE_LEN_S,
                         smooth_len_s=SMOOTH_LEN_S,
                         std_threshold=STD_THRESHOLD,
                         delay_after_jump=DELAY_AFTER_JUMP,
                         window_len_s=WINDOW_LEN_S):
    """Cut a fixed-length window that starts shortly after the first jump in
    acceleration magnitude.

    Steps:
      1. Magnitude ``sqrt(ax^2 + ay^2 + az^2)`` is computed.
      2. Mean and std of the magnitude over ``t <= t[0] + baseline_len_s`` give
         the threshold ``mean + std_threshold * std``.
      3. The magnitude is smoothed with a moving average of ``smooth_len_s``.
      4. The first sample whose smoothed magnitude exceeds the threshold marks
         the event; the window starts ``delay_after_jump`` later. If nothing
         exceeds the threshold the window starts at ``t[0] + 1.0``.
      5. The window lasts ``window_len_s``; if it would run past the record it
         is shifted to end at ``t[-1]`` and clipped so it never starts before
         ``t[0]``.

    Parameters
    ----------
    t, ax, ay, az : array-like
        Time axis and the three acceleration components (equal length).
    baseline_len_s, smooth_len_s, std_threshold, delay_after_jump, window_len_s : float
        Detection parameters, see the module-level constants.

    Returns
    -------
    tuple
        ``(t_seg, ax_seg, ay_seg, az_seg, (t_start, t_end), mag, mag_smooth, thresh)``
        where the ``*_seg`` arrays are the samples with
        ``t_start <= t <= t_end``.

    Raises
    ------
    ValueError
        If fewer than two samples are given or the mean time step is not a
        positive finite number.
    """
    t = np.asarray(t)
    ax = np.asarray(ax)
    ay = np.asarray(ay)
    az = np.asarray(az)

    if t.size < 2:
        raise ValueError(
            f"detect_steady_window wymaga co najmniej 2 próbek, jest {t.size}"
        )

    mag = np.sqrt(ax**2 + ay**2 + az**2)

    # Baseline statistics come from the raw (unsmoothed) magnitude.
    t0 = t[0]
    baseline_mask = t <= (t0 + baseline_len_s)
    baseline = mag[baseline_mask]
    mu = baseline.mean()
    sigma = baseline.std()
    thresh = mu + std_threshold * sigma

    dt = np.mean(np.diff(t))
    if not np.isfinite(dt) or dt <= 0:
        raise ValueError(f"Nieprawidłowy krok czasu: dt={dt}")
    Fs = 1.0 / dt

    # Kernel length in samples: at least 1, and never longer than the signal.
    # np.convolve(mode="same") returns max(len(signal), len(kernel)) samples,
    # so an over-long kernel would make mag_smooth longer than t.
    win_samples = int(round(smooth_len_s * Fs))
    win_samples = max(1, min(win_samples, mag.size))
    kernel = np.ones(win_samples) / win_samples
    mag_smooth = np.convolve(mag, kernel, mode="same")

    idx_candidates = np.where(mag_smooth > thresh)[0]
    if len(idx_candidates) == 0:
        # No jump found: fall back to a fixed offset from the start.
        t_start = t0 + 1.0
    else:
        idx_event = idx_candidates[0]
        t_event   = t[idx_event]
        t_start   = t_event + delay_after_jump

    t_end = t_start + window_len_s
    if t_end > t[-1]:
        # Not enough data after t_start: slide the window back so it ends at
        # the last sample, without going before the first one.
        t_end = t[-1]
        t_start = t_end - window_len_s
        if t_start < t0:
            t_start = t0

    mask_seg = (t >= t_start) & (t <= t_end)

    return (t[mask_seg],
            ax[mask_seg],
            ay[mask_seg],
            az[mask_seg],
            (t_start, t_end),
            mag,
            mag_smooth,
            thresh)


# CWT osi Y
import pywt

def compute_cwt_freq_y(y, Fs, fmin=0.5, fmax=400, n_freqs=256,
                       wavelet="cmor3.5-1.0", normalize=True):
    """Continuous wavelet transform of one signal on a linear frequency grid.

    The signal is mean-centred and, when ``normalize`` is true and its RMS is
    positive, divided by that RMS. ``n_freqs`` frequencies are spaced linearly
    from ``fmin`` to ``fmax``, converted to wavelet scales with
    ``pywt.frequency2scale`` and passed to ``pywt.cwt``.

    Parameters
    ----------
    y : numpy.ndarray
        Input samples.
    Fs : float
        Sampling rate; ``1 / Fs`` is used as the sampling period.
    fmin, fmax : float
        Ends of the analysed frequency grid.
        NOTE(review): nothing here limits ``fmax`` to ``Fs / 2``.
    n_freqs : int
        Number of grid points.
    wavelet : str
        Wavelet name understood by PyWavelets.
    normalize : bool
        Whether to scale the centred signal to unit RMS.

    Returns
    -------
    freqs : numpy.ndarray
        The frequency grid.
    power_db : numpy.ndarray
        ``20 * log10(|coefficients| + 1e-12)``; one row per frequency
        (presumably one column per input sample - per pywt.cwt, confirm).
    """
    centered = y - y.mean()
    if normalize:
        rms_level = np.sqrt((centered**2).mean())
        # A zero (or NaN) RMS leaves the centred signal untouched.
        signal = centered / rms_level if rms_level > 0 else centered
    else:
        signal = centered

    sampling_period = 1.0 / Fs
    freqs = np.linspace(fmin, fmax, n_freqs)
    # frequency2scale expects frequencies normalised by the sampling rate.
    scales = pywt.frequency2scale(wavelet, freqs * sampling_period)

    coeffs, _ = pywt.cwt(signal, scales, wavelet, sampling_period=sampling_period)
    magnitude = np.abs(coeffs)
    # The small offset keeps log10 finite where the magnitude is exactly zero.
    return freqs, 20 * np.log10(magnitude + 1e-12)


# >>> TO JEST KLUCZOWA FUNKCJA – cechy MUSZĄ mieć te same nazwy jak przy trenowaniu <<<
from scipy.stats import kurtosis

def extract_features_from_cwt(freqs, power_db, bands):
    """
    Build the feature dictionary from a CWT magnitude map given in dB.

    Keys produced (in this order):
      E_*, Var_*, Kurt_* for every band, then
      centroid_hz, mean_db, std_db, spec_flatness, rolloff_hz.
    Per the original note, exactly these names are stored in
    meta["feature_cols"], so they must not be renamed.

    Parameters:
      freqs    - 1-D array of frequencies, one per row of power_db
      power_db - 2-D array in dB, rows = frequencies (averaged over axis 1)
      bands    - iterable of (f_low, f_high) pairs; both ends are inclusive
                 and the label is built with zero-decimal formatting

    Returns:
      dict mapping feature name -> value (NaN where a feature is undefined).
    """
    # Spectrum averaged over axis 1 (in dB) and the same curve converted back
    # to a linear, amplitude-like quantity.
    avg_db = power_db.mean(axis=1)
    amp = 10 ** (avg_db / 20.0)

    feats = {}

    # --- per-band statistics, computed on all dB samples inside the band ---
    for lo, hi in bands:
        label = f"{lo:.0f}_{hi:.0f}"
        in_band = (freqs >= lo) & (freqs <= hi)

        if not in_band.any():
            # No grid frequency falls in this band: all three features undefined.
            feats[f"E_{label}"] = np.nan
            feats[f"Var_{label}"] = np.nan
            feats[f"Kurt_{label}"] = np.nan
            continue

        samples = power_db[in_band, :].ravel()
        feats[f"E_{label}"] = samples.mean()
        feats[f"Var_{label}"] = samples.var()
        feats[f"Kurt_{label}"] = (
            kurtosis(samples, fisher=False, bias=False)
            if samples.size > 1
            else np.nan
        )

    # --- amplitude-weighted frequency centroid ---
    amp_total = amp.sum()
    feats["centroid_hz"] = (freqs * amp).sum() / amp_total if amp_total > 0 else np.nan

    # --- global statistics of the averaged dB spectrum ---
    feats["mean_db"] = avg_db.mean()
    feats["std_db"] = avg_db.std()

    # --- spectral flatness: geometric mean / arithmetic mean ---
    positive = amp > 0
    if positive.any():
        geo_mean = np.exp(np.log(amp[positive]).mean())
        arith_mean = amp[positive].mean()
        feats["spec_flatness"] = geo_mean / (arith_mean + 1e-12)
    else:
        feats["spec_flatness"] = np.nan

    # --- spectral roll-off: first frequency reaching 95% of cumulative amp**2 ---
    energy_cum = np.cumsum(amp ** 2)
    if energy_cum[-1] > 0:
        target = 0.95 * energy_cum[-1]
        k = min(np.searchsorted(energy_cum, target), len(freqs) - 1)
        feats["rolloff_hz"] = freqs[k]
    else:
        feats["rolloff_hz"] = np.nan

    return feats


In [2]:
from joblib import load
import json
from pathlib import Path

# Directory holding the saved classifier and its metadata file.
MODEL_DIR = Path("models")

model_path = MODEL_DIR.joinpath("propeller_rf_cwt.joblib")
meta_path = MODEL_DIR.joinpath("propeller_rf_cwt_meta.json")

# NOTE(review): joblib files are pickle-based, so loading one can execute
# arbitrary code - only load model files from a trusted source.
clf = load(model_path)
print("Wczytano model z:", model_path)

# The metadata provides the feature names and the frequency bands.
with meta_path.open("r", encoding="utf-8") as f:
    meta = json.load(f)

feature_cols = meta["feature_cols"]
# JSON yields lists; each band is turned into a tuple.
bands = list(map(tuple, meta["bands"]))

print("Cechy używane przez model:", feature_cols)
print("Pasma:", bands)


Wczytano model z: models\propeller_rf_cwt.joblib
Cechy używane przez model: ['E_200_400', 'Var_200_400', 'Var_40_100', 'E_0_40', 'E_40_100', 'centroid_hz', 'E_100_200', 'mean_db', 'spec_flatness', 'rolloff_hz']
Pasma: [(0.5, 40), (40, 100), (100, 200), (200, 400)]


In [3]:
import pandas as pd
import numpy as np
from pathlib import Path

def _resolve_file_vote(group):
    """Return one label for a file given the rows (parts) that belong to it.

    The most frequent predicted label wins. When several labels share the top
    count (e.g. the two halves disagree), the tied label with the highest mean
    predicted probability is chosen instead of an arbitrary one.
    """
    counts = group["pred_material"].value_counts()
    top_labels = counts.index[(counts == counts.max()).to_numpy()]
    if len(top_labels) == 1:
        return top_labels[0]
    mean_proba = [group[f"proba_{label}"].mean() for label in top_labels]
    return top_labels[int(np.argmax(mean_proba))]


def predict_material_for_files(csv_paths):
    """
    Classify every CSV file in `csv_paths` with the saved model `clf`.

    For each file:
      - the steady-operation window is cut out (detect_steady_window),
      - the Y axis of that window is split into two halves,
      - CWT and features are computed per half (compute_cwt_freq_y,
        extract_features_from_cwt with the global `bands`),
      - the model predicts a label and class probabilities per half, using the
        columns listed in the global `feature_cols` in that order.

    Returns:
      df_parts - DataFrame with one row per half: "file_path", "part", the
                 features, "pred_material" and one "proba_<class>" column for
                 every class in clf.classes_.
      summary  - DataFrame with "file_path" and "pred_material_majority"
                 (majority vote per file; ties are broken by the mean
                 predicted probability).

    Raises:
      ValueError - if `csv_paths` is empty or a steady window holds fewer than
                   two samples (so it cannot be split into halves).
      KeyError   - if a feature required by the model was not computed.
    """
    rows = []

    for path in csv_paths:
        path = Path(path)
        print(f"Przetwarzam: {path}")

        # 1) load the file and cut the steady-operation window (Y axis only)
        t, ax, ay, az, Fs = load_xyz_from_csv(path)
        _, _, ay_seg, *_ = detect_steady_window(t, ax, ay, az)

        # 2) split into two halves, as during training
        mid = len(ay_seg) // 2
        if mid == 0:
            raise ValueError(
                f"Za mało próbek w oknie ustalonej pracy dla {path}: {len(ay_seg)}"
            )

        for part_idx, y_part in enumerate((ay_seg[:mid], ay_seg[mid:])):
            # 3) CWT of the Y axis
            freqs, power_db = compute_cwt_freq_y(
                y_part, Fs,
                fmin=0.5,
                fmax=400,
                n_freqs=256
            )

            # 4) features
            feat_dict = extract_features_from_cwt(freqs, power_db, bands)
            rows.append({"file_path": str(path), "part": part_idx, **feat_dict})

    if not rows:
        raise ValueError("Brak plików do przetworzenia: csv_paths jest puste.")

    df_parts = pd.DataFrame(rows)

    # 5) feature matrix in the same column order as during training
    missing = [col for col in feature_cols if col not in df_parts.columns]
    if missing:
        raise KeyError(f"Brak cech wymaganych przez model: {missing}")
    X_new = df_parts[feature_cols].values

    # 6) prediction + probabilities; the columns of predict_proba follow the
    #    order of clf.classes_, so no class label needs to be hard-coded
    y_pred = clf.predict(X_new)
    proba  = clf.predict_proba(X_new)

    df_parts["pred_material"] = y_pred
    for col_idx, label in enumerate(clf.classes_):
        df_parts[f"proba_{label}"] = proba[:, col_idx]

    # 7) majority vote per file (groupby yields files sorted by path)
    summary = pd.DataFrame(
        [
            {"file_path": file_path, "pred_material_majority": _resolve_file_vote(group)}
            for file_path, group in df_parts.groupby("file_path")
        ],
        columns=["file_path", "pred_material_majority"],
    )

    return df_parts, summary


In [4]:
from pathlib import Path

# New recordings to classify with the saved model.
new_paths = [Path("data/daneTestowe/rpm_i_adxL_test.csv")]

# df_parts: one row per half of each file; df_summary: one verdict per file.
df_parts, df_summary = predict_material_for_files(new_paths)

print("Predykcje dla połówek:")
display(df_parts[["file_path", "part", "pred_material",
                  "proba_fabryczne", "proba_kupione"]])


Przetwarzam: data\daneTestowe\rpm_i_adxL_test.csv
Predykcje dla poówek:


Unnamed: 0,file_path,part,pred_material,proba_fabryczne,proba_kupione
0,data\daneTestowe\rpm_i_adxL_test.csv,0,kupione,0.3125,0.6875
1,data\daneTestowe\rpm_i_adxL_test.csv,1,kupione,0.3225,0.6775
