In [None]:
import sys
import os
from pathlib import Path
import joblib

# Make the project root (parent of the current working directory) importable
project_root = os.path.abspath(os.pardir)
if project_root not in sys.path:
    sys.path.append(project_root)

# Importowanie modułów
from data_loader import (
    data_loader,
    binned_distr,
    dev_mad_var,
    features_accelerometer,
    features_cosine,
    features_freq,
    features_temporal,
    vector_magnitude,
    peak_features,
)

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Data processor: loads the parquet recording and yields time windows.
# NOTE(review): window_size / step_size units are defined by
# TimeWindowSegmenter (not visible here) -- presumably seconds; confirm.
data_processor = data_loader.TimeWindowSegmenter(
    df_path="../data_loader/real_world_2016.parquet",
    window_size=10,
    step_size=4,
    source_sampling_rate=50,
    time_column="timestamp",
    id_column='person_id',
    activity_column='activity_label',
    clean_columns=False,
    fix_timestamps=False,
    acc_columns=('acc_x', 'acc_y', 'acc_z'),
    # Was 'g_z': the feature extractor below reads the column 'gyr_z', so the
    # gyroscope z-axis name must match the other gyroscope columns.
    gyr_columns=('gyr_x', 'gyr_y', 'gyr_z')
)
# Bring the data from the declared source rate (50) down to 25, which is the
# `fs` default used by extract_features_from_window.
data_processor.resample_to(25)

# Feature extraction for a single window
def extract_features_from_window(window, fs=25, axes=None):
    """Build a flat feature dictionary for one sensor window.

    Args:
        window: Table-like object indexed by column name (the code uses
            ``window[axis].astype(float).values``, so a pandas DataFrame is
            expected) holding one column per entry in ``axes``.
        fs: Sampling rate forwarded to the frequency-domain and peak feature
            functions.
        axes: Column names to process. The first three entries are passed on
            as the accelerometer axes and the remaining ones as the gyroscope
            axes. Defaults to
            ``['acc_x', 'acc_y', 'acc_z', 'gyr_x', 'gyr_y', 'gyr_z']``.

    Returns:
        dict mapping feature name to value. Per-axis spectral features are
        keyed ``"<axis>_<name>"``, histogram bins ``"<axis>_bin<i>"``, the two
        magnitudes ``"vector_acc_mag"`` / ``"vector_gyr_mag"``; the remaining
        keys are whatever the project feature modules return.
    """
    if axes is None:
        # A fresh list per call: a list literal in the signature would be one
        # shared object that any callee could mutate for all later calls.
        axes = ['acc_x', 'acc_y', 'acc_z', 'gyr_x', 'gyr_y', 'gyr_z']

    # (feature suffix, function, extra positional args passed after the signal)
    freq_funcs = (
        ('dom_freq', features_freq.dominant_frequency, (fs,)),
        ('entropy', features_freq.spectral_entropy, (fs,)),
        ('energy', features_freq.spectral_energy, ()),
        ('centroid', features_freq.spectral_centroid, (fs,)),
        ('bandwidth', features_freq.spectral_bandwidth, (fs,)),
        ('flatness', features_freq.spectral_flatness, (fs,)),
        ('slope', features_freq.spectral_slope, (fs,)),
        ('rolloff', features_freq.spectral_rolloff, (fs,)),
        ('band_ratio', features_freq.band_energy_ratio, (fs,)),
    )

    features = {}

    # Frequency-domain features, one set per axis.
    for axis in axes:
        signal = window[axis].astype(float).values
        for fname, func, extra_args in freq_funcs:
            features[f"{axis}_{fname}"] = func(signal, *extra_args)

    # 10-bin distribution per axis, flattened to one scalar feature per bin.
    binned = binned_distr.calculate_binned_distribution_multi_axis(
        window=window, bins=10, axes=axes
    )
    for axis, bin_vals in binned.items():
        for i, val in enumerate(bin_vals):
            features[f"{axis}_bin{i}"] = val

    features.update(dev_mad_var.calculate_statistics_multi_axis(window=window, axes=axes))
    features.update(features_accelerometer.extract_acc_features(window=window, axes=axes))
    features.update(features_cosine.extract_cosine_distances(window=window, axes=axes))
    # NOTE(review): temporal features are computed for the gyroscope axes only
    # (axes[3:]); kept as in the original -- confirm this is intentional.
    features.update(features_temporal.extract_temporal_features(window=window, axes=axes[3:]))
    features["vector_acc_mag"] = vector_magnitude.calculate_accelerometer_magnitude(window, axes=axes[:3])
    features["vector_gyr_mag"] = vector_magnitude.calculate_gyroscope_magnitude(window, axes=axes[3:])
    features.update(peak_features.extract_peak_features(window_df=window, sampling_rate=fs, axes=axes))

    return features

# Build the feature rows and the label list, one entry per window
X_dicts = []
labels = []

for window in data_processor.segment():
    row = extract_features_from_window(window)
    # The window's label is read from its first sample.
    first_label = window[data_processor.activity_column].iloc[0]
    X_dicts.append(row)
    labels.append(first_label)

# Tabular form: features missing from a window become 0
X_df = pd.DataFrame(X_dicts)
X_df = X_df.fillna(0)
Y = np.array(labels)

# Train/validation split FIRST, then normalization.
# Fitting the scaler on the full dataset (as before) lets the validation rows
# influence the mean/variance used for training, which leaks information and
# biases the reported validation metrics.
# NOTE(review): windows are produced with step_size < window_size, so
# neighbouring windows presumably overlap; a random split can then place
# near-duplicate windows on both sides. Consider a split grouped by person.
X_train_raw, X_val_raw, Y_train, Y_val = train_test_split(
    X_df, Y, test_size=0.2, random_state=42, stratify=Y
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # statistics from training rows only
X_val = scaler.transform(X_val_raw)          # reuse the training statistics

# Kept for backward compatibility with code that expects `X_scaled`: the whole
# feature table transformed with the training-fitted scaler.
X_scaled = scaler.transform(X_df)

# Fit an 11-neighbour Euclidean KNN classifier and predict the validation set
knn = KNeighborsClassifier(metric='euclidean', n_neighbors=11)
knn = knn.fit(X_train, Y_train)  # fit() returns the estimator itself
Y_pred = knn.predict(X_val)

# Validation metrics: overall accuracy, confusion matrix, per-class report
accuracy = accuracy_score(Y_val, Y_pred)
print(f'Accuracy: {accuracy:.2f}')

conf_matrix = confusion_matrix(Y_val, Y_pred)
print("Confusion Matrix:", conf_matrix, sep="\n")

class_report = classification_report(Y_val, Y_pred)
print("Classification Report:", class_report, sep="\n")

# Persist the trained classifier
# NOTE(review): the classifier was trained on standardized features; the
# fitted `scaler` is not saved here -- confirm it is persisted elsewhere,
# otherwise the saved model cannot be applied to new raw data.
model_path = 'knn_model.pkl'
joblib.dump(knn, model_path)
print(f"Model saved as '{model_path}'")


Resampling: 100%|██████████| 71/71 [00:04<00:00, 14.48it/s]
Segmenting:  99%|█████████▊| 71/72 [00:50<00:00,  1.41it/s]


Accuracy: 0.87
Confusion Matrix:
[[145   1   1   1  12]
 [ 11 214   9  14   3]
 [  5   0 208  18   2]
 [ 20   5  15 185   6]
 [ 22   0   0   0 214]]
Classification Report:
              precision    recall  f1-score   support

  climbingup       0.71      0.91      0.80       160
     running       0.97      0.85      0.91       251
     sitting       0.89      0.89      0.89       233
    standing       0.85      0.80      0.82       231
     walking       0.90      0.91      0.90       236

    accuracy                           0.87      1111
   macro avg       0.87      0.87      0.87      1111
weighted avg       0.88      0.87      0.87      1111

Model saved as 'knn_model.pkl'
