# Fibre-sensing classification

## Analiza eksploracyjna

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

### Wczytanie zbioru danych z etykietami

In [None]:
# Load the labelled training table from a CSV file in the working directory.
# Later cells read its 'label' column.
train_dataset = pd.read_csv('train_dataset.csv')

# Preview the first rows to see which columns were parsed.
train_dataset.head()

### Sprawdzenie klas występujących w zbiorze treningowym

In [None]:
# Class id -> human-readable activity name. The ids are simply the positions
# (0..8) of the names in the tuple below.
class_mapping = dict(enumerate((
    'car',
    'construction works',
    'running',
    'going over the fence',
    'cable manipulation',
    'walking',
    'opening/closing manhole',
    'longboard',
    'regular (background noise)',
)))

# Distinct label values that actually occur in the training table.
pd.unique(train_dataset['label'])

In [None]:
# Tabular view of the id -> name mapping, one row per class.
class_mapping_df = pd.DataFrame({
    'class_id': list(class_mapping.keys()),
    'class_name': list(class_mapping.values()),
})

class_mapping_df

In [None]:
# Bar chart of how many training samples carry each label.
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(8, 5))

# hue mirrors x so that the palette can be applied per bar; the redundant
# legend is suppressed.
sns.countplot(
    data=train_dataset,
    x='label',
    hue='label',
    palette='viridis',
    legend=False,
    ax=ax,
)

ax.set_title('Liczność poszczególnych klas w zbiorze treningowym')
ax.set_xlabel('Klasa')
ax.set_ylabel('Liczba wystąpień')

plt.show()

### Wczytanie zbioru danych z cechami

In [None]:
# Load the feature array from a .npy file in the working directory.
# NOTE(review): presumably one row per row of train_dataset — the next cell
# prints both sizes so this can be confirmed.
train_features = np.load('train_features.npy')

In [None]:
# Feature-array shape on the first line, number of labelled rows on the second.
print(train_features.shape, len(train_dataset), sep='\n')

In [None]:
# Display the raw values of the first sample.
train_features[0]

In [None]:
# Number of entries that are exactly zero: total element count minus the
# number of non-zero elements.
zero_count = train_features.size - np.count_nonzero(train_features)
zero_count

In [None]:
# Global value range over the whole feature array: (minimum, maximum).
np.min(train_features), np.max(train_features)

In [None]:
# Line plot of the first sample's feature values.
plt.figure(figsize=(10, 5))
plt.plot(train_features[0])
plt.title('Feature values for first sample')
plt.xlabel('Time')
plt.ylabel('Strain value')
# Request the grid explicitly. A bare plt.grid() *toggles* visibility, and the
# seaborn "whitegrid" theme set earlier in the notebook already turns the grid
# on, so the argument-less call would switch it off.
plt.grid(True)
plt.show()

In [None]:
# Per-sample mean, reducing along axis 1.
row_means = np.mean(train_features, axis=1)

In [None]:
# show_histogram is a project helper from utils; its implementation is not
# visible here.
from utils import show_histogram

# Distribution of the per-sample means across all samples.
show_histogram(
    data=row_means, 
    title='Histogram of row means (all samples)', 
    xlabel='Mean feature value', 
    ylabel='Count'
)

In [None]:
# Per-sample minimum, reducing along axis 1.
row_mins = np.min(train_features, axis=1)

In [None]:
# Distribution of the per-sample minima across all samples.
show_histogram(
    data=row_mins, 
    title='Histogram of row mins (all samples)', 
    xlabel='Min feature value', 
    ylabel='Count'
)

In [None]:
# Per-sample maximum, reducing along axis 1.
row_maxes = np.max(train_features, axis=1)

In [None]:
# Distribution of the per-sample maxima across all samples.
show_histogram(
    data=row_maxes, 
    title='Histogram of row maxes (all samples)', 
    xlabel='Max feature value', 
    ylabel='Count'
)

In [None]:
# Per-sample standard deviation (NumPy default ddof), reducing along axis 1.
row_stds = np.std(train_features, axis=1)

In [None]:
# Distribution of the per-sample standard deviations across all samples.
show_histogram(
    data=row_stds, 
    title='Histogram of row stds (all samples)', 
    xlabel='Std feature value', 
    ylabel='Count'
)

## Transformacje danych

### Stworzenie zbioru treningowego składającego się z podstawowych agregatów

In [None]:
# One row per sample, one column per aggregate computed in the cells above.
train_features_df = pd.DataFrame(
    dict(mean=row_means, min=row_mins, max=row_maxes, std=row_stds)
)

train_features_df.head()

In [None]:
# Independent single-column frame holding only the labels.
train_labels_df = train_dataset.loc[:, ['label']].copy()
train_labels_df.head()

In [None]:
# combine_features_and_labels is a project helper from utils; its
# implementation is not visible here.
from utils import combine_features_and_labels

# The result is expected to contain the aggregate columns plus a 'label'
# column (later cells drop and read 'label').
# NOTE(review): presumably the helper joins row-by-row — confirm that it keeps
# features and labels aligned.
train_data_df = combine_features_and_labels(train_features_df, train_labels_df)
train_data_df.head()

### Standaryzacja atrybutów

In [None]:
# Standardise every feature column to zero mean and unit variance.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/validation split below, so validation rows influence the scaling
# statistics. Consider fitting on the training part only.
scaler = StandardScaler()

# Feature columns only; drop() returns a new frame, train_data_df is untouched.
features_df = train_data_df.drop(columns='label')

scaled_features = scaler.fit_transform(features_df)

# Keep the original row index so the scaled frame lines up with train_data_df.
scaled_train_data_df = pd.DataFrame(
    scaled_features,
    columns=features_df.columns,
    index=features_df.index,
)
# Assign labels positionally: index-based alignment against a fresh RangeIndex
# would misplace labels or produce NaN if train_data_df has a non-default index.
scaled_train_data_df['label'] = train_data_df['label'].to_numpy()

scaled_train_data_df.head()

## Trening modelu ML

### Model 1 (KNN)

Dane treningowe: podstawowe agregaty wyliczone na surowych danych, a następnie zestandaryzowane:

* średnia
* minimum
* maksimum
* odchylenie standardowe

#### Podział na zbiór treningowy i walidacyjny

In [None]:
# 80/20 train/validation split, stratified on the label so that both parts
# keep the class proportions; the fixed seed makes the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    scaled_train_data_df.drop(columns='label'),
    scaled_train_data_df['label'],
    stratify=scaled_train_data_df['label'],
    test_size=0.2,
    random_state=42,
)

In [None]:
# Row counts of the training and validation parts.
X_train.shape[0], X_val.shape[0]

In [None]:
# Preview the first training feature rows.
X_train.head()

In [None]:
# Preview the matching training labels.
y_train.head()

#### Trening i ewaluacja podstawowego modelu KNN

In [None]:
class KNNClassifierModel:
    """Thin wrapper around scikit-learn's ``KNeighborsClassifier``.

    The wrapped estimator is exposed as ``self.model``; every method simply
    delegates to it.
    """

    def __init__(self, n_neighbors=5):
        """Create the underlying estimator with ``n_neighbors`` neighbours."""
        self.model = KNeighborsClassifier(n_neighbors=n_neighbors)

    def train(self, X, y) -> None:
        """Fit the wrapped estimator on features ``X`` and targets ``y``."""
        self.model.fit(X, y)

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        predictions = self.model.predict(X)
        return predictions

    def evaluate(self, X, y):
        """Return the wrapped estimator's ``score`` for ``X`` against ``y``."""
        score = self.model.score(X, y)
        return score

In [None]:
# Baseline KNN with 5 neighbours, fitted on the training part of the split.
knn_classifier = KNNClassifierModel(n_neighbors=5)
knn_classifier.train(X_train, y_train)
# Validation predictions are kept for the report and confusion matrix below.
y_pred = knn_classifier.predict(X_val)

# evaluate() delegates to the estimator's score(); for scikit-learn
# classifiers this is mean accuracy.
accuracy = knn_classifier.evaluate(X_val, y_val)
print(f"Validation Accuracy: {accuracy:.2f}")

In [None]:
# Project helper from utils (implementation not visible here); receives the
# true labels, the predictions and the id -> name mapping.
from utils import print_classification_report
print_classification_report(y_val, y_pred, class_mapping)

In [None]:
# Project helper from utils (implementation not visible here); receives the
# true labels, the predictions and the id -> name mapping.
from utils import show_confusion_matrix

show_confusion_matrix(y_val, y_pred, class_mapping)

### Model 2

## Uzyskane metryki i rezultaty (TODO)