In [1]:
import pickle
import numpy as np

# Read the pre-split arrays from disk: features first, then labels.
# Tuple elements are evaluated left to right, so the files are loaded in the
# order X_train, X_test, y_train, y_test.
X_train, X_test = (
    np.load('../FinalDataset/X_train.npy'),
    np.load('../FinalDataset/X_test.npy'),
)
y_train, y_test = (
    np.load('../FinalDataset/y_train.npy'),
    np.load('../FinalDataset/y_test.npy'),
)

# Sanity check: show the shape of every array that was just loaded.
print("X_train shape: ", X_train.shape)
print("X_test shape: ", X_test.shape)
print("y_train shape: ", y_train.shape)
print("y_test shape: ", y_test.shape)

X_train shape:  (126, 500, 3)
X_test shape:  (54, 500, 3)
y_train shape:  (126,)
y_test shape:  (54,)


In [2]:

from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import tsfel

# Tunable parameters for the feature pipeline, kept in one place.
SAMPLING_RATE_HZ = 50   # sampling frequency passed to TSFEL as `fs`
N_PCA_COMPONENTS = 20   # number of principal components to keep
RANDOM_STATE = 42       # seeds PCA's randomized/arpack solvers so re-runs match

# Extract features using TSFEL
cfg = tsfel.get_features_by_domain()  # No domain argument: request all feature domains
X_train_features = tsfel.time_series_features_extractor(
    cfg, X_train, verbose=1, fs=SAMPLING_RATE_HZ
)
X_test_features = tsfel.time_series_features_extractor(
    cfg, X_test, verbose=1, fs=SAMPLING_RATE_HZ
)

# Remove highly correlated features.
# The list of columns to drop is derived from the training features only and
# then applied to both splits, so the test set does not influence selection.
correlated_features = tsfel.correlated_features(X_train_features)
X_train_filtered = X_train_features.drop(correlated_features, axis=1)
X_test_filtered = X_test_features.drop(correlated_features, axis=1)

# Remove low variance features (threshold=0 drops features that are constant
# in the training set). Fitted on train, re-used unchanged on test.
variance_selector = VarianceThreshold(threshold=0)
X_train_reduced = variance_selector.fit_transform(X_train_filtered)
X_test_reduced = variance_selector.transform(X_test_filtered)

# Normalize features using statistics estimated on the training set only.
scaler = StandardScaler()
X_train_normalized = scaler.fit_transform(X_train_reduced)
X_test_normalized = scaler.transform(X_test_reduced)

# Apply PCA.
# random_state is pinned because scikit-learn's default svd_solver='auto' may
# choose the randomized solver, whose output otherwise varies between runs.
# It is ignored when a deterministic solver is selected.
pca = PCA(n_components=N_PCA_COMPONENTS, random_state=RANDOM_STATE)
X_train_pca_20 = pca.fit_transform(X_train_normalized)
X_test_pca_20 = pca.transform(X_test_normalized)

print("X_train_pca shape: ", X_train_pca_20.shape)
print("X_test_pca shape: ", X_test_pca_20.shape)

*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
X_train_pca shape:  (126, 20)
X_test_pca shape:  (54, 20)


In [3]:
# Persist the PCA-reduced features and their labels in one uncompressed .npz
# archive. Each dict key becomes the name under which that array is stored.
np.savez(
    'processed_data.npz',
    **{
        'X_train_pca_20': X_train_pca_20,
        'X_test_pca_20': X_test_pca_20,
        'y_train': y_train,
        'y_test': y_test,
    },
)

In [4]:
import pandas as pd

# Fraction of total variance captured by each retained principal component,
# as reported by the fitted PCA object.
variance_ratios = pca.explained_variance_ratio_

print("PCA Explained Variance: ", variance_ratios)
# Builtin sum() adds the ratios sequentially, giving the cumulative share of
# variance kept by all retained components together.
print("PCA Explained Variance Sum: ", sum(variance_ratios))

PCA Explained Variance:  [0.28703436 0.0377949  0.03279612 0.03077332 0.02917733 0.0260732
 0.02319608 0.02129447 0.020463   0.01975971 0.01825382 0.01780579
 0.01663475 0.01639236 0.01615722 0.01486602 0.0144819  0.01347625
 0.01301564 0.01278159]
PCA Explained Variance Sum:  0.6822278495259165
