In [1]:
# Imports for training and evaluating the decision tree model (saved later with pickle)
from sklearn.tree import DecisionTreeClassifier
import numpy as np
import os
import sklearn.metrics
import seaborn as sns
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the pre-split train/test arrays stored under ../FinalDataset
dataset_dir = os.path.join('..', 'FinalDataset')

X_train = np.load(os.path.join(dataset_dir, 'X_train.npy'))
y_train = np.load(os.path.join(dataset_dir, 'y_train.npy'))
X_test = np.load(os.path.join(dataset_dir, 'X_test.npy'))
y_test = np.load(os.path.join(dataset_dir, 'y_test.npy'))

# Sanity check: report the shape of every array that was loaded
print(f"X_train: {X_train.shape}")
print(f"X_test: {X_test.shape}")
print(f"y_train: {y_train.shape}")
print(f"y_test: {y_test.shape}")



X_train: (126, 500, 3)
X_test: (54, 500, 3)
y_train: (126,)
y_test: (54,)


In [3]:
import tsfel
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA  
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold

# Feature pipeline: TSFEL extraction -> drop correlated features -> drop
# constant features -> standardize -> PCA. Every step is fitted on the training
# split only and then applied unchanged to the test split.

# Tunable settings for this cell
SAMPLING_FREQUENCY = 50  # passed to TSFEL as `fs`
N_PCA_COMPONENTS = 20    # number of principal components kept
PCA_RANDOM_STATE = 0     # fixed seed so the PCA projection is reproducible

# Extract features using TSFEL
cfg = tsfel.get_features_by_domain()  # Get all features by default
X_train_tsfel = tsfel.time_series_features_extractor(cfg, X_train, verbose=1, fs=SAMPLING_FREQUENCY)
X_test_tsfel = tsfel.time_series_features_extractor(cfg, X_test, verbose=1, fs=SAMPLING_FREQUENCY)

print(f"X_train_tsfel: {X_train_tsfel.shape}")
print(f"X_test_tsfel: {X_test_tsfel.shape}")
print(f"y_train: {y_train.shape}")
print(f"y_test: {y_test.shape}")

# Convert to DataFrame to retain column names
X_train_df = pd.DataFrame(X_train_tsfel)
X_test_df = pd.DataFrame(X_test_tsfel)

# Remove highly correlated features (chosen on the training split, then the
# same columns are dropped from the test split)
correlated_features = tsfel.correlated_features(X_train_df)
X_train_filtered_df = X_train_df.drop(correlated_features, axis=1)
X_test_filtered_df = X_test_df.drop(correlated_features, axis=1)

# Remove constant features: threshold=0 only drops columns with zero variance
variance_selector = VarianceThreshold(threshold=0)
X_train_reduced = variance_selector.fit_transform(X_train_filtered_df)
# The support mask is only available after fitting; compute the kept names once
kept_columns = X_train_filtered_df.columns[variance_selector.get_support()]
X_train_reduced_df = pd.DataFrame(X_train_reduced, columns=kept_columns)
X_test_reduced_df = pd.DataFrame(variance_selector.transform(X_test_filtered_df), columns=kept_columns)

print(f"X_train_reduced_df:{X_train_reduced_df.shape}")

# Normalize features (statistics come from the training split only)
scaler = StandardScaler()
X_train_normalized = scaler.fit_transform(X_train_reduced_df)
X_test_normalized = scaler.transform(X_test_reduced_df)

# Convert to DataFrame to retain column names
X_train_normalized_df = pd.DataFrame(X_train_normalized, columns=kept_columns)
X_test_normalized_df = pd.DataFrame(X_test_normalized, columns=kept_columns)

# Display column names
print("Column names in X_test_normalized:")
print(X_test_normalized_df.columns.tolist())

# Apply PCA. With this many features and so few components, svd_solver='auto'
# can select the randomized solver, so the seed is fixed to make the projection
# (and everything trained on it) repeatable across runs.
pca = PCA(n_components=N_PCA_COMPONENTS, random_state=PCA_RANDOM_STATE)
X_train_pca_20 = pca.fit_transform(X_train_normalized)
X_test_pca_20 = pca.transform(X_test_normalized)

print("X_train_pca shape: ", X_train_pca_20.shape)
print("X_test_pca shape: ", X_test_pca_20.shape)


*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
X_train_tsfel: (126, 1152)
X_test_tsfel: (54, 1152)
y_train: (126,)
y_test: (54,)
X_train_reduced_df:(126, 803)
Column names in X_test_normalized:
['0_Absolute energy', '0_Autocorrelation', '0_Centroid', '0_ECDF Percentile_0', '0_Entropy', '0_FFT mean coefficient_0', '0_FFT mean coefficient_1', '0_FFT mean coefficient_10', '0_FFT mean coefficient_100', '0_FFT mean coefficient_101', '0_FFT mean coefficient_102', '0_FFT mean coefficient_103', '0_FFT mean coefficient_104', '0_FFT mean coefficient_105', '0_FFT mean coefficient_106', '0_FFT mean coefficient_107', '0_FFT mean coefficient_108', '0_FFT mean coefficient_109', '0_FFT mean coefficient_11', '0_FFT mean coefficient_110', '0_FFT mean coefficient_111', '0_FFT mean coefficient_112', '0_FFT mean coefficient_113', '0_FFT mean coefficient_114', '0_FFT mean coefficient_115', '0_FFT mean coefficient_116', '0_FFT mean coefficient_117', '0_FFT mean coefficient_118', '0_FFT mean coefficient_119', '0_FFT me

In [4]:
# Train a decision tree on the PCA-projected training features; the seed is fixed
dt2 = DecisionTreeClassifier(random_state=0)
dt2.fit(X=X_train_pca_20, y=y_train)

In [5]:
# Predict on the PCA-projected test split and score the predictions
y_tsfel_pred = dt2.predict(X_test_pca_20)

test_accuracy = sklearn.metrics.accuracy_score(y_test, y_tsfel_pred)
# Precision and recall are macro-averaged across the classes
test_precision = sklearn.metrics.precision_score(y_test, y_tsfel_pred, average='macro')
test_recall = sklearn.metrics.recall_score(y_test, y_tsfel_pred, average='macro')

print("Accuracy: ", test_accuracy)
print("Precision: ", test_precision)
print("Recall: ", test_recall)

Accuracy:  0.8518518518518519
Precision:  0.8585137085137086
Recall:  0.8518518518518517


In [6]:
import pickle

# Save the fitted decision tree to disk. The `with` block guarantees the file
# is flushed and closed as soon as the dump finishes, instead of leaving an
# open handle behind.
filename = 'dt2_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(dt2, model_file)

In [7]:
import json

# Store the column names of the normalized test frame as JSON
my_variable = X_test_normalized_df.columns.tolist()
payload = {'my_variable': my_variable}

with open('variable.json', 'w') as json_file:
    json.dump(payload, json_file)


In [8]:
# Pickle the fitted StandardScaler to scaler.pkl
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [9]:
# Pickle the fitted PCA transformer to PCA.pkl
with open('PCA.pkl', 'wb') as pca_file:
    pickle.dump(pca, pca_file)

In [10]:
# Show the class of each fitted object, in the order PCA, scaler, tree
for fitted_object in (pca, scaler, dt2):
    print(type(fitted_object))

<class 'sklearn.decomposition._pca.PCA'>
<class 'sklearn.preprocessing._data.StandardScaler'>
<class 'sklearn.tree._classes.DecisionTreeClassifier'>
