## 0 Generating Train/Test Sets

In [26]:
# !pip3 install numpy

# !pip3 show numpy

In [27]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import os

# Constants
time = 10
offset = 100
seed = 47
folders = ["LAYING","SITTING","STANDING","WALKING","WALKING_DOWNSTAIRS","WALKING_UPSTAIRS"]
classes = {"WALKING":1,"WALKING_UPSTAIRS":2,"WALKING_DOWNSTAIRS":3,"SITTING":4,"STANDING":5,"LAYING":6}

combined_dir = os.path.join("Combined")


X_train=[]
y_train=[]
dataset_dir = os.path.join(combined_dir,"Train")

for folder in folders:
    files = os.listdir(os.path.join(dataset_dir,folder))

    for file in files:

        df = pd.read_csv(os.path.join(dataset_dir,folder,file),sep=",",header=0)
        df = df[offset:offset+time*50]
        X_train.append(df.values)
        y_train.append(classes[folder])

X_train = np.array(X_train)
y_train = np.array(y_train)

X_test=[]
y_test=[]
dataset_dir = os.path.join(combined_dir,"Test")

for folder in folders:
    files = os.listdir(os.path.join(dataset_dir,folder))
    for file in files:

        df = pd.read_csv(os.path.join(dataset_dir,folder,file),sep=",",header=0)
        df = df[offset:offset+time*50]
        X_test.append(df.values)
        y_test.append(classes[folder])

X_test = np.array(X_test)
y_test = np.array(y_test)

X = np.concatenate((X_train,X_test))
y = np.concatenate((y_train,y_test))

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=seed,stratify=y)

print("Training data shape: ",X_train.shape)
print("Testing data shape: ",X_test.shape)


Training data shape:  (126, 500, 3)
Testing data shape:  (54, 500, 3)


# 1 Train a decision tree model using the raw accelerometer data. Report the accuracy, precision, recall and confusion matrix of the model.

In [28]:
# X_train
# y_train
# X_test
# y_test

# print("X_train", X_train)
# print("Y_train", y_train)
# print("X_test", X_test)
# print("Y_test", y_test)

# X_train.shape
# y_train.shape

In [29]:
# Reshape the data
n_samples, n_timesteps, n_features = X_train.shape
X_train_reshaped = X_train.reshape(n_samples, n_timesteps * n_features)
X_test_reshaped = X_test.reshape(X_test.shape[0], X_test.shape[1] * X_test.shape[2])


In [30]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report



# Initialize the Decision Tree classifier
clf = DecisionTreeClassifier(random_state=seed)

# Train the model
clf.fit(X_train_reshaped, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test_reshaped)



In [31]:
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Calculate precision
precision = precision_score(y_test, y_pred, average='weighted')

# Calculate recall
recall = recall_score(y_test, y_pred, average='weighted')

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)

# Print results
print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print('Confusion Matrix:')
print(conf_matrix)

# Optional: Detailed classification report
print('\nClassification Report:')
print(classification_report(y_test, y_pred))


Accuracy: 0.63
Precision: 0.64
Recall: 0.63
Confusion Matrix:
[[4 2 3 0 0 0]
 [0 3 2 0 4 0]
 [1 3 3 1 1 0]
 [0 0 0 8 1 0]
 [0 0 0 2 7 0]
 [0 0 0 0 0 9]]

Classification Report:
              precision    recall  f1-score   support

           1       0.80      0.44      0.57         9
           2       0.38      0.33      0.35         9
           3       0.38      0.33      0.35         9
           4       0.73      0.89      0.80         9
           5       0.54      0.78      0.64         9
           6       1.00      1.00      1.00         9

    accuracy                           0.63        54
   macro avg       0.64      0.63      0.62        54
weighted avg       0.64      0.63      0.62        54



# 2 Train a decision tree model using the features obtained by TSFEL. Report the accuracy, precision, recall and confusion matrix of the model.

In [None]:
# %pip install tsfel


In [38]:
import pandas as pd
import tsfel


sampling_frequency = 50


# Load the default feature configuration
cfg = tsfel.get_features_by_domain()

# Extract features for the training set
X_train_features = []
for i in range(X_train.shape[0]):  # Iterate over samples
    # Extract features from the (500, 3) matrix for each sample
    features = tsfel.time_series_features_extractor(cfg, X_train[i], fs=sampling_frequency, verbose=0)
    X_train_features.append(features)

# Convert list of DataFrames to a single DataFrame
X_train_features = pd.concat(X_train_features).reset_index(drop=True)

# Extract features for the test set
X_test_features = []
for i in range(X_test.shape[0]):  # Iterate over test samples
    features = tsfel.time_series_features_extractor(cfg, X_test[i], fs=sampling_frequency, verbose=0)
    X_test_features.append(features)

# Convert list of DataFrames to a single DataFrame
X_test_features = pd.concat(X_test_features).reset_index(drop=True)


In [39]:
# print(X_train_features.shape)
# print(X_test_features.shape)


(126, 1152)
(54, 1152)


In [40]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report

# Initialize the Decision Tree classifier
clf = DecisionTreeClassifier(random_state=42)

# Train the model on the extracted features
clf.fit(X_train_features, y_train)

# Predict on the test set features
y_pred = clf.predict(X_test_features)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
conf_matrix = confusion_matrix(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print('Confusion Matrix:')
print(conf_matrix)
print('\nClassification Report:')
print(classification_report(y_test, y_pred))


Accuracy: 0.83
Precision: 0.85
Recall: 0.83
Confusion Matrix:
[[6 1 2 0 0 0]
 [2 7 0 0 0 0]
 [0 1 8 0 0 0]
 [0 0 0 9 0 0]
 [0 0 0 3 6 0]
 [0 0 0 0 0 9]]

Classification Report:
              precision    recall  f1-score   support

           1       0.75      0.67      0.71         9
           2       0.78      0.78      0.78         9
           3       0.80      0.89      0.84         9
           4       0.75      1.00      0.86         9
           5       1.00      0.67      0.80         9
           6       1.00      1.00      1.00         9

    accuracy                           0.83        54
   macro avg       0.85      0.83      0.83        54
weighted avg       0.85      0.83      0.83        54



# 3 Train a decision tree model using the features provided in the dataset. Report the accuracy, precision, recall and confusion matrix of the model.

In [45]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report

# Initialize the Decision Tree classifier
clf = DecisionTreeClassifier(random_state=42)

# Train the model
clf.fit(X_train_reshaped, y_train)


# Predict on the test set
y_pred = clf.predict(X_test_reshaped)


In [46]:
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Calculate precision
precision = precision_score(y_test, y_pred, average='weighted')

# Calculate recall
recall = recall_score(y_test, y_pred, average='weighted')

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)

# Print results
print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print('Confusion Matrix:')
print(conf_matrix)

# Optional: Detailed classification report
print('\nClassification Report:')
print(classification_report(y_test, y_pred))


Accuracy: 0.67
Precision: 0.65
Recall: 0.67
Confusion Matrix:
[[6 1 2 0 0 0]
 [1 3 1 1 3 0]
 [2 3 3 1 0 0]
 [0 0 0 8 1 0]
 [0 0 0 2 7 0]
 [0 0 0 0 0 9]]

Classification Report:
              precision    recall  f1-score   support

           1       0.67      0.67      0.67         9
           2       0.43      0.33      0.38         9
           3       0.50      0.33      0.40         9
           4       0.67      0.89      0.76         9
           5       0.64      0.78      0.70         9
           6       1.00      1.00      1.00         9

    accuracy                           0.67        54
   macro avg       0.65      0.67      0.65        54
weighted avg       0.65      0.67      0.65        54



# 4 Compare the results of the three models. Which model do you think is better?

# 1 Raw Accelerometer Data:

Data: Time series data in its original form.
Preprocessing: Requires reshaping or transformation.
Pros/Cons: Retains original data but may need significant preprocessing.

# 2 Features from TSFEL:

Data: Extracted features representing statistical, temporal, and spectral properties.
Preprocessing: Feature extraction is done, but feature selection might still be necessary.
Pros/Cons: Captures time series characteristics; reduces dimensionality but may lose some temporal information.

# 3 Provided Features:

Data: Features are already provided and formatted.
Preprocessing: Minimal if features are well-prepared.
Pros/Cons: Easy to use but depends on the quality and relevance of the provided features.