In [160]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import tsfel
import sklearn

# **Dataset Preparation**

### Note: the input features have been converted to csv files before being read.


## **Raw Accelerometer Data**

In [None]:
raw_train_signals_path='UCI HAR Dataset/train/Inertial Signals/'
raw_test_signals_path='UCI HAR Datase/test/Inertial Signals/'

In [162]:
raw_train_signals_x=pd.read_csv(os.path.join(raw_train_signals_path,'total_acc_x_train.csv'), header=None)
raw_train_signals_y=pd.read_csv(os.path.join(raw_train_signals_path,'total_acc_y_train.csv'), header=None)
raw_train_signals_z=pd.read_csv(os.path.join(raw_train_signals_path,'total_acc_z_train.csv'), header=None)

In [None]:
train_labels=pd.read_csv(os.path.join('UCI HAR Dataset/train','y_train.csv'), header=None)
test_labels=pd.read_csv(os.path.join('UCI HAR Dataset/test','y_test.csv'), header=None)
print(np.unique(train_labels))
print(np.unique(test_labels))

[1 2 3 4 5 6]
[1 2 3 4 5 6]


In [164]:
X_train_raw=[]
y_train_raw=[]
for i, label in enumerate(train_labels.iloc[:,0]):
    acc_x=raw_train_signals_x.loc[i].values
    acc_y=raw_train_signals_y.loc[i].values
    acc_z=raw_train_signals_z.loc[i].values
    total_accel=np.stack((acc_x,acc_y,acc_z), axis=1)
    X_train_raw.append(total_accel)
    y_train_raw.append(label)
X_train_raw=np.array(X_train_raw)
y_train_raw=np.array(y_train_raw)

In [165]:
print(X_train_raw.shape)
print(y_train_raw.shape)

(7352, 128, 3)
(7352,)


In [166]:
raw_test_signals_x=pd.read_csv(os.path.join(raw_test_signals_path,'total_acc_x_test.csv'), header=None)
raw_test_signals_y=pd.read_csv(os.path.join(raw_test_signals_path,'total_acc_y_test.csv'), header=None)
raw_test_signals_z=pd.read_csv(os.path.join(raw_test_signals_path,'total_acc_z_test.csv'), header=None)

In [167]:
X_test_raw=[]
y_test_raw=[]
for i, label in enumerate(test_labels.iloc[:,0]):
    acc_x=raw_test_signals_x.loc[i].values
    acc_y=raw_test_signals_y.loc[i].values
    acc_z=raw_test_signals_z.loc[i].values
    total_accel=np.stack((acc_x,acc_y,acc_z), axis=1)
    X_test_raw.append(total_accel)
    y_test_raw.append(label)
X_test_raw=np.array(X_test_raw)
y_test_raw=np.array(y_test_raw)

In [168]:
print(X_test_raw.shape)
print(y_test_raw.shape)

(2947, 128, 3)
(2947,)


## **Dataset from Features provided by author.**

In [None]:
features=[]
with open('UCI HAR Dataset/features.txt','r') as file:
    for line in file:
        feature=line.strip().split()[1]
        features.append(feature)

In [None]:
train_features=pd.read_csv(os.path.join('UCI HAR Dataset/train','X_train.csv'),header=None)
test_features=pd.read_csv(os.path.join('UCI HAR Dataset/test','X_test.csv'),header=None)

In [171]:
train_features.columns=features
test_features.columns=features

In [172]:
test_labels.columns=['Activity']

In [173]:
X_train_dataset_feat=np.array(train_features)
y_train_dataset_feat=np.array(train_labels)
X_test_dataset_feat=np.array(test_features)
y_test_dataset_feat=np.array(test_labels)


## **Dataset from TSFEL features**

In [174]:
def get_tsfel_features(dataset, domain):
    tsfel_features_list=[]
    cfg=tsfel.get_features_by_domain(domain)
    for window in range(dataset.shape[0]):
        dataset_window_for_tsfel = pd.DataFrame(dataset[window], columns=['acc_x', 'acc_y', 'acc_z'])
        features_x=tsfel.time_series_features_extractor(cfg, dataset_window_for_tsfel[['acc_x']], fs=100, verbose=0)
        features_y=tsfel.time_series_features_extractor(cfg, dataset_window_for_tsfel[['acc_y']], fs=100, verbose=0)
        features_z=tsfel.time_series_features_extractor(cfg, dataset_window_for_tsfel[['acc_z']], fs=100, verbose=0)
        features=pd.concat([features_x, features_y, features_z], axis=1)
        tsfel_features_list.append(features)
        print(f'Extracted TSFEL features for window-{window+1}')
    tsfel_features_df=pd.concat(tsfel_features_list, ignore_index=True)
    return tsfel_features_df

In [175]:
X_train_tsfel=get_tsfel_features(X_train_raw,'statistical')

Extracted TSFEL features for window-1
Extracted TSFEL features for window-2
Extracted TSFEL features for window-3
Extracted TSFEL features for window-4
Extracted TSFEL features for window-5
Extracted TSFEL features for window-6
Extracted TSFEL features for window-7
Extracted TSFEL features for window-8
Extracted TSFEL features for window-9
Extracted TSFEL features for window-10
Extracted TSFEL features for window-11
Extracted TSFEL features for window-12
Extracted TSFEL features for window-13
Extracted TSFEL features for window-14
Extracted TSFEL features for window-15
Extracted TSFEL features for window-16
Extracted TSFEL features for window-17
Extracted TSFEL features for window-18
Extracted TSFEL features for window-19
Extracted TSFEL features for window-20
Extracted TSFEL features for window-21
Extracted TSFEL features for window-22
Extracted TSFEL features for window-23
Extracted TSFEL features for window-24
Extracted TSFEL features for window-25
Extracted TSFEL features for windo

In [176]:
X_test_tsfel=get_tsfel_features(X_test_raw,'statistical')

Extracted TSFEL features for window-1
Extracted TSFEL features for window-2
Extracted TSFEL features for window-3
Extracted TSFEL features for window-4
Extracted TSFEL features for window-5
Extracted TSFEL features for window-6
Extracted TSFEL features for window-7
Extracted TSFEL features for window-8
Extracted TSFEL features for window-9
Extracted TSFEL features for window-10
Extracted TSFEL features for window-11
Extracted TSFEL features for window-12
Extracted TSFEL features for window-13
Extracted TSFEL features for window-14
Extracted TSFEL features for window-15
Extracted TSFEL features for window-16
Extracted TSFEL features for window-17
Extracted TSFEL features for window-18
Extracted TSFEL features for window-19
Extracted TSFEL features for window-20
Extracted TSFEL features for window-21
Extracted TSFEL features for window-22
Extracted TSFEL features for window-23
Extracted TSFEL features for window-24
Extracted TSFEL features for window-25
Extracted TSFEL features for windo

In [177]:
y_train_tsfel=y_train_raw
y_test_tsfel=y_test_raw

## **Standardization of Data**

In [178]:
from sklearn.preprocessing import StandardScaler

In [179]:
scaler_raw=StandardScaler()
scaled_X_train_raw=scaler_raw.fit_transform(X_train_raw.reshape(X_train_raw.shape[0]*X_train_raw.shape[1],X_train_raw.shape[2]))
scaled_X_test_raw=scaler_raw.fit_transform(X_test_raw.reshape(X_test_raw.shape[0]*X_test_raw.shape[1],X_test_raw.shape[2]))
scaled_X_train_raw=scaled_X_train_raw.reshape(7352,128,3)
scaled_X_test_raw=scaled_X_test_raw.reshape(2947, 128, 3)
print(scaled_X_train_raw.shape)
print(scaled_X_test_raw.shape)

(7352, 128, 3)
(2947, 128, 3)


In [180]:
scaler_dataset_feat=StandardScaler()
scaled_X_train_dataset_feat=scaler_dataset_feat.fit_transform(X_train_dataset_feat)
scaled_X_test_dataset_feat=scaler_dataset_feat.fit_transform(X_test_dataset_feat)
print(scaled_X_train_dataset_feat.shape)
print(scaled_X_test_dataset_feat.shape)

(7352, 561)
(2947, 561)


In [181]:
scaler_tsfel=StandardScaler()
scaled_X_train_tsfel=scaler_tsfel.fit_transform(X_train_tsfel)
scaled_X_test_tsfel=scaler_tsfel.fit_transform(X_test_tsfel)
print(scaled_X_train_tsfel.shape)
print(scaled_X_test_tsfel.shape)

(7352, 93)
(2947, 93)


# **Classification**

In [182]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

In [183]:
def get_metrics(y_test,y_pred, model_name,dataset_name):
    accuracy=accuracy_score(y_test,y_pred)
    recall=recall_score(y_test,y_pred, average='weighted')
    precision=precision_score(y_test,y_pred, average='weighted')
    print(f'Accuracy of {model_name} on {dataset_name}: {accuracy}')
    print(f'Recall of {model_name} on {dataset_name}: {recall}')
    print(f'Precision of {model_name} on {dataset_name}: {precision}')
    return accuracy, recall, precision

# **a. Use UCI HAR dataset and build a model to predict various activity classes.**

## **Decision Tree Classifier**

In [184]:
from sklearn.tree import DecisionTreeClassifier

###  1. On Raw Accelerometer Data

In [185]:
X_train_raw_for_dt=scaled_X_train_raw.reshape(-1,3)
y_train_raw_for_dt=np.repeat(y_train_raw,128)
X_test_raw_for_dt=scaled_X_test_raw.reshape(-1,3)
y_test_raw_for_dt=np.repeat(y_test_raw,128)
print(y_train_raw_for_dt.shape)
print(y_test_raw_for_dt.shape)
print(X_train_raw_for_dt.shape)
print(X_test_raw_for_dt.shape)

(941056,)
(377216,)
(941056, 3)
(377216, 3)


In [186]:
decision_tree_on_raw_data=DecisionTreeClassifier(random_state=42)
decision_tree_on_raw_data.fit(X_train_raw_for_dt,y_train_raw_for_dt)

In [188]:
y_pred_dt_raw=decision_tree_on_raw_data.predict(X_test_raw_for_dt)

In [189]:
acc_raw_dt, recall_raw_dt, prec_raw_dt=get_metrics(y_test_raw_for_dt,y_pred_dt_raw, 'Decision Tree Classifier', 'Raw Data')

Accuracy of Decision Tree Classifier on Raw Data: 0.554189111808619
Recall of Decision Tree Classifier on Raw Data: 0.554189111808619
Precision of Decision Tree Classifier on Raw Data: 0.5997412709413114


### 2. On Features provided by the Author

In [190]:
decision_tree_on_dataset_feat=DecisionTreeClassifier(random_state=42)
decision_tree_on_dataset_feat.fit(scaled_X_train_dataset_feat,y_train_dataset_feat)

In [191]:
y_pred_on_dataset_feat_dt=decision_tree_on_dataset_feat.predict(scaled_X_test_dataset_feat)

In [192]:
acc_dataset_feat_dt, recall_dataset_feat_dt, prec_dataset_feat_dt=get_metrics(y_test_dataset_feat,y_pred_on_dataset_feat_dt, 'Decision Tree Classifier', 'Dataset Features')

Accuracy of Decision Tree Classifier on Dataset Features: 0.8116728876823889
Recall of Decision Tree Classifier on Dataset Features: 0.8116728876823889
Precision of Decision Tree Classifier on Dataset Features: 0.8260657713785309


### 3. On TSFEL Features

In [193]:
decision_tree_on_tsfel_feat=DecisionTreeClassifier(random_state=42)
decision_tree_on_tsfel_feat.fit(scaled_X_train_tsfel, y_train_tsfel)

In [195]:
y_pred_on_tsfel_feat_dt=decision_tree_on_tsfel_feat.predict(scaled_X_test_tsfel)

In [197]:
acc_tsfel_dt, recall_tsfel_dt, prec_tsfel_dt= get_metrics(y_test_tsfel, y_pred_on_tsfel_feat_dt,'Decision Tree Classifier', 'TSFEL features')

Accuracy of Decision Tree Classifier on TSFEL features: 0.7539871055310485
Recall of Decision Tree Classifier on TSFEL features: 0.7539871055310485
Precision of Decision Tree Classifier on TSFEL features: 0.7561649384753775


## **b. Use Deep Learning models like LSTM, 1D Cnns for modeling using inertial sensor data (Raw accelerometer data). Do not train DL methods on already existing features provided by the authors.**

In [None]:
X_train_tensor=torch.tensor(scaled_X_train_raw,dtype=torch.float32)
X_test_tensor=torch.tensor(scaled_X_test_raw, dtype=torch.float32)
y_train_tensor=torch.tensor(y_train_raw, dtype=torch.long)
y_test_tensor=torch.tensor(y_test_raw, dtype=torch.long)

### **Using 1D CNN**

In [None]:
X_train_for_cnn=X_train_tensor.reshape(X_train_tensor.shape[0],X_train_tensor.shape[2],X_train_tensor.shape[1])
y_train_for_cnn=y_train_tensor
X_test_for_cnn=X_test_tensor.reshape(X_test_tensor.shape[0],X_test_tensor.shape[2],X_test_tensor.shape[1])
y_test_for_cnn=y_test_tensor

In [117]:
X_train_for_cnn.shape

torch.Size([7352, 3, 128])

In [118]:
train_dataset_cnn=torch.utils.data.TensorDataset(X_train_for_cnn,y_train_for_cnn)
train_loader_cnn=torch.utils.data.DataLoader(train_dataset_cnn, batch_size=32, shuffle=False)

In [123]:
test_dataset_cnn=torch.utils.data.TensorDataset(X_test_for_cnn,y_test_for_cnn)
test_loader_cnn=torch.utils.data.DataLoader(test_dataset_cnn, batch_size=32, shuffle=False)

In [37]:
class cnn_1d(nn.Module):
    def __init__(self, n_features, n_classes, n_steps):
        super(cnn_1d, self).__init__()
        self.conv_layer1=nn.Conv1d(in_channels=n_features, out_channels=32, kernel_size=4)
        self.pooling_layer1=nn.MaxPool1d(2,1)
        self.conv_layer2=nn.Conv1d(in_channels=32, out_channels=64, kernel_size=4)
        self.pooling_layer2=nn.MaxPool1d(2,1)
        self.fc1=nn.Linear(64*120,128)
        self.fc2=nn.Linear(128,n_classes)

    def forward(self, x):
        x=self.conv_layer1(x)
        x=F.relu(x)
        x=self.pooling_layer1(x)
        x=self.conv_layer2(x)
        x=F.relu(x)
        x=self.pooling_layer2(x)
        x=x.view(x.size(0),-1)
        x=self.fc1(x)
        x=F.relu(x)
        x=self.fc2(x) 
        return x

In [38]:
cnn_model_1d=cnn_1d(3,6,128)

In [None]:
def train_cnn_model(model, train_dataset, epochs):
    criterion=nn.CrossEntropyLoss()
    optimizer=optim.Adam(model.parameters(), lr=0.005)

    for epoch in range(epochs):
        epoch_train_loss=0
        batch_train_losses=[]

        for X_train_batch, y_train_batch in train_dataset:
            optimizer.zero_grad()
            results=model(X_train_batch)
            loss=criterion(results, y_train_batch-1)
            loss.backward()
            optimizer.step()
            batch_train_losses.append(loss.item())
        
        epoch_train_loss=sum(batch_train_losses)/len(batch_train_losses)

        print(f'Epoch : {epoch}, training-loss: {epoch_train_loss}')


In [40]:
train_cnn_model(cnn_model_1d, train_loader_cnn, 25)

Epoch : 0, training-loss: 1.1469657893778513
Epoch : 1, training-loss: 0.7625510327072571
Epoch : 2, training-loss: 0.6099000197531574
Epoch : 3, training-loss: 0.5954988334793597
Epoch : 4, training-loss: 0.5878152448643484
Epoch : 5, training-loss: 0.5302365609667385
Epoch : 6, training-loss: 0.4863609299101644
Epoch : 7, training-loss: 0.47141312593366425
Epoch : 8, training-loss: 0.4392689578643236
Epoch : 9, training-loss: 0.4063283897462458
Epoch : 10, training-loss: 0.45196229147584194
Epoch : 11, training-loss: 0.3675628495536449
Epoch : 12, training-loss: 0.35116898210900493
Epoch : 13, training-loss: 0.34198004993643377
Epoch : 14, training-loss: 0.35121365898471396
Epoch : 15, training-loss: 0.30174316928788236
Epoch : 16, training-loss: 0.2847441636587736
Epoch : 17, training-loss: 0.266231406557294
Epoch : 18, training-loss: 0.25064979059725767
Epoch : 19, training-loss: 0.24493582316730603
Epoch : 20, training-loss: 0.238267766703145
Epoch : 21, training-loss: 0.335022834

In [None]:
def get_predictions_cnn(model, test_dataset):
    model.eval()
    preds=[]
    for X_test_batch, y_test_batch in test_dataset:
        with torch.no_grad():
            results=model(X_test_batch)
            batch_preds=results.argmax(dim=1)+1
            preds.extend(batch_preds)
    return preds
            

In [None]:
y_preds_from_cnn=get_predictions_cnn(cnn_model_1d, test_loader_cnn)

In [None]:
acc_cnn, recall_cnn, prec_cnn=get_metrics(y_test_for_cnn,y_preds_from_cnn, '1D-CNN', 'Raw Data')

Accuracy of 1D CNN on Raw Data: 0.8082796063793688
Recall of 1D CNN on Raw Data: 0.8082796063793688
Precision of 1D CNN on Raw Data: 0.8129413350103818


## **Using LSTMs**

In [None]:
X_train_for_lstm=X_train_tensor
y_train_for_lstm=y_train_tensor
X_test_for_lstm=X_test_tensor
y_test_for_lstm=y_test_tensor

  X_train_for_lstm=torch.tensor(X_train_tensor, dtype=torch.float32)
  X_test_for_lstm=torch.tensor(X_test_tensor, dtype=torch.float32)


In [138]:
train_dataset_lstm=torch.utils.data.TensorDataset(X_train_for_lstm,y_train_for_lstm)
train_loader_lstm=torch.utils.data.DataLoader(train_dataset_lstm, batch_size=32, shuffle=False)
test_dataset_lstm=torch.utils.data.TensorDataset(X_test_for_lstm,y_test_for_lstm)
test_loader_lstm=torch.utils.data.DataLoader(test_dataset_lstm, batch_size=32, shuffle=False)

In [148]:
print(type(X_train_for_lstm)) 
print(type(y_train_for_lstm))  
print(X_train_for_lstm.shape)
print(y_train_for_lstm.shape)
print(y_train_for_lstm)

<class 'torch.Tensor'>
<class 'torch.Tensor'>
torch.Size([7352, 128, 3])
torch.Size([7352])
tensor([5, 5, 5,  ..., 2, 2, 2])


In [140]:
print(X_train_for_lstm.shape)

torch.Size([7352, 128, 3])


In [141]:
class lstm_model(nn.Module):
    def __init__(self, n_feat, n_hidden, n_layers, n_classes):
        super(lstm_model, self).__init__()
        self.n_hidden=n_hidden
        self.n_layers=n_layers
        self.lstm=nn.LSTM(n_feat, n_hidden, n_layers, batch_first=True)
        self.fc=nn.Linear(n_hidden, n_classes)
    
    def forward(self, x, h0=None, c0=None):
        if h0 is None or c0 is None:
            h0=torch.zeros(self.n_layers, x.size(0), self.n_hidden)
            c0=torch.zeros(self.n_layers, x.size(0), self.n_hidden)
        result, (hn, cn)=self.lstm(x, (h0, c0))
        last_hidden_state=result[:,-1,:]
        output=self.fc(last_hidden_state)
        return output, (hn, cn)     


In [142]:
def train_lstm_model(model, train_dataset, epochs):
    criterion=nn.CrossEntropyLoss()
    optimizer=optim.Adam(model.parameters(), lr=0.005)

    for epoch in range(epochs):
        epoch_train_loss=0
        batch_train_losses=[]

        hn=None
        cn=None

        for X_train_batch, y_train_batch in train_dataset:
            optimizer.zero_grad()
            batch_size = X_train_batch.size(0)
            if hn is not None and hn.size(1) != batch_size:
                hn, cn=None, None
            results, (hn, cn)=model(X_train_batch, hn, cn)
            loss=criterion(results, y_train_batch-1)
            loss.backward()
            optimizer.step()
            batch_train_losses.append(loss.item())
            hn, cn=hn.detach(), cn.detach()
        
        epoch_train_loss=sum(batch_train_losses)/len(batch_train_losses)

        print(f'Epoch : {epoch}, training-loss: {epoch_train_loss}')


In [143]:
lstm_classifier=lstm_model(3,256,3,6)

In [144]:
train_lstm_model(lstm_classifier,train_loader_lstm, 25)

Epoch : 0, training-loss: 1.809502503146296
Epoch : 1, training-loss: 1.8011861687121185
Epoch : 2, training-loss: 1.7873257870259491
Epoch : 3, training-loss: 1.7794476384701936
Epoch : 4, training-loss: 1.6716562853073296
Epoch : 5, training-loss: 1.546207728722821
Epoch : 6, training-loss: 1.5044316898869432
Epoch : 7, training-loss: 1.4444813661601232
Epoch : 8, training-loss: 1.4322184317137885
Epoch : 9, training-loss: 1.364504798858062
Epoch : 10, training-loss: 1.3572067411049553
Epoch : 11, training-loss: 1.3462289059291714
Epoch : 12, training-loss: 1.2691727783692919
Epoch : 13, training-loss: 1.2871195808700893
Epoch : 14, training-loss: 1.1243067623804446
Epoch : 15, training-loss: 0.9954083297887574
Epoch : 16, training-loss: 1.6323022517173187
Epoch : 17, training-loss: 1.37065323571796
Epoch : 18, training-loss: 1.3648579984903335
Epoch : 19, training-loss: 1.3434341848382483
Epoch : 20, training-loss: 1.3412390948556687
Epoch : 21, training-loss: 1.2762162666443897
Epo

In [None]:
def get_predictions_lstm(model, test_dataset):
    model.eval()
    preds=[]
    for X_test_batch, y_test_batch in test_dataset:
        with torch.no_grad():
            results=model(X_test_batch)
            batch_preds=results[0].argmax(dim=1)+1
            preds.extend(batch_preds)
    return preds
            

In [None]:
y_preds_from_lstm=get_predictions_lstm(lstm_classifier, test_loader_lstm)

In [146]:
acc_lstm, recall_lstm, prec_lstm=get_metrics(y_test_for_lstm,y_preds_from_lstm, 'LSTM', 'Raw Data')

Accuracy of LSTM on Raw Data: 0.4054971157108924
Recall of LSTM on Raw Data: 0.4054971157108924
Precision of LSTM on Raw Data: 0.3612564932570117


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## **c. How can the same be done using Machine Learning models like Random forest, SVM, and Logistic regression? Use the TSFEL library to generate features from Inertial data. Compare the performance of models trained on your generated features versus features provided by the authors.**

### **Using Random Forest on various Datasets**

In [198]:
from sklearn.ensemble import RandomForestClassifier

In [199]:
rf_model_tsfel=RandomForestClassifier(n_estimators=20, random_state=42, bootstrap=True)
rf_model_tsfel.fit(scaled_X_train_tsfel,y_train_tsfel)
y_pred_rf_tsfel=rf_model_tsfel.predict(scaled_X_test_tsfel)
acc_tsfel_rf, recall_tsfel_rf, prec_tsfel_rf=get_metrics(y_test_tsfel,y_pred_rf_tsfel, 'Random Forest Classifier', 'TSFEL-Features')

Accuracy of Random Forest Classifier on TSFEL-Features: 0.7824906684764167
Recall of Random Forest Classifier on TSFEL-Features: 0.7824906684764167
Precision of Random Forest Classifier on TSFEL-Features: 0.786453974620687


In [202]:
rf_model_raw=RandomForestClassifier(n_estimators=20, random_state=42, bootstrap=True)
rf_model_raw.fit(X_train_raw_for_dt,y_train_raw_for_dt)
y_pred_rf_raw=rf_model_raw.predict(X_test_raw_for_dt)
acc_raw_rf, recall_raw_rf, prec_raw_rf=get_metrics(y_test_raw_for_dt,y_pred_rf_raw, 'Random Forest Classifier', 'Raw Data')


Accuracy of Random Forest Classifier on Raw Data: 0.5969126442144553
Recall of Random Forest Classifier on Raw Data: 0.5969126442144553
Precision of Random Forest Classifier on Raw Data: 0.6299532501663687


In [206]:
rf_model_dataset_feat=RandomForestClassifier(n_estimators=20, random_state=42, bootstrap=True)
rf_model_dataset_feat.fit(scaled_X_train_dataset_feat,y_train_dataset_feat)
y_pred_rf_dataset_feat=rf_model_dataset_feat.predict(scaled_X_test_dataset_feat)
acc_dataset_feat_rf, recall_dataset_feat_rf, prec_dataset_feat_rf=get_metrics(y_test_dataset_feat,y_pred_rf_dataset_feat, 'Random Forest Classifier', 'Dataset Features')

  return fit_method(estimator, *args, **kwargs)


Accuracy of Random Forest Classifier on Dataset Features: 0.8897183576518494
Recall of Random Forest Classifier on Dataset Features: 0.8897183576518494
Precision of Random Forest Classifier on Dataset Features: 0.8944630699119097


### **Using Logistic Regression on various Datasets**

In [207]:
from sklearn.linear_model import LogisticRegression

In [208]:
log_reg_model_tsfel=LogisticRegression()
log_reg_model_tsfel.fit(scaled_X_train_tsfel,y_train_tsfel)
y_pred_log_reg_tsfel=log_reg_model_tsfel.predict(scaled_X_test_tsfel)
acc_tsfel_log_reg, recall_tsfel_log_reg, prec_tsfel_log_reg=get_metrics(y_test_tsfel,y_pred_log_reg_tsfel, 'Logistic Regression', 'TSFEL-Features')

Accuracy of Logistic Regression on TSFEL-Features: 0.8109942314217848
Recall of Logistic Regression on TSFEL-Features: 0.8109942314217848
Precision of Logistic Regression on TSFEL-Features: 0.8135119489262421


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [209]:
log_reg_model_raw=LogisticRegression()
log_reg_model_raw.fit(X_train_raw_for_dt,y_train_raw_for_dt)
y_pred_log_reg_raw=log_reg_model_raw.predict(X_test_raw_for_dt)
acc_raw_log_reg, recall_raw_log_reg, prec_raw_log_reg=get_metrics(y_test_raw_for_dt,y_pred_log_reg_raw, 'Logistic Regression', 'Raw Data')


Accuracy of Logistic Regression on Raw Data: 0.49966862487275193
Recall of Logistic Regression on Raw Data: 0.49966862487275193
Precision of Logistic Regression on Raw Data: 0.4684431585481109


In [210]:
log_reg_model_dataset_feat=LogisticRegression()
log_reg_model_dataset_feat.fit(scaled_X_train_dataset_feat,y_train_dataset_feat)
y_pred_log_reg_dataset_feat=log_reg_model_dataset_feat.predict(scaled_X_test_dataset_feat)
acc_dataset_feat_log_reg, recall_dataset_feat_log_reg, prec_dataset_feat_log_reg=get_metrics(y_test_dataset_feat,y_pred_log_reg_dataset_feat, 'Logistic Regression', 'Dataset Features')

  y = column_or_1d(y, warn=True)


Accuracy of Logistic Regression on Dataset Features: 0.9630132337970818
Recall of Logistic Regression on Dataset Features: 0.9630132337970818
Precision of Logistic Regression on Dataset Features: 0.963231711765267


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


## **Tabularizing Observations**

In [217]:
models=['Decision Tree', 'Random Forest', 'Logistic Regression', '1D-CNN', 'LSTM']
metrics=['Accuracy', 'Precision', 'Recall']

In [218]:
df_1=pd.DataFrame(columns=metrics, index=models)

In [219]:
df_1.loc['Decision Tree']=(acc_raw_dt, prec_raw_dt, recall_raw_dt)
df_1.loc['Random Forest']=(acc_raw_rf, prec_raw_rf, recall_raw_rf)
df_1.loc['Logistic Regression']=(acc_raw_log_reg, prec_raw_log_reg, recall_raw_log_reg)
df_1.loc['1D-CNN']=(acc_cnn, prec_cnn, recall_cnn)
df_1.loc['LSTM']=(acc_lstm, prec_lstm, recall_lstm)

In [231]:
ml_models=['Decision Tree', 'Random Forest', 'Logistic Regression']
classes=['Dataset Features', 'TSFEL features']

In [None]:
performance_dict={
    "Dataset Features": {
        "Decision Tree": {
                          "Accuracy": acc_dataset_feat_dt,
                          "Precision":prec_dataset_feat_dt,
                          "Recall": recall_dataset_feat_dt
                        },
        "Random Forest": {
                           "Accuracy":acc_dataset_feat_rf,
                           "Precision":prec_dataset_feat_rf,
                            "Recall":recall_dataset_feat_rf
                        },
        "Logistic Regression": {
                                "Accuracy":acc_dataset_feat_log_reg,
                                "Precision":prec_dataset_feat_log_reg,
                                "Recall":recall_dataset_feat_log_reg
                            },
    },
    "TSFEL Features": {
        "Decision Tree": {
                           "Accuracy":acc_tsfel_dt,
                           "Precision": prec_tsfel_dt,
                           "Recall":recall_tsfel_dt
                             },
        "Random Forest": {
                          "Accuracy": acc_tsfel_rf,
                          "Precision": prec_tsfel_rf,
                          "Recall": recall_tsfel_rf
                          },
        "Logistic Regression": {
                                "Accuracy": acc_tsfel_log_reg,
                                "Precision": prec_tsfel_log_reg,
                                "Recall": recall_tsfel_log_reg
                            }
    }
}

In [None]:
df_performance = pd.DataFrame.from_dict(
    {(feature_set, model): metrics for feature_set, models in performance_dict.items() for model, metrics in models.items()},
    orient="index"
)

## **Comparision: ML models vs. DL Models**

In [220]:
df_1

Unnamed: 0,Accuracy,Precision,Recall
Decision Tree,0.554189,0.599741,0.554189
Random Forest,0.596913,0.629953,0.596913
Logistic Regression,0.499669,0.468443,0.499669
1D-CNN,0.80828,0.812941,0.80828
LSTM,0.405497,0.361256,0.405497


## **Comparision: ML models on dataset features vs. TSFEL features**

In [228]:
df_performance

Unnamed: 0,Unnamed: 1,Accuracy,Precision,Recall
Dataset Features,Decision Tree,0.811673,0.826066,0.811673
Dataset Features,Random Forest,0.889718,0.894463,0.889718
Dataset Features,Logistic Regression,0.963013,0.963232,0.963013
TSFEL Features,Decision Tree,0.753987,0.756165,0.753987
TSFEL Features,Random Forest,0.782491,0.786454,0.782491
TSFEL Features,Logistic Regression,0.810994,0.813512,0.810994


# **Summary of the Task:**

## In this task, we built various models on the the given as well as the extracted dataset. 
## **Dataset Preparation:**
## We had 7352 training data windows and 2947 testing data windows each with 128 time steps and 3 features indicating the triaxial acceleration, as raw inertial signals. We also had 561 length feature vector for each window of both train and test data. 
## 1. Generated train-test data from raw inertial signals of accelerometer
## 2. Generated train-test data from given feature vectors
## 3. Extracted 93-length feature vector from each window of train-test data using TSFEL 

## **Models Trained:**
### **1. ML models (trained on all 3 types of datasets):**
### a. Decision Tree
### b. Random Forest
### c. Logistic Regression
### **2. DL models (trained only on raw inertial signals):**
### a. 1D-CNN
### b. LSTM


## **Observations and Insights:**
## Looking at the tables we obtained about various performance metrics of the models, it was observed and concluded that:
### 1. 1D CNN had the best performence on Human-Activity-Recognition. It is understood as the algorithm is effective in extracing features from sequential data like time series
### 2. Random Forest performed better than other ML models, evidently due to ensemble methods. But still it performs poorly compared to CNN, as it is stii ineffective of grasping important features and patterns in the time-series data. 
### 3. Logistic Regression has worst performance among ML models, which suggests linear decision boundries are not sufficient for such dataset.
### 4. LSTM, ideally, should effectively handle sequential data as they can store sequential dependencies. They should outperform ML models. But here, we observe that they have the worst performance. 
### a. Inefficient training or poor choice of hyperparameters.
### b. Lack of proper optimization techniques (e.g., learning rate tuning, dropout, batch size).
### c. Insufficient dataset size for LSTM to learn long-term dependencies effectively.
### d. High training time under the given parameter set, making optimization difficult.
## This indicates that Deep Learning techniques are super-efficient in extracting meaningful pattrns and features from sequential data, as compared to the traditional Machine Learning algorithms. However, seeing the case of LSTMs here in our notebook, it is also advisable to opt for better data-preprocessing, hyperparameter fine-tuning, and optimization techniques to improve the results.

## Furthermore, we see that models trained on features given by authors fairly outperform those tained on TSFEL-extrcted features. This tells us that the given features-set gives more domain-specific and informative set of features proving it to be the best choice for model training. Features extracted from TSFEL can be less informative and redundant, causing the model to drop in accuracy.