In [1]:
# Download the OPPORTUNITY archive directly into uci.zip.
# (-c cannot resume anything when the payload is streamed to stdout, so it was dropped.)
!wget -O uci.zip "https://archive.ics.uci.edu/static/public/226/opportunity+activity+recognition.zip"

--2024-08-11 08:17:05--  https://archive.ics.uci.edu/static/public/226/opportunity+activity+recognition.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘STDOUT’

-                       [             <=>    ] 292.43M  65.9MB/s    in 4.3s    

2024-08-11 08:17:10 (67.5 MB/s) - written to stdout [306636009]



In [9]:
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelBinarizer
import random
import tensorflow as tf
from tensorflow import keras
import torch
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import os
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Extract the downloaded archive into the current working directory.
# NOTE(review): later cells read from "/content/OpportunityUCIDataset/dataset", so this
# presumably relies on the working directory being /content — confirm.
shutil.unpack_archive("uci.zip")

In [5]:

# Build a lookup {user id as str: [paths of that user's ADL recordings]}.
src_dir = "/content/OpportunityUCIDataset/dataset"
user_num = 4
adl_file_num = 5
files_dir = {
    str(user_idx): [
        os.path.join(src_dir, f"S{user_idx}-ADL{file_idx}.dat")
        for file_idx in tqdm(range(1, adl_file_num+1))
    ]
    for user_idx in tqdm(range(1, user_num+1))
}

files_dir

  0%|          | 0/4 [00:00<?, ?it/s]
100%|██████████| 5/5 [00:00<00:00, 47880.18it/s]

100%|██████████| 5/5 [00:00<00:00, 52560.20it/s]

100%|██████████| 5/5 [00:00<00:00, 55043.36it/s]

100%|██████████| 5/5 [00:00<00:00, 54330.36it/s]
100%|██████████| 4/4 [00:00<00:00, 212.04it/s]


{'1': ['/content/OpportunityUCIDataset/dataset/S1-ADL1.dat',
  '/content/OpportunityUCIDataset/dataset/S1-ADL2.dat',
  '/content/OpportunityUCIDataset/dataset/S1-ADL3.dat',
  '/content/OpportunityUCIDataset/dataset/S1-ADL4.dat',
  '/content/OpportunityUCIDataset/dataset/S1-ADL5.dat'],
 '2': ['/content/OpportunityUCIDataset/dataset/S2-ADL1.dat',
  '/content/OpportunityUCIDataset/dataset/S2-ADL2.dat',
  '/content/OpportunityUCIDataset/dataset/S2-ADL3.dat',
  '/content/OpportunityUCIDataset/dataset/S2-ADL4.dat',
  '/content/OpportunityUCIDataset/dataset/S2-ADL5.dat'],
 '3': ['/content/OpportunityUCIDataset/dataset/S3-ADL1.dat',
  '/content/OpportunityUCIDataset/dataset/S3-ADL2.dat',
  '/content/OpportunityUCIDataset/dataset/S3-ADL3.dat',
  '/content/OpportunityUCIDataset/dataset/S3-ADL4.dat',
  '/content/OpportunityUCIDataset/dataset/S3-ADL5.dat'],
 '4': ['/content/OpportunityUCIDataset/dataset/S4-ADL1.dat',
  '/content/OpportunityUCIDataset/dataset/S4-ADL2.dat',
  '/content/OpportunityUC

In [6]:


def get_cleaned_user_data(file_pth, user_idx):
    """Load one OPPORTUNITY ``.dat`` recording and return its cleaned body-worn channels.

    Steps: read the whitespace-separated matrix with ``np.loadtxt``, delete the
    unused raw columns, name the remaining 111 columns, keep only rows whose
    ``HL_Activity`` is non-zero, remap the labels 101..105 to 0..4, linearly
    interpolate missing values and drop rows that still contain NaN.
    The shape is printed after the column drop and again after cleaning.

    Args:
        file_pth: path of the ``.dat`` file to read.
        user_idx: not used by this function; kept so existing callers keep working.

    Returns:
        pandas.DataFrame with the columns listed in ``used_columns`` (all float).
    """
    # 0-based indices of the raw columns that are discarded.
    # Every IMU block is 13 columns wide (BACK 37-49, RUA 50-62, RLA 63-75, LUA 76-88,
    # LLA 89-101) and its last 4 columns are dropped, so the LLA range must start at 98.
    # It used to start at 99, which kept one extra column and shifted every column name
    # after "imu_LLA_magneticZ" by one position.
    # The environment range now stops before column 243: columns 245-249 are the five
    # LL/ML label columns and 244 is HL_Activity, so 243 is the column that
    # `used_columns` names "Locomotion" (see the dataset's column_names.txt).
    drop_columns = np.concatenate([
        np.arange(46, 50),    # BACK Quaternion
        np.arange(34, 37),    # RH_acc
        np.arange(59, 63),    # RUA Quaternion
        np.arange(72, 76),    # RLA
        np.arange(85, 89),    # LUA
        np.arange(98, 102),   # LLA
        np.arange(117, 118),  # L-SHOE Compass
        np.arange(133, 134),  # R-SHOE Compass
        np.arange(134, 243),  # environment sensor
        np.arange(245, 250),  # LL, ML level label
    ])

    raw_data = np.loadtxt(file_pth)

    used_data = np.delete(raw_data, drop_columns, axis=1)
    print(used_data.shape)

    used_columns = ["MILLISEC",
                    "acc_RKN_upper_accX","acc_RKN_upper_accY","acc_RKN_upper_accZ",
                    "acc_HIP_accX","acc_HIP_accY","acc_HIP_accZ",
                    "acc_LUA_upper_accX","acc_LUA_upper_accY","acc_LUA_upper_accZ",
                    "acc_RUA_lower_accX","acc_RUA_lower_accY","acc_RUA_lower_accZ",
                    "acc_LH_accX","acc_LH_accY","acc_LH_accZ",
                    "acc_BACK_accX","acc_BACK_accY","acc_BACK_accZ",
                    "acc_RKN_lower_accX","acc_RKN_lower_accY","acc_RKN_lower_accZ",
                    "acc_RWR_accX","acc_RWR_accY","acc_RWR_accZ",
                    "acc_RUA_upper_accX","acc_RUA_upper_accY","acc_RUA_upper_accZ",
                    "acc_LUA_lower_accX","acc_LUA_lower_accY","acc_LUA_lower_accZ",
                    "acc_LWR_accX","acc_LWR_accY","acc_LWR_accZ",
#                     "acc_RH_accX","acc_RH_accY","acc_RH_accZ",
                    "imu_BACK_accX","imu_BACK_accY","imu_BACK_accZ",
                    "imu_BACK_gyroX","imu_BACK_gyroY","imu_BACK_gyroZ",
                    "imu_BACK_magneticX","imu_BACK_magneticY","imu_BACK_magneticZ",
                    "imu_RUA_accX","imu_RUA_accY","imu_RUA_accZ",
                    "imu_RUA_gyroX","imu_RUA_gyroY","imu_RUA_gyroZ",
                    "imu_RUA_magneticX","imu_RUA_magneticY","imu_RUA_magneticZ",
                    "imu_RLA_accX","imu_RLA_accY","imu_RLA_accZ",
                    "imu_RLA_gyroX","imu_RLA_gyroY","imu_RLA_gyroZ",
                    "imu_RLA_magneticX","imu_RLA_magneticY","imu_RLA_magneticZ",
                    "imu_LUA_accX","imu_LUA_accY","imu_LUA_accZ",
                    "imu_LUA_gyroX","imu_LUA_gyroY","imu_LUA_gyroZ",
                    "imu_LUA_magneticX","imu_LUA_magneticY","imu_LUA_magneticZ",
                    "imu_LLA_accX","imu_LLA_accY","imu_LLA_accZ",
                    "imu_LLA_gyroX","imu_LLA_gyroY","imu_LLA_gyroZ",
                    "imu_LLA_magneticX","imu_LLA_magneticY","imu_LLA_magneticZ",
                    "imu_L-SHOE_EuX","imu_L-SHOE_EuY","imu_L-SHOE_EuZ",
                    "imu_L-SHOE_Nav_Ax","imu_L-SHOE_Nav_Ay","imu_L-SHOE_Nav_Az",
                    "imu_L-SHOE_Body_Ax","imu_L-SHOE_Body_Ay","imu_L-SHOE_Body_Az",
                    "imu_L-SHOE_AngVelBodyFrameX","imu_L-SHOE_AngVelBodyFrameY","imu_L-SHOE_AngVelBodyFrameZ",
                    "imu_L-SHOE_AngVelNavFrameX","imu_L-SHOE_AngVelNavFrameY","imu_L-SHOE_AngVelNavFrameZ",
                    "imu_R-SHOE_EuX","imu_R-SHOE_EuY","imu_R-SHOE_EuZ",
                    "imu_R-SHOE_Nav_Ax","imu_R-SHOE_Nav_Ay","imu_R-SHOE_Nav_Az",
                    "imu_R-SHOE_Body_Ax","imu_R-SHOE_Body_Ay","imu_R-SHOE_Body_Az",
                    "imu_R-SHOE_AngVelBodyFrameX","imu_R-SHOE_AngVelBodyFrameY","imu_R-SHOE_AngVelBodyFrameZ",
                    "imu_R-SHOE_AngVelNavFrameX","imu_R-SHOE_AngVelNavFrameY","imu_R-SHOE_AngVelNavFrameZ",
                    "Locomotion",
                    "HL_Activity"]
    used_data = pd.DataFrame(used_data, columns=used_columns)

    # Keep only rows that carry a high-level activity label. The explicit copy makes the
    # relabeling below write to an independent frame rather than to a slice of the
    # unfiltered one.
    used_data = used_data[used_data['HL_Activity'] != 0].copy()

    # Remap the raw label codes to contiguous class ids. `.loc` replaces the former
    # chained-indexing assignment, which warns and does nothing under Copy-on-Write.
    hl_label_map = {
        101: 0,  # Relaxing
        102: 1,  # Coffee time
        103: 2,  # Early morning
        104: 3,  # Cleanup
        105: 4,  # Sandwich time
    }
    for raw_label, class_id in hl_label_map.items():
        used_data.loc[used_data['HL_Activity'] == raw_label, 'HL_Activity'] = class_id

    # Fill gaps by linear interpolation, then drop rows that are still incomplete
    # (e.g. leading NaNs, which interpolation cannot fill).
    used_data = used_data.interpolate()
    used_data = used_data.dropna(axis=0)
    print(used_data.shape)
    return used_data

In [7]:

# Module-level scaler; calc_normalization() fits this instance in place.
scaler = StandardScaler()

def sliding_window(time_series, width, step, order='F'):
    """Cut a 1-D series into overlapping windows.

    Row ``r`` of the result holds ``time_series[r*step : r*step + width]``.

    Args:
        time_series: 1-D sequence to slice.
        width: number of samples per window.
        step: offset between the starts of consecutive windows.
        order: ``'F'`` returns the Fortran-ordered reshape as is; any other
            value returns a C-contiguous version of it.

    Returns:
        2-D array of shape ``(n_windows, width)``.
    """
    # One strided slice per in-window offset: slice k collects the k-th sample of
    # every window. `or None` turns a stop of 0 (last offset) into "run to the end".
    per_offset = []
    for offset in range(width):
        stop = (1 + offset - width) or None
        per_offset.append(time_series[offset:stop:step])
    flat = np.hstack(per_offset)

    n_windows = int(len(flat) / width)
    windows = flat.reshape((n_windows, width), order='F')
    return windows if order == 'F' else np.ascontiguousarray(windows)

def calc_normalization(data):
    """Fit the module-level ``scaler`` on ``data`` and return it.

    Args:
        data: 3-D array; it is flattened to ``(num_instances, -1)`` before fitting,
            so one mean/std is learned per (time step, feature) position.

    Returns:
        The fitted module-level ``scaler``. Nothing used to be returned, so
        ``scaler = calc_normalization(...)`` would have rebound the name to ``None``.
    """
    num_instances, _, _ = data.shape
    flat = np.reshape(data, (num_instances, -1))
    scaler.fit(flat)
    return scaler
def apply_normalization(data, fitted_scaler=None):
    """Standardize a 3-D batch of windows and replace resulting NaNs with 0.

    Args:
        data: 3-D array; it is flattened to ``(num_instances, -1)`` for scaling and
            reshaped back afterwards.
        fitted_scaler: optional object exposing ``transform``. When given, its
            statistics are applied unchanged, so a scaler fitted on the training
            split can be reused for the test split. When omitted, a fresh
            ``StandardScaler`` is fitted on ``data`` itself (the previous behaviour).

    Returns:
        Array with the same shape as ``data`` containing the scaled values.
    """
    num_instances, num_time_steps, num_features = data.shape
    flat = np.reshape(data, (num_instances, -1))

    if fitted_scaler is None:
        # backward-compatible default: statistics come from `data` itself
        norm_data = StandardScaler().fit_transform(flat)
    else:
        norm_data = fitted_scaler.transform(flat)

    norm_data[np.isnan(norm_data)] = 0
    return np.reshape(norm_data, (num_instances, num_time_steps, num_features))

In [10]:


# Per-user pipeline: load every ADL recording, cut each activity into overlapping
# windows, split into train/test, standardize with train statistics and save.
src_dir = "/content/OpportunityUCIDataset/dataset"
out_dir = '/kaggle/working/OPPORTUNITY_data'
user_num = 4
adl_file_num = 5

seq_length = 300     # samples per window
shifting_step = 30   # samples between the starts of consecutive windows
used_channels = [
    "acc_RKN_upper_accX","acc_RKN_upper_accY","acc_RKN_upper_accZ",
    "acc_HIP_accX","acc_HIP_accY","acc_HIP_accZ",
    "acc_LUA_upper_accX","acc_LUA_upper_accY","acc_LUA_upper_accZ",
    "acc_RUA_lower_accX","acc_RUA_lower_accY","acc_RUA_lower_accZ",
    "acc_LH_accX","acc_LH_accY","acc_LH_accZ",
    "acc_BACK_accX","acc_BACK_accY","acc_BACK_accZ",
    "acc_RKN_lower_accX","acc_RKN_lower_accY","acc_RKN_lower_accZ",
    "acc_RWR_accX","acc_RWR_accY","acc_RWR_accZ",
    "acc_RUA_upper_accX","acc_RUA_upper_accY","acc_RUA_upper_accZ",
    "acc_LUA_lower_accX","acc_LUA_lower_accY","acc_LUA_lower_accZ",
    "acc_LWR_accX","acc_LWR_accY","acc_LWR_accZ",
    "imu_BACK_accX","imu_BACK_accY","imu_BACK_accZ",
    "imu_BACK_gyroX","imu_BACK_gyroY","imu_BACK_gyroZ",
    "imu_BACK_magneticX","imu_BACK_magneticY","imu_BACK_magneticZ",
    "imu_RUA_accX","imu_RUA_accY","imu_RUA_accZ",
    "imu_RUA_gyroX","imu_RUA_gyroY","imu_RUA_gyroZ",
    "imu_RUA_magneticX","imu_RUA_magneticY","imu_RUA_magneticZ",
    "imu_RLA_accX","imu_RLA_accY","imu_RLA_accZ",
    "imu_RLA_gyroX","imu_RLA_gyroY","imu_RLA_gyroZ",
    "imu_RLA_magneticX","imu_RLA_magneticY","imu_RLA_magneticZ",
    "imu_LUA_accX","imu_LUA_accY","imu_LUA_accZ",
    "imu_LUA_gyroX","imu_LUA_gyroY","imu_LUA_gyroZ",
    "imu_LUA_magneticX","imu_LUA_magneticY","imu_LUA_magneticZ",
    "imu_LLA_accX","imu_LLA_accY","imu_LLA_accZ",
    "imu_LLA_gyroX","imu_LLA_gyroY","imu_LLA_gyroZ",
    "imu_LLA_magneticX","imu_LLA_magneticY","imu_LLA_magneticZ",
    "imu_L-SHOE_EuX","imu_L-SHOE_EuY","imu_L-SHOE_EuZ",
    "imu_L-SHOE_Nav_Ax","imu_L-SHOE_Nav_Ay","imu_L-SHOE_Nav_Az",
    "imu_L-SHOE_Body_Ax","imu_L-SHOE_Body_Ay","imu_L-SHOE_Body_Az",
    "imu_L-SHOE_AngVelBodyFrameX","imu_L-SHOE_AngVelBodyFrameY","imu_L-SHOE_AngVelBodyFrameZ",
    "imu_L-SHOE_AngVelNavFrameX","imu_L-SHOE_AngVelNavFrameY","imu_L-SHOE_AngVelNavFrameZ",
    "imu_R-SHOE_EuX","imu_R-SHOE_EuY","imu_R-SHOE_EuZ",
    "imu_R-SHOE_Nav_Ax","imu_R-SHOE_Nav_Ay","imu_R-SHOE_Nav_Az",
    "imu_R-SHOE_Body_Ax","imu_R-SHOE_Body_Ay","imu_R-SHOE_Body_Az",
    "imu_R-SHOE_AngVelBodyFrameX","imu_R-SHOE_AngVelBodyFrameY","imu_R-SHOE_AngVelBodyFrameZ",
    "imu_R-SHOE_AngVelNavFrameX","imu_R-SHOE_AngVelNavFrameY","imu_R-SHOE_AngVelNavFrameZ",
]
# Derived from the list so the two can never disagree (36 sensor triplets -> 108).
channel_num = len(used_channels)


def _standardize(windows, fitted_scaler):
    """Apply an already-fitted scaler to (N, T, C) windows; resulting NaNs become 0."""
    n_windows, n_steps, n_channels = windows.shape
    flat = fitted_scaler.transform(windows.reshape(n_windows, -1))
    flat[np.isnan(flat)] = 0
    return flat.reshape(n_windows, n_steps, n_channels)


os.makedirs(out_dir, exist_ok=True)

for user_idx in tqdm(range(1, user_num+1)):
    user_data, user_labels = [], []
    for file_idx in range(1, adl_file_num+1):
        # gen src_data path
        file_name = f"S{user_idx}-ADL{file_idx}.dat"
        file_path = os.path.join(src_dir, file_name)

        # load cleaned data
        used_data = get_cleaned_user_data(file_path, user_idx)

        # split data by label
        for act_id, act_data in used_data.groupby('HL_Activity'):
            sample_cnt = int((act_data.shape[0]-seq_length)//shifting_step + 1)
            if sample_cnt < 2:
                # was `user_index`, an undefined name that raised NameError on this branch
                print(f"user {user_idx} has only {act_data.shape[0]} samplings, drop\n")
                continue

            # (N, seq_length, channel_num): one sliding-window matrix per channel
            act_sliced_data = np.empty((sample_cnt, seq_length, channel_num))
            for channel_pos, channel_name in enumerate(used_channels):
                act_sliced_data[:, :, channel_pos] = sliding_window(
                    act_data[channel_name].values, seq_length, shifting_step, 'T')

            user_data.append(act_sliced_data)
            # every window of this group carries the group's activity id
            user_labels.append(np.full(act_sliced_data.shape[0], int(act_id), dtype=int))

    # data and labels for each user
    array_user_data = np.concatenate(user_data, axis=0)
    array_user_labels = np.concatenate(user_labels, axis=0)

    # Stratified train/test split of the windows.
    # NOTE(review): consecutive windows overlap by seq_length - shifting_step samples, so
    # a random window-level split puts near-duplicate windows in both train and test;
    # metrics computed on this test split are therefore optimistic.
    X_train, X_test, y_train, y_test = train_test_split(
        array_user_data, array_user_labels,
        stratify=array_user_labels, test_size=0.3, random_state=1)

    # Data normalization: statistics are estimated on the training windows only and then
    # applied to both splits (the test split used to be re-fitted on itself).
    train_scaler = StandardScaler().fit(X_train.reshape(X_train.shape[0], -1))
    X_train = _standardize(X_train, train_scaler)
    X_test = _standardize(X_test, train_scaler)

    print(f"user: {user_idx}")
    print(f"train data: {X_train.shape}, train label: {y_train.shape}")
    print(f"test data: {X_test.shape}, test label: {y_test.shape}\n")

    # prepare samples
    train_data = {'samples':X_train, 'labels':y_train}
    test_data  = {'samples':X_test, 'labels':y_test}

    torch.save(train_data, os.path.join(out_dir, f'train_{user_idx}.pt'))
    torch.save(test_data, os.path.join(out_dir, f'test_{user_idx}.pt'))

  0%|          | 0/4 [00:00<?, ?it/s]

(51116, 111)
(35530, 111)
(32224, 111)
(0, 111)
(33273, 111)
(28975, 111)
(32955, 111)
(29535, 111)
(30127, 111)
(0, 111)
user: 1
train data: (2096, 300, 108), train label: (2096,)
test data: (899, 300, 108), test label: (899,)



 25%|██▌       | 1/4 [00:29<01:28, 29.46s/it]

(42797, 111)
(36620, 111)
(30182, 111)
(25967, 111)
(34232, 111)
(28313, 111)
(32748, 111)
(27578, 111)
(31826, 111)
(26967, 111)
user: 2
train data: (3227, 300, 108), train label: (3227,)
test data: (1383, 300, 108), test label: (1383,)



 50%|█████     | 2/4 [01:38<01:45, 52.95s/it]

(37223, 111)
(27886, 111)
(27825, 111)
(24089, 111)
(26717, 111)
(23157, 111)
(27681, 111)
(23880, 111)
(26495, 111)
(23253, 111)
user: 3
train data: (2685, 300, 108), train label: (2685,)
test data: (1151, 300, 108), test label: (1151,)



 75%|███████▌  | 3/4 [02:57<01:04, 64.89s/it]

(41588, 111)
(29666, 111)
(27737, 111)
(22709, 111)
(25132, 111)
(0, 111)
(22230, 111)
(0, 111)
(30527, 111)
(22263, 111)
user: 4
train data: (1641, 300, 108), train label: (1641,)
test data: (704, 300, 108), test label: (704,)



100%|██████████| 4/4 [03:26<00:00, 51.71s/it]


In [11]:
# NOTE(review): `df` is not referenced by any of the cells visible after this one.
df=pd.DataFrame(y_train)

In [12]:
# One-hot encode over the fixed label set 0..4 so the result always has 5 columns
# (matching the 5-unit softmax output), even if a class is missing from this split.
y_train=pd.get_dummies(pd.Categorical(y_train, categories=list(range(5))), dtype=int)

In [13]:
# Convert the one-hot DataFrame to a plain NumPy array.
y_train=y_train.to_numpy()

In [14]:
# One-hot encode over the fixed label set 0..4 so the result always has 5 columns
# (matching the 5-unit softmax output), even if a class is missing from this split.
y_test=pd.get_dummies(pd.Categorical(y_test, categories=list(range(5))), dtype=int)

In [15]:
# Convert the one-hot DataFrame to a plain NumPy array.
y_test=y_test.to_numpy()

In [16]:
# Sanity check: one row per test window, one column per activity class.
print(y_test.shape)

(704, 5)


In [17]:
# Sanity check: (windows, time steps, channels) of the last user processed above.
X_train.shape


(1641, 300, 108)

In [18]:
import tensorflow as tf
from tensorflow import keras

In [27]:
# Stacked-LSTM classifier graph: two sequence-returning LSTM layers, a final LSTM
# that emits only its last state, and a 5-way softmax head.
inp = keras.layers.Input(shape=(300, 108))
one = keras.layers.LSTM(units=64, return_sequences=True)(inp)
two = keras.layers.LSTM(units=64, return_sequences=True)(one)
three = keras.layers.LSTM(units=50)(two)
out = keras.layers.Dense(units=5, activation='softmax')(three)

In [39]:
# Wrap the layer graph (input `inp` -> softmax output `out`) into a trainable model.
model=keras.Model(inputs=inp,outputs=out)

In [40]:
# categorical_crossentropy pairs with the one-hot encoded label arrays built above.
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

In [41]:
# Print the layer-by-layer overview of the model.
model.summary()

In [43]:
# Train for 50 epochs, logging to ./logs for TensorBoard.
# `callbacks` is passed as a list and `validation_data` as a tuple, the documented forms.
# NOTE(review): the test split doubles as validation data, so a later evaluate() on the
# same split is not an independent estimate.
hist = model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=50,
    callbacks=[tf.keras.callbacks.TensorBoard(log_dir='./logs')],
    validation_data=(X_test, y_test),
)

Epoch 1/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 43ms/step - accuracy: 0.9854 - loss: 0.0475 - val_accuracy: 0.9759 - val_loss: 0.1080
Epoch 2/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 49ms/step - accuracy: 0.9888 - loss: 0.0360 - val_accuracy: 0.9815 - val_loss: 0.0767
Epoch 3/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 37ms/step - accuracy: 0.9951 - loss: 0.0151 - val_accuracy: 0.9801 - val_loss: 0.0991
Epoch 4/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step - accuracy: 0.9924 - loss: 0.0243 - val_accuracy: 0.9915 - val_loss: 0.0452
Epoch 5/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 35ms/step - accuracy: 0.9988 - loss: 0.0073 - val_accuracy: 0.9929 - val_loss: 0.0296
Epoch 6/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 43ms/step - accuracy: 0.9990 - loss: 0.0051 - val_accuracy: 0.9886 - val_loss: 0.0291
Epoch 7/50
[1m52/52[0m [32m━━━━

In [44]:
# Evaluate on the test split; with metrics=['accuracy'] this yields loss and accuracy.
# NOTE(review): this is the same split that was passed as validation_data during fit().
evel=model.evaluate(X_test,y_test,batch_size=32)

[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.9957 - loss: 0.0185
