In [1]:
import os

import numpy as np
import pandas as pd
import zipfile
import matplotlib.pyplot as plt
from tqdm import tqdm
import math

# 解壓縮資料

In [2]:
def unzip_data(path):
    for folder, _, files in os.walk(path):
        for file in files:
            if file.endswith('zip'):
                file_path = os.path.join(folder, file)
                print(file_path)

                sotre_path = os.path.join(folder, file.rsplit('.')[0])
                # 開啟 ZIP 壓縮檔 
                with zipfile.ZipFile(file_path, 'r') as zf:
                    # 解壓縮所有檔案至 /my/folder 目錄
                    zf.extractall(path=sotre_path)

In [3]:
# unzip_data('./swing')

In [4]:
def convert_csv(path):
    acc_df = pd.read_csv(os.path.join(path, 'Accelerometer.csv'), delimiter=',')
    gyo_df = pd.read_csv(os.path.join(path, 'Gyroscope.csv'), delimiter=',')
    linacc_df = pd.read_csv(os.path.join(path, 'Linear Accelerometer.csv'), delimiter=',')
    mag_df = pd.read_csv(os.path.join(path, 'Magnetometer.csv'), delimiter=',')
    device_df = pd.read_csv(os.path.join(path, 'meta', 'device.csv'), delimiter=',')
    time_df = pd.read_csv(os.path.join(path, 'meta', 'time.csv'), delimiter=',')
    
    acc_df.to_csv(os.path.join(path, 'Accelerometer.csv'), index=False, sep=';')
    gyo_df.to_csv(os.path.join(path, 'Gyroscope.csv'), index=False, sep=';')
    linacc_df.to_csv(os.path.join(path, 'Linear Accelerometer.csv'), index=False, sep=';')
    mag_df.to_csv(os.path.join(path, 'Magnetometer.csv'), index=False, sep=';')
    device_df.to_csv(os.path.join(path, 'meta', 'device.csv'), index=False, sep=';')
    time_df.to_csv(os.path.join(path, 'meta', 'time.csv'), index=False, sep=';')

In [5]:
# convert_csv('./pocket/202301101952/target')
# convert_csv('./pocket/202301101952/source')

# 讀檔

In [6]:
def rename_data(df):
    new_names = ['system_time', 'acc_times', 'acc_x', 'acc_y', 'acc_z', 'gyo_times', 'gyo_x', 'gyo_y', 'gyo_z', 'lin_acc_times', 'lin_acc_x', 'lin_acc_y', 'lin_acc_z', 'mag_times', 'mag_x', 'mag_y', 'mag_z']
    df.columns = new_names
    
    return df


def device_start_system_time(path):
    time_df = pd.read_csv(path, delimiter=';', index_col=0)
    time = time_df.T.loc['system time', 'START']
    
    return time


def load_original_data(path):
    acc_df = pd.read_csv(os.path.join(path, 'Accelerometer.csv'), delimiter=';')
    gyo_df = pd.read_csv(os.path.join(path, 'Gyroscope.csv'), delimiter=';')
    linacc_df = pd.read_csv(os.path.join(path, 'Linear Accelerometer.csv'), delimiter=';')
    mag_df = pd.read_csv(os.path.join(path, 'Magnetometer.csv'), delimiter=';')
    start_time = device_start_system_time(os.path.join(path, 'meta/time.csv'))
    time_df = acc_df.iloc[:, 0] + start_time
    
    total_df = pd.concat([time_df, acc_df, gyo_df, linacc_df, mag_df], axis=1)
    total_df = rename_data(total_df)
    
    return total_df

In [7]:
def align_data(source_df, target_df):
    source_start_time = source_df.loc[0, 'system_time']
    target_start_time = target_df.loc[0, 'system_time']
    
    # align start time
    if source_start_time > target_start_time:  # source start time > target start time
        target_start_idx = np.argmin(np.abs(target_df.system_time - source_start_time))
        target_df = target_df.iloc[target_start_idx:].reset_index(drop=True)
    else:  # source start time < target start time
        source_start_idx = np.argmin(np.abs(source_df.system_time - target_start_time))
        source_df = source_df.iloc[source_start_idx:].reset_index(drop=True)
        
    # align end idx
    end_idx = min(len(source_df), len(target_df))
    source_df = source_df.iloc[:end_idx]
    target_df = target_df.iloc[:end_idx]
    
    return source_df, target_df


def bound_range(df):
    start = datapoint_per_second * 35
    end = len(df) - datapoint_per_second * 20
    
    return df.iloc[start:end].reset_index(drop=True)


def split_segments(df, duration=5):
    length = datapoint_per_second * duration
    num_of_segs = int(np.floor(len(df) / length))
    
    segments = []
    for i in range(num_of_segs):
        segments.append(df.iloc[int(i * length):int((i + 1) * length)].to_numpy())
        
    return segments


def select_data(df):
    return df[['gyo_x', 'gyo_y', 'gyo_z', 'lin_acc_x', 'lin_acc_y', 'lin_acc_z', 'mag_x', 'mag_y', 'mag_z', 'system_time']]


def preprocess_data(df, duration):
    pre_df = select_data(df)
    segs = split_segments(pre_df, duration)
    
    return segs

In [8]:
# test_df = load_original_data('./front_pocket/202302071724/source')
# segs = preprocess_data(test_df)

In [9]:
# source_df = load_original_data('./front_pocket/202302071704/source')
# target_df = load_original_data('./front_pocket/202302071704/target')
# print(source_df.system_time[0], target_df.system_time[0])
# sdf, tdf = align_data(source_df, target_df)
# print(sdf.system_time[0], tdf.system_time[0])
# print(sdf.system_time[len(sdf) - 1], tdf.system_time[len(tdf) - 1])
# print(len(sdf), len(tdf))

In [10]:
datapoint_per_second = 20
duration = 2
classes = {'target': 0, 'front_pocket': 1, 'pocket': 2, 'swing': 3}

def device_version(path):
    device_df = pd.read_csv(path, delimiter=';', index_col=0)
    version = device_df.loc['deviceRelease'].value
    
    return version


def check_data_device(source_path, target_path):
    while True:
        source_version = device_version(os.path.join(source_path, 'meta/device.csv'))
        target_version = device_version(os.path.join(target_path, 'meta/device.csv'))

        print(source_path, target_path)

        if source_version[:2] == '15' and target_version[:2] == '16':
            return source_path, target_path
        elif source_version[:2] == '16' and target_version[:2] == '15':
            source_path = os.path.join(folder_path, 'target')
            target_path = os.path.join(folder_path, 'source')
            print('--- GG ---')
            continue
        else:
            raise


def load_pair_data(root_folder, class_num):
    pair_data = []

    for folder in os.listdir(root_folder):
        if folder.startswith('.'):
            continue

        folder_path = os.path.join(root_folder, folder)
        source_path = os.path.join(folder_path, 'source')
        target_path = os.path.join(folder_path, 'target')
        
        print(folder_path)
        
        #########################
        ##### check devices #####
        #########################
        source_path, target_path = check_data_device(source_path, target_path)
        
        ####################################
        ##### load and preprocess data #####
        ####################################
        source_df = load_original_data(source_path)
        target_df = load_original_data(target_path)
        
#         print(source_df.system_time[0], target_df.system_time[0])
        
        source_df, target_df = align_data(source_df, target_df)
        source_df, target_df = bound_range(source_df), bound_range(target_df)
        
#         print(source_df.system_time[0], target_df.system_time[0])
#         print(source_df.system_time[len(source_df) - 1], target_df.system_time[len(target_df) - 1])
#         print(len(source_df), len(target_df))
        
#         plt.figure(figsize=(30, 5))
#         plt.plot(np.arange(len(source_df)), source_df.acc_x)
#         plt.plot(np.arange(len(target_df)), target_df.acc_y)
#         plt.show()
        
        source_segs = preprocess_data(source_df, duration)
        target_segs = preprocess_data(target_df, duration)
        
        idx = min(len(source_segs), len(target_segs))
        source_tags = [class_num] * idx
        target_tags = [0] * idx
        
        pair_data.extend(zip(source_segs[:idx], source_tags, target_segs[:idx], target_tags))
        
    return pair_data

In [11]:
device_version('./front_pocket/202302071523/source/meta/device.csv')  # source version: 15.4

'15.4'

In [12]:
device_version('./front_pocket/202302071523/target/meta/device.csv')  # target version: 16.3

'16.3'

In [13]:
front_pocket_pair_data = load_pair_data('./front_pocket', class_num=1)
pocket_pair_data = load_pair_data('./pocket', class_num=2)
swing_pair_data = load_pair_data('./swing', class_num=3)

./front_pocket/202302071628
./front_pocket/202302071628/source ./front_pocket/202302071628/target
./front_pocket/202302071652
./front_pocket/202302071652/source ./front_pocket/202302071652/target
./front_pocket/202302071523
./front_pocket/202302071523/source ./front_pocket/202302071523/target
./front_pocket/202302071531
./front_pocket/202302071531/source ./front_pocket/202302071531/target
./front_pocket/202302071715
./front_pocket/202302071715/source ./front_pocket/202302071715/target
./front_pocket/202302071641
./front_pocket/202302071641/source ./front_pocket/202302071641/target
./front_pocket/202302071541
./front_pocket/202302071541/source ./front_pocket/202302071541/target
./front_pocket/202302071619
./front_pocket/202302071619/source ./front_pocket/202302071619/target
./front_pocket/202302071704
./front_pocket/202302071704/source ./front_pocket/202302071704/target
./front_pocket/202302071724
./front_pocket/202302071724/source ./front_pocket/202302071724/target
./pocket/20230213210

In [14]:
print(len(front_pocket_pair_data), len(pocket_pair_data), len(swing_pair_data))

1439 1374 1346


In [15]:
front_pocket_pair_data[0][0][0, -1], front_pocket_pair_data[0][2][0, -1]

(1675758113.8518965, 1675758113.7758677)

# 建立dataloader

In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score

In [17]:
# class ClassDataset(Dataset):
#     def __init__(self, data, label):
#         self.data = data
#         self.label = label

#     def __len__(self):
#         return len(self.data)

#     def __getitem__(self, idx):
#         return self.data[idx], self.label[idx]


# class SimpleRNN(nn.Module):
#     def __init__(self, seq_len=100, num_of_classes=2):
#         super(SimpleRNN, self).__init__()
        
#         self.seq_len = seq_len
        
#         self.layer0 = nn.Sequential(
#             nn.Linear(12, 24),
#             nn.LeakyReLU(),
#             nn.Linear(24, 32),
#             nn.LeakyReLU(),
#         )
        
#         self.rnn = nn.RNN(input_size=32, hidden_size=32, num_layers=2, batch_first=True)
# #         self.lstm = nn.LSTM(input_size=16, hidden_size=16, num_layers=2, batch_first=True, bidirectional=True)
        
#         self.last = nn.Sequential(
#             nn.Linear(32, 32),
#             nn.LeakyReLU(),
#             nn.Linear(32, num_of_classes),
#             nn.Softmax(dim=1),
#         )
        
#     def forward(self, x):
#         h = self.layer0(x)
        
#         hz, _ = self.rnn(h)
        
#         out = self.last(hz[:, -1])
        
#         return out

In [18]:
class PairDataset(Dataset):
    def __init__(self, source_data, source_label, target_data, target_label):
        self.source_data = source_data
        self.source_label = source_label
        self.target_data = target_data
        self.target_label = target_label

    def __len__(self):
        return len(self.source_data)

    def __getitem__(self, idx):
        return self.source_data[idx], self.source_label[idx], self.target_data[idx], self.target_label[idx]


class NotSimpleRNN(nn.Module):
    def __init__(self, seq_len=100, num_of_classes=2):
        super(NotSimpleRNN, self).__init__()
        
        self.seq_len = seq_len
        
        ################
        ### Imu Part ###
        ################
        self.imu_layer = nn.Sequential(
            nn.Linear(8, 16),
            nn.Tanh(),
            nn.Linear(16, 32),
            nn.LeakyReLU(),
        )
        
        self.imu_rnn = nn.LSTM(input_size=32, hidden_size=32, num_layers=2, batch_first=True)
#         self.lstm = nn.LSTM(input_size=16, hidden_size=16, num_layers=2, batch_first=True, bidirectional=True)
        
        self.imu_last = nn.Sequential(
            nn.Linear(32, 32),
            nn.LeakyReLU(),
            nn.Linear(32, num_of_classes),
            nn.Softmax(dim=1),
        )
        
        ################
        ### Mag Part ###
        ################
        self.mag_layer = nn.Sequential(
            nn.Linear(3, 16),
            nn.LeakyReLU(),
            nn.Linear(16, 16),
            nn.LeakyReLU(),
        )
        
        self.mag_rnn_encoder = nn.LSTM(input_size=16, hidden_size=16, num_layers=4, batch_first=True)
        self.mag_rnn_decoder = nn.LSTM(input_size=48, hidden_size=16, num_layers=4, batch_first=True)
        self.mag_last = nn.Sequential(
            nn.Linear(16, 8),
            nn.LeakyReLU(),
            nn.Linear(8, 3),
        )
        
    def forward(self, source_imu, source_mag):
        ### imu part ###
        source_h = self.imu_layer(source_imu)
        source_hz, _ = self.imu_rnn(source_h)
        source_predict_probability = self.imu_last(source_hz[:, -1])
        
        ### mag part ###
        source_mag_h = self.mag_layer(source_mag)  # (batch, seq_len, 16)
        source_mag_hz, _ = self.mag_rnn_encoder(source_mag_h)  # (batch, seq_len, 16)
        
        source_latent = torch.concat([source_hz, source_mag_hz], dim=-1)  # (batch, seq_len, 48)
        
        predict_mag_latent, _ = self.mag_rnn_decoder(source_latent)
        predict_mag = self.mag_last(predict_mag_latent)
        
        return source_predict_probability, predict_mag

In [19]:
def get_tgt_mask(size) -> torch.tensor:
    # Generates a squeare matrix where the each row allows one word more to be seen
    mask = torch.tril(torch.ones(size, size) == 1) # Lower triangular matrix
    mask = mask.float()
    mask = mask.masked_fill(mask == 0, float('-inf')) # Convert zeros to -inf
    mask = mask.masked_fill(mask == 1, float(0.0)) # Convert ones to 0

    # EX for size=5:
    # [[0., -inf, -inf, -inf, -inf],
    #  [0.,   0., -inf, -inf, -inf],
    #  [0.,   0.,   0., -inf, -inf],
    #  [0.,   0.,   0.,   0., -inf],
    #  [0.,   0.,   0.,   0.,   0.]]

    return mask

In [20]:
class NotSimpleTransformer(nn.Module):
    def __init__(self, seq_len=100, num_of_classes=2):
        super(NotSimpleTransformer, self).__init__()
        
        self.seq_len = seq_len
        self.pos_encoder = PositionalEncoding(64, 0.1)
        ################
        ### Imu Part ###
        ################
#         self.imu_layer = nn.Sequential(
#             nn.Linear(8, 16),
#             nn.LeakyReLU(),
#             nn.Linear(16, 16),
#             nn.LeakyReLU(),
#         )
        
#         self.imu_rnn = nn.LSTM(input_size=16, hidden_size=16, num_layers=2, batch_first=True)
        
#         self.imu_last = nn.Sequential(
#             nn.Linear(16, 16),
#             nn.LeakyReLU(),
#             nn.Linear(16, num_of_classes),
#             nn.Softmax(dim=1),
#         )
        
        ################
        ### Mag Part ###
        ################
        self.mag_layer = nn.Sequential(
            nn.Linear(3, 16),
            nn.LeakyReLU(),
            nn.Linear(16, 64),
            nn.LeakyReLU(),
        )
        
        self.mag_transformer_encoder = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model=64, nhead=8, dropout=0.1, batch_first=True), num_layers=4)
        self.mag_transformer_decoder = nn.TransformerDecoder(nn.TransformerDecoderLayer(d_model=64, nhead=8, dropout=0.1, batch_first=True), num_layers=4)
        self.mag_last = nn.Sequential(
            nn.Linear(64, 16),
            nn.LeakyReLU(),
            nn.Linear(16, 3),
        )
        
    def forward(self, source_mag, target_mag=None):
        ### imu part ###
        if target_mag != None:

            ### mag part ###
            source_mag_h = self.mag_layer(source_mag)  # (batch, seq_len, 16)
            source_mag_h = self.pos_encoder(source_mag_h)
            target_mag = torch.concat([torch.zeros(len(target_mag), 1,  3).to(source_mag.device), target_mag], dim=1)
            tgt = self.mag_layer(target_mag)
            tgt = self.pos_encoder(tgt)
            source_mag_hz= self.mag_transformer_encoder(source_mag_h)  # (batch, seq_len, 16)

#             source_latent = torch.add(source_hz, source_mag_hz)  # (batch, seq_len, 48)
            tgt_mask = get_tgt_mask()
            predict_mag_latent = self.mag_transformer_decoder(tgt, source_mag_hz)
            predict_mag = self.mag_last(predict_mag_latent)

            return predict_mag
        
        else:
            
#             source_h = self.imu_layer(source_imu)
#             source_hz, _ = self.imu_rnn(source_h)
#             source_predict_probability = self.imu_last(source_hz[:, -1])

            ### mag part ###
            source_mag_h = self.mag_layer(source_mag)  # (batch, seq_len, 16)
            source_mag_h = self.pos_encoder(source_mag_h)
            source_mag_hz= self.mag_transformer_encoder(source_mag_h)  # (batch, seq_len, 16)

#             source_latent = torch.add(source_hz, source_mag_hz)  # (batch, seq_len, 48)
            tgt = torch.zeros_like(source_mag_hz)
            for i in range(source_mag_hz.size(1)):
                tgt_pos = self.pos_encoder(torch.zeros(source_mag_hz.size(0), 1, source_mag_hz.size(-1)).to(source_mag_hz.device) + i)
                decode_position = self.mag_transformer_decoder(tgt_pos, source_mag_hz[:, :i+1, :], memory_key_padding_mask=None)
                tgt[:, i, :] = decode_position[:, -1, :]
            predict_mag = self.mag_last(tgt)
            return predict_mag
        
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :].to(x.device)
        return self.dropout(x)


In [21]:
front_pocket_half = int(len(front_pocket_pair_data) / 2)
pocket_half = int(len(pocket_pair_data) / 2)
swing_half = int(len(swing_pair_data) / 2)

train_data = front_pocket_pair_data[:front_pocket_half] #+ pocket_pair_data[:pocket_half] + swing_pair_data[:swing_half]
valid_data = front_pocket_pair_data[front_pocket_half:] #+ pocket_pair_data[pocket_half:] + swing_pair_data[swing_half:]

# train
train_source_data = np.array([d[0] for d in train_data])
train_source_label = np.array([d[1] for d in train_data])
train_target_data = np.array([d[2] for d in train_data])
train_target_label = np.array([d[3] for d in train_data])
train_dataset = PairDataset(
                    source_data = torch.tensor(train_source_data, dtype=torch.float),
                    source_label = train_source_label,
                    target_data = torch.tensor(train_target_data, dtype=torch.float),
                    target_label = train_target_label,
                )
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# valid
valid_source_data = np.array([d[0] for d in valid_data])
valid_source_label = np.array([d[1] for d in valid_data])
valid_target_data = np.array([d[2] for d in valid_data])
valid_target_label = np.array([d[3] for d in valid_data])
valid_dataset = PairDataset(
                    source_data = torch.tensor(valid_source_data, dtype=torch.float),
                    source_label = valid_source_label,
                    target_data = torch.tensor(valid_target_data, dtype=torch.float),
                    target_label = valid_target_label,
                )
valid_loader = DataLoader(valid_dataset, batch_size=32)

In [22]:
batch = next(iter(train_loader))

In [32]:
#batch[0][0]

In [24]:
EPOCH = 1000
num_of_classes = 4
device = torch.device("cuda" if (torch.cuda.is_available()) else "cpu")

In [25]:
model = NotSimpleTransformer(seq_len=int(datapoint_per_second * duration), num_of_classes=num_of_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
ce_loss = torch.nn.CrossEntropyLoss()
mse_loss = torch.nn.MSELoss()

In [33]:
def train(model, dataloader, optimizer):
    model.train()

    losses = []
    pred_loss = []

    for source_data, source_label, target_data, target_label in tqdm(dataloader):
        optimizer.zero_grad()

        source_data = source_data.to(device)
        target_data = target_data.to(device)

        one_hot = F.one_hot(source_label, num_classes=num_of_classes).to(device).float()

        predict_mag = model(source_data[:, :, 6:9], target_data[:, :-1, 6:9])
        torch.cuda.empty_cache()
        #_, predict_classes = torch.max(predict_probability, 1)

#         class_loss = ce_loss(predict_probability, one_hot)
        predict_loss = mse_loss(predict_mag, target_data[:, :, 9:12])
        loss = predict_loss

        # backward
        loss.backward()
#         class_loss.backward()
#         predict_loss.backward()
        optimizer.step()
        
        losses.append(loss.item())
        pred_loss.append(predict_loss.item())
    
    return np.mean(losses), np.mean(pred_loss)

In [34]:
def evalute(model, dataloader):
    model.eval()

    losses = []
    pred_loss = []
    
    with torch.no_grad():
        for source_data, source_label, target_data, target_label in dataloader:
            source_data = source_data.to(device)
            target_data = target_data.to(device)
            
            one_hot = F.one_hot(source_label, num_classes=num_of_classes).to(device).float()

            #############
            # generator #
            #############
            predict_mag = model(source_data[:, :, 9:12])
            #_, predict_classes = torch.max(predict_probability, 1)

            #class_loss = ce_loss(predict_probability, one_hot)
            predict_loss = mse_loss(predict_mag, target_data[:, :, 9:12])
            loss = predict_loss
            
            losses.append(loss.item())
            pred_loss.append(predict_loss.item())
    
    return np.mean(losses), np.mean(pred_loss)

In [35]:
for epoch in range(EPOCH):
    #####
    # 1. 用上半部訓練50epoch
    # 2. 隨機用上或下半部訓練Model
    # 3. 印出trajectory結果
    #####
    
    train_loss, train_pred_loss = train(model, train_loader, optimizer)
    valid_loss, valid_pred_loss = evalute(model, valid_loader)
    
    ep = str(epoch).zfill(5)

    print(f'{ep}: train total loss: {train_loss: 2.3f}, pred loss: {train_pred_loss: 2.3f}, valid total loss: {valid_loss: 2.3f}, pred loss: {valid_pred_loss: 2.3f}')

  0%|          | 0/23 [00:00<?, ?it/s]


TypeError: get_tgt_mask() missing 1 required positional argument: 'size'

In [None]:
def output_eval(model, dataloader):
    model.eval()

    losses = []
    accuracies = []
    
    with torch.no_grad():
        for source_data, source_label, target_data, target_label in dataloader:
            source_data = source_data.to(device)
            target_data = target_data.to(device)
            
            one_hot = F.one_hot(source_label, num_classes=num_of_classes).to(device).float()

            #############
            # generator #
            #############
            predict_mag = model(source_data[:, :, 9:12])
            #_, predict_classes = torch.max(predict_probability, 1)

            #class_loss = ce_loss(predict_probability, one_hot)
            predict_loss = mse_loss(predict_mag, target_data[:, :, 9:12])
            loss = predict_loss
            
            #print(f'{i: >3} predict class: {predict_classes.cpu().detach().numpy()}')
            #print(f'{"": >3}  ground truth: {labels.numpy()}')

            #loss = bce_loss(predict_probability, one_hot)

            losses.append(loss.item())
            #accuracies.append(accuracy_score(labels.cpu().detach().numpy(), predict_classes.cpu().detach().numpy()))
            
    print(f'loss: {np.mean(losses): 2.3f}')

In [None]:
output_eval(model, train_loader)