In [1]:
import os

import numpy as np
import pandas as pd
import zipfile
import matplotlib.pyplot as plt

# 解壓縮資料

In [2]:
def unzip_data(path):
    for folder, _, files in os.walk(path):
        for file in files:
            if file.endswith('zip'):
                file_path = os.path.join(folder, file)
                print(file_path)

                sotre_path = os.path.join(folder, file.rsplit('.')[0])
                # 開啟 ZIP 壓縮檔 
                with zipfile.ZipFile(file_path, 'r') as zf:
                    # 解壓縮所有檔案至 /my/folder 目錄
                    zf.extractall(path=sotre_path)

In [3]:
# unzip_data('./swing')

In [3]:
def convert_csv(path):
    acc_df = pd.read_csv(os.path.join(path, 'Accelerometer.csv'), delimiter=',')
    gyo_df = pd.read_csv(os.path.join(path, 'Gyroscope.csv'), delimiter=',')
    linacc_df = pd.read_csv(os.path.join(path, 'Linear Accelerometer.csv'), delimiter=',')
    mag_df = pd.read_csv(os.path.join(path, 'Magnetometer.csv'), delimiter=',')
    device_df = pd.read_csv(os.path.join(path, 'meta', 'device.csv'), delimiter=',')
    time_df = pd.read_csv(os.path.join(path, 'meta', 'time.csv'), delimiter=',')
    
    acc_df.to_csv(os.path.join(path, 'Accelerometer.csv'), index=False, sep=';')
    gyo_df.to_csv(os.path.join(path, 'Gyroscope.csv'), index=False, sep=';')
    linacc_df.to_csv(os.path.join(path, 'Linear Accelerometer.csv'), index=False, sep=';')
    mag_df.to_csv(os.path.join(path, 'Magnetometer.csv'), index=False, sep=';')
    device_df.to_csv(os.path.join(path, 'meta', 'device.csv'), index=False, sep=';')
    time_df.to_csv(os.path.join(path, 'meta', 'time.csv'), index=False, sep=';')

In [5]:
# convert_csv('./pocket/202301101952/target')
# convert_csv('./pocket/202301101952/source')

# 讀檔

In [2]:
def rename_data(df):
    new_names = ['system_time', 'acc_times', 'acc_x', 'acc_y', 'acc_z', 'gyo_times', 'gyo_x', 'gyo_y', 'gyo_z', 'lin_acc_times', 'lin_acc_x', 'lin_acc_y', 'lin_acc_z', 'mag_times', 'mag_x', 'mag_y', 'mag_z']
    df.columns = new_names
    
    return df


def device_start_system_time(path):
    time_df = pd.read_csv(path, delimiter=';', index_col=0)
    time = time_df.T.loc['system time', 'START']
    
    return time


def load_original_data(path):
    acc_df = pd.read_csv(os.path.join(path, 'Accelerometer.csv'), delimiter=';')
    gyo_df = pd.read_csv(os.path.join(path, 'Gyroscope.csv'), delimiter=';')
    linacc_df = pd.read_csv(os.path.join(path, 'Linear Accelerometer.csv'), delimiter=';')
    mag_df = pd.read_csv(os.path.join(path, 'Magnetometer.csv'), delimiter=';')
    start_time = device_start_system_time(os.path.join(path, 'meta/time.csv'))
    time_df = acc_df.iloc[:, 0] + start_time
    
    total_df = pd.concat([time_df, acc_df, gyo_df, linacc_df, mag_df], axis=1)
    total_df = rename_data(total_df)
    
    return total_df

In [3]:
def align_data(source_df, target_df):
    source_start_time = source_df.loc[0, 'system_time']
    target_start_time = target_df.loc[0, 'system_time']
    
    # align start time
    if source_start_time > target_start_time:  # source start time > target start time
        target_start_idx = np.argmin(np.abs(target_df.system_time - source_start_time))
        target_df = target_df.iloc[target_start_idx:].reset_index(drop=True)
    else:  # source start time < target start time
        source_start_idx = np.argmin(np.abs(source_df.system_time - target_start_time))
        source_df = source_df.iloc[source_start_idx:].reset_index(drop=True)
        
    # align end idx
    end_idx = min(len(source_df), len(target_df))
    source_df = source_df.iloc[:end_idx]
    target_df = target_df.iloc[:end_idx]
    
    return source_df, target_df


def bound_range(df):
    start = datapoint_per_second * 35
    end = len(df) - datapoint_per_second * 20
    
    return df.iloc[start:end].reset_index(drop=True)


def split_segments(df, seq_len=5, chunk_size=26):
#     length = datapoint_per_second * duration
    length = seq_len * chunk_size
    num_of_segs = int(np.floor(len(df) / length))
    
    segments = []
    for i in range(num_of_segs):
        seg = df.iloc[int(i * length):int((i + 1) * length)].to_numpy()
        segments.append(np.array(np.split(seg, seq_len)))
        
    return segments


def select_data(df):
    return df[['acc_x', 'acc_y', 'acc_z', 'gyo_x', 'gyo_y', 'gyo_z', 'lin_acc_x', 'lin_acc_y', 'lin_acc_z', 'mag_x', 'mag_y', 'mag_z', 'system_time']]


def preprocess_data(df, duration):
    pre_df = select_data(df)
    segs = split_segments(pre_df, 5)
    
    return segs

In [4]:
# test_df = load_original_data('./front_pocket/202302071724/source')
# segs = preprocess_data(test_df)

In [4]:
datapoint_per_second = 20
duration = 2
classes = {'target': 0, 'front_pocket': 1, 'pocket': 2, 'swing': 3}

def device_version(path):
    device_df = pd.read_csv(path, delimiter=';', index_col=0)
    version = device_df.loc['deviceRelease'].value
    
    return version


def check_data_device(source_path, target_path):
    while True:
        source_version = device_version(os.path.join(source_path, 'meta/device.csv'))
        target_version = device_version(os.path.join(target_path, 'meta/device.csv'))

        print(source_path, target_path)

        if source_version[:2] == '15' and target_version[:2] == '16':
            return source_path, target_path
        elif source_version[:2] == '16' and target_version[:2] == '15':
            source_path = os.path.join(folder_path, 'target')
            target_path = os.path.join(folder_path, 'source')
            print('--- GG ---')
            continue
        else:
            raise


def load_pair_data(root_folder, class_num):
    pair_data = []

    for folder in os.listdir(root_folder):
        if folder.startswith('.'):
            continue

        folder_path = os.path.join(root_folder, folder)
        source_path = os.path.join(folder_path, 'source')
        target_path = os.path.join(folder_path, 'target')
        
        print(folder_path)
        
        #########################
        ##### check devices #####
        #########################
        source_path, target_path = check_data_device(source_path, target_path)
        
        ####################################
        ##### load and preprocess data #####
        ####################################
        source_df = load_original_data(source_path)
        target_df = load_original_data(target_path)
        
        source_df, target_df = align_data(source_df, target_df)
        source_df, target_df = bound_range(source_df), bound_range(target_df)
        
        source_segs = preprocess_data(source_df, duration)
        target_segs = preprocess_data(target_df, duration)
        
        idx = min(len(source_segs), len(target_segs))
        source_tags = [class_num] * idx
        target_tags = [0] * idx
        
        pair_data.extend(zip(source_segs[:idx], source_tags, target_segs[:idx], target_tags))
        
    return pair_data


In [5]:
device_version('./front_pocket/202302071523/source/meta/device.csv')  # source version: 15.4

'15.4'

In [6]:
device_version('./front_pocket/202302071523/target/meta/device.csv')  # target version: 16.3

'16.3'

In [7]:
front_pocket_pair_data = load_pair_data('./front_pocket', class_num=1)
pocket_pair_data = load_pair_data('./pocket', class_num=2)
swing_pair_data = load_pair_data('./swing', class_num=3)

./front_pocket/202302071628
./front_pocket/202302071628/source ./front_pocket/202302071628/target
./front_pocket/202302071652
./front_pocket/202302071652/source ./front_pocket/202302071652/target
./front_pocket/202302071523
./front_pocket/202302071523/source ./front_pocket/202302071523/target
./front_pocket/202302071531
./front_pocket/202302071531/source ./front_pocket/202302071531/target
./front_pocket/202302071715
./front_pocket/202302071715/source ./front_pocket/202302071715/target
./front_pocket/202302071641
./front_pocket/202302071641/source ./front_pocket/202302071641/target
./front_pocket/202302071541
./front_pocket/202302071541/source ./front_pocket/202302071541/target
./front_pocket/202302071619
./front_pocket/202302071619/source ./front_pocket/202302071619/target
./front_pocket/202302071704
./front_pocket/202302071704/source ./front_pocket/202302071704/target
./front_pocket/202302071724
./front_pocket/202302071724/source ./front_pocket/202302071724/target
./pocket/20230213210

In [8]:
print(len(front_pocket_pair_data), len(pocket_pair_data), len(swing_pair_data))

441 420 412


# 建立dataloader

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score

In [10]:
class ClassDataset(Dataset):
    def __init__(self, data, label):
        self.data = data
        self.label = label

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.label[idx]

In [81]:
class SimpleRNN(nn.Module):
    def __init__(self, seq_len=5, chunk_size=21, num_of_classes=2):
        super(SimpleRNN, self).__init__()
        
        self.seq_len = seq_len
        self.chunk_size = chunk_size
        
#         self.feature_extractor = nn.Sequential(
#             nn.Linear(9, 16),
#             nn.LeakyReLU(),
#             nn.Linear(16, 16),
#             nn.LeakyReLU(),
#         )
        
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 5, kernel_size=(1, 5)),
            nn.LeakyReLU(),
            nn.Conv2d(5, 5, kernel_size=(1, 5)),
            nn.LeakyReLU(),
        )
        
        self.rnn = nn.RNN(input_size=13, hidden_size=13, num_layers=2, batch_first=True)
#         self.lstm = nn.LSTM(input_size=16, hidden_size=16, num_layers=2, batch_first=True, bidirectional=True)
        
        self.last = nn.Sequential(
            nn.Linear(13, 8),
            nn.LeakyReLU(),
            nn.Linear(8, num_of_classes),
            nn.Softmax(dim=1),
        )
        
    def forward(self, x):
#         h = self.feature_extractor(x)
        print(x.shape)
        h = torch.reshape(x, (len(x) * self.seq_len, self.chunk_size, -1))
        print(h.shape)
        h = torch.permute(h, (0, 2, 1))[:, None, :]
        print(h.shape)
        
        h = self.cnn(h)
        print(h.shape)
        h = torch.permute(h, (0, 2, 1))
        h = torch.reshape(h, (len(x), self.seq_len, -1))
        hz, _ = self.rnn(h)
        
        out = self.last(hz[:, -1])
        
        return out

In [11]:
class SimpleRNN(nn.Module):
    def __init__(self, seq_len=5, chunk_size=21, num_of_classes=2):
        super(SimpleRNN, self).__init__()
        
        self.seq_len = seq_len
        self.chunk_size = chunk_size
        self.num_of_classes = num_of_classes
        
#         self.feature_extractor = nn.Sequential(
#             nn.Linear(9, 16),
#             nn.LeakyReLU(),
#             nn.Linear(16, 16),
#             nn.LeakyReLU(),
#         )
        
        self.cnn = nn.Sequential(
            nn.Conv1d(9, 16, kernel_size=5),
            nn.LeakyReLU(),
            nn.Conv1d(16, 16, kernel_size=5),
            nn.LeakyReLU(),
        )
        
        # 16 * (chunk_size-8)
        self.rnn = nn.RNN(input_size=16 * (chunk_size - 8), hidden_size=64, num_layers=2, batch_first=True)
#         self.lstm = nn.LSTM(input_size=16, hidden_size=16, num_layers=2, batch_first=True, bidirectional=True)
        
        self.last = nn.Sequential(
            nn.Linear(64, 16),
            nn.Tanh(),
            nn.Linear(16, num_of_classes),
            nn.Softmax(dim=2),
        )
        
    def forward(self, x):
#         h = self.feature_extractor(x)
        h = torch.reshape(x, (len(x) * self.seq_len, self.chunk_size, -1))
        h = torch.permute(h, (0, 2, 1))
        
        h = self.cnn(h)

        h = torch.permute(h, (0, 2, 1))
        h = torch.reshape(h, (len(x), self.seq_len, -1))
        
        hz, _ = self.rnn(h)
        out = self.last(hz)

        out = torch.reshape(out, (len(x), self.seq_len, self.num_of_classes))
        
        return out

In [80]:
front_pocket_half = min(int(len(front_pocket_pair_data) * 0.5), 1)
pocket_half = min(int(len(pocket_pair_data) * 0.5), 1)
swing_half = min(int(len(swing_pair_data) * 0.5), 1)

train_data = front_pocket_pair_data[:front_pocket_half] + pocket_pair_data[:pocket_half] + swing_pair_data[:swing_half]
valid_data = front_pocket_pair_data[front_pocket_half:] + pocket_pair_data[pocket_half:] + swing_pair_data[swing_half:]

# train
t_data = np.array([d[0] for d in train_data] + [d[2] for d in train_data[::3]])
t_label = [d[1] for d in train_data] + [d[3] for d in train_data[::3]]
train_dataset = ClassDataset(
                    data = torch.tensor(t_data, dtype=torch.float),
                    label = t_label,
                )
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# valid
v_data = np.array([d[0] for d in valid_data] + [d[2] for d in valid_data])
v_label = [d[1] for d in valid_data] + [d[3] for d in valid_data]
valid_dataset = ClassDataset(
                    data = torch.tensor(v_data, dtype=torch.float),
                    label = v_label,
                )
valid_loader = DataLoader(valid_dataset, batch_size=32)

np.unique(t_label, return_counts=True), np.unique(v_label, return_counts=True)

((array([0, 1, 2, 3]), array([1, 1, 1, 1])),
 (array([0, 1, 2, 3]), array([1270,  440,  419,  411])))

In [52]:
batch = next(iter(train_loader))

In [53]:
batch[0][0].shape

torch.Size([5, 26, 13])

In [65]:
EPOCH = 500
num_of_classes = 4
seq_len = 5
chunk_size = 26
device = torch.device("cuda" if (torch.cuda.is_available()) else "cpu")

In [81]:
model = SimpleRNN(seq_len=seq_len, chunk_size=chunk_size, num_of_classes=num_of_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50, eta_min=0.0000001)
ce_loss = torch.nn.CrossEntropyLoss()

In [56]:
def train(model, dataloader, optimizer):
    model.train()

    losses = []
    accuracies = []

    for sequences, labels in dataloader:
        optimizer.zero_grad()

        sequences = sequences.to(device)
        one_hot = F.one_hot(labels, num_classes=num_of_classes).to(device).float()
        one_hot = one_hot[:, None, :].repeat((1, seq_len, 1)).float()

        #############
        # generator #
        #############
        predict_probability = model(sequences[:, :, :, :9])
        
        predict_probability = predict_probability.reshape((-1, num_of_classes))
        one_hot = one_hot.reshape((-1, num_of_classes))
        loss = ce_loss(predict_probability, one_hot)
        
        # backward
        loss.backward()
        optimizer.step()
        
        _, predict_classes = torch.max(predict_probability, 1)
        predict_classes = predict_classes.cpu().detach().numpy()
        _, labels = torch.max(one_hot, 1)
        labels = labels.cpu().detach().numpy()
        
        losses.append(loss.item())
        accuracies.append(accuracy_score(labels, predict_classes))
    
    return np.mean(losses), np.mean(accuracies)

In [57]:
def evalute(model, dataloader):
    model.eval()

    losses = []
    accuracies = []
    
    with torch.no_grad():
        for sequences, labels in dataloader:
            
            sequences = sequences.to(device)
            one_hot = F.one_hot(labels, num_classes=num_of_classes).to(device).float()
            one_hot = one_hot[:, None, :].repeat((1, seq_len, 1)).float()

            #############
            # generator #
            #############
            predict_probability = model(sequences[:, :, :, :9])
            
            predict_probability = predict_probability.reshape((-1, num_of_classes))
            one_hot = one_hot.reshape((-1, num_of_classes))
            loss = ce_loss(predict_probability, one_hot)

            _, predict_classes = torch.max(predict_probability, 1)
            predict_classes = predict_classes.cpu().detach().numpy()
            _, labels = torch.max(one_hot, 1)
            labels = labels.cpu().detach().numpy()
            
            losses.append(loss.item())
            accuracies.append(accuracy_score(labels, predict_classes))
    
    return np.mean(losses), np.mean(accuracies)

In [82]:
for epoch in range(EPOCH):
    train_loss, train_acc = train(model, train_loader, optimizer)
    lr_scheduler.step()
    valid_loss, valid_acc = evalute(model, valid_loader)
    
    
    
    ep = str(epoch).zfill(5)

    print(f'{ep}: train loss: {train_loss:2.3f}, acc: {train_acc:2.3f}   valid loss: {valid_loss:2.3f}, acc: {valid_acc:2.3f}')

00000: train loss: 1.379, acc: 0.350   valid loss: 1.376, acc: 0.240
00001: train loss: 1.354, acc: 0.500   valid loss: 1.363, acc: 0.329
00002: train loss: 1.327, acc: 0.700   valid loss: 1.351, acc: 0.404
00003: train loss: 1.299, acc: 0.750   valid loss: 1.339, acc: 0.431
00004: train loss: 1.271, acc: 0.800   valid loss: 1.327, acc: 0.488
00005: train loss: 1.243, acc: 0.800   valid loss: 1.314, acc: 0.517
00006: train loss: 1.217, acc: 0.800   valid loss: 1.302, acc: 0.546
00007: train loss: 1.192, acc: 0.800   valid loss: 1.291, acc: 0.579
00008: train loss: 1.169, acc: 0.850   valid loss: 1.279, acc: 0.608
00009: train loss: 1.148, acc: 0.950   valid loss: 1.269, acc: 0.629
00010: train loss: 1.129, acc: 0.950   valid loss: 1.258, acc: 0.646
00011: train loss: 1.112, acc: 0.950   valid loss: 1.249, acc: 0.659
00012: train loss: 1.096, acc: 0.950   valid loss: 1.240, acc: 0.672
00013: train loss: 1.081, acc: 1.000   valid loss: 1.231, acc: 0.680
00014: train loss: 1.067, acc: 1.0

In [45]:
def output_eval(model, dataloader):
    model.eval()

    losses = []
    accuracies = []
    
    with torch.no_grad():
        for i, (sequences, labels) in enumerate(dataloader):
            
            sequences = sequences.to(device)
#             labels = labels.to(device)
            one_hot = F.one_hot(labels, num_classes=num_of_classes).to(device)
            one_hot = one_hot[:, None, :].repeat((1, 5, 1)).float()

            #############
            # generator #
            #############
            predict_probability = model(sequences[:, :, :, :9])

            predict_probability = predict_probability.reshape((-1, num_of_classes))
            one_hot = one_hot.reshape((-1, num_of_classes))
            _, predict_classes = torch.max(predict_probability, 1)
            predict_classes = predict_classes.cpu().detach().numpy()
            _, labels = torch.max(one_hot, 1)
            labels = labels.cpu().detach().numpy()
            
            print(f'{i: >3} predict class: {predict_classes}')
            print(f'{i: >3}  ground truth: {labels}')

            loss = ce_loss(predict_probability, one_hot)

            losses.append(loss.item())
            accuracies.append(accuracy_score(labels, predict_classes))
            
    print(f'loss: {np.mean(losses): 2.3f}, accuracy: {np.mean(accuracies): 2.3f}')

In [71]:
output_eval(model, valid_loader)

  0 predict class: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1]
  0  ground truth: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1]
  1 predict class: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1