In [1]:
import os

import numpy as np
import pandas as pd
import zipfile
import matplotlib.pyplot as plt

# 解壓縮資料

In [2]:
def unzip_data(path):
    for folder, _, files in os.walk(path):
        for file in files:
            if file.endswith('zip'):
                file_path = os.path.join(folder, file)
                print(file_path)

                sotre_path = os.path.join(folder, file.rsplit('.')[0])
                # 開啟 ZIP 壓縮檔 
                with zipfile.ZipFile(file_path, 'r') as zf:
                    # 解壓縮所有檔案至 /my/folder 目錄
                    zf.extractall(path=sotre_path)

In [3]:
# unzip_data('./swing')

In [4]:
def convert_csv(path):
    acc_df = pd.read_csv(os.path.join(path, 'Accelerometer.csv'), delimiter=',')
    gyo_df = pd.read_csv(os.path.join(path, 'Gyroscope.csv'), delimiter=',')
    linacc_df = pd.read_csv(os.path.join(path, 'Linear Accelerometer.csv'), delimiter=',')
    mag_df = pd.read_csv(os.path.join(path, 'Magnetometer.csv'), delimiter=',')
    device_df = pd.read_csv(os.path.join(path, 'meta', 'device.csv'), delimiter=',')
    time_df = pd.read_csv(os.path.join(path, 'meta', 'time.csv'), delimiter=',')
    
    acc_df.to_csv(os.path.join(path, 'Accelerometer.csv'), index=False, sep=';')
    gyo_df.to_csv(os.path.join(path, 'Gyroscope.csv'), index=False, sep=';')
    linacc_df.to_csv(os.path.join(path, 'Linear Accelerometer.csv'), index=False, sep=';')
    mag_df.to_csv(os.path.join(path, 'Magnetometer.csv'), index=False, sep=';')
    device_df.to_csv(os.path.join(path, 'meta', 'device.csv'), index=False, sep=';')
    time_df.to_csv(os.path.join(path, 'meta', 'time.csv'), index=False, sep=';')

In [5]:
# convert_csv('./pocket/202301101952/target')
# convert_csv('./pocket/202301101952/source')

# 讀檔

In [6]:
def rename_data(df):
    new_names = ['acc_times', 'acc_x', 'acc_y', 'acc_z', 'gyo_times', 'gyo_x', 'gyo_y', 'gyo_z', 'lin_acc_times', 'lin_acc_x', 'lin_acc_y', 'lin_acc_z', 'mag_times', 'mag_x', 'mag_y', 'mag_z']
    df.columns = new_names
    
    return df


def load_original_data(path):
    acc_df = pd.read_csv(os.path.join(path, 'Accelerometer.csv'), delimiter=';')
    gyo_df = pd.read_csv(os.path.join(path, 'Gyroscope.csv'), delimiter=';')
    linacc_df = pd.read_csv(os.path.join(path, 'Linear Accelerometer.csv'), delimiter=';')
    mag_df = pd.read_csv(os.path.join(path, 'Magnetometer.csv'), delimiter=';')
    
    total_df = pd.concat([acc_df, gyo_df, linacc_df, mag_df], axis=1)
    total_df = rename_data(total_df)
    
    return total_df

In [7]:
def bound_range(df):
    start = datapoint_per_second * 35
    end = len(df) - datapoint_per_second * 20
    
    return df.iloc[start:end]


def split_segments(df, duration=5):
    length = datapoint_per_second * duration
    len_of_segs = int(np.floor(len(df) / length))
    
    segments = []
    for i in range(len_of_segs):
        segments.append(df.iloc[int(i * length):int((i + 1) * length)].to_numpy())
        
    return segments


def select_data(df):
    return df[['acc_x', 'acc_y', 'acc_z', 'gyo_x', 'gyo_y', 'gyo_z', 'lin_acc_x', 'lin_acc_y', 'lin_acc_z', 'mag_x', 'mag_y', 'mag_z']]


def preprocess_data(df, duration):
    pre_df = bound_range(df)
    pre_df = select_data(pre_df)
    segs = split_segments(pre_df, duration)
    
    return segs

In [8]:
# test_df = load_original_data('./front_pocket/202302071724/source')
# segs = preprocess_data(test_df)

In [9]:
datapoint_per_second = 20
duration = 2
classes = {'target': 0, 'front_pocket': 1, 'pocket': 2, 'swing': 3}

def device_version(path):
    device_df = pd.read_csv(os.path.join(path, 'meta/device.csv'), delimiter=';', index_col=0)
    version = device_df.loc['deviceRelease'].value
    
    return version

def load_pair_data(root_folder, class_num):
    pair_data = []

    for folder in os.listdir(root_folder):
        if folder.startswith('.'):
            continue

        folder_path = os.path.join(root_folder, folder)
        source_path = os.path.join(folder_path, 'source')
        target_path = os.path.join(folder_path, 'target')
        
        print(folder_path)
        
        #########################
        ##### check devices #####
        #########################
        while True:
            source_version = device_version(source_path)
            target_version = device_version(target_path)
            
            print(source_path, target_path)
            
            if source_version[:2] == '15' and target_version[:2] == '16':
                break
            elif source_version[:2] == '16' and target_version[:2] == '15':
                source_path = os.path.join(folder_path, 'target')
                target_path = os.path.join(folder_path, 'source')
                print('--- GG ---')
                continue
            else:
                raise
        
        ####################################
        ##### load and preprocess data #####
        ####################################
        source_segs = preprocess_data(load_original_data(source_path), duration)
        target_segs = preprocess_data(load_original_data(target_path), duration)

        idx = min(len(source_segs), len(target_segs))
        source_tags = [class_num] * idx
        target_tags = [0] * idx

        pair_data.extend(zip(source_segs[:idx], source_tags, target_segs[:idx], target_tags))
        
    return pair_data

In [10]:
device_version('./front_pocket/202302071523/source')  # source version: 15.4

'15.4'

In [11]:
device_version('./front_pocket/202302071523/target')  # target version: 16.3

'16.3'

In [12]:
front_pocket_pair_data = load_pair_data('./front_pocket', class_num=1)
pocket_pair_data = load_pair_data('./pocket', class_num=2)
swing_pair_data = load_pair_data('./swing', class_num=3)

./front_pocket/202302071628
./front_pocket/202302071628/source ./front_pocket/202302071628/target
./front_pocket/202302071652
./front_pocket/202302071652/source ./front_pocket/202302071652/target
./front_pocket/202302071523
./front_pocket/202302071523/source ./front_pocket/202302071523/target
./front_pocket/202302071531
./front_pocket/202302071531/source ./front_pocket/202302071531/target
./front_pocket/202302071715
./front_pocket/202302071715/source ./front_pocket/202302071715/target
./front_pocket/202302071641
./front_pocket/202302071641/source ./front_pocket/202302071641/target
./front_pocket/202302071541
./front_pocket/202302071541/source ./front_pocket/202302071541/target
./front_pocket/202302071619
./front_pocket/202302071619/source ./front_pocket/202302071619/target
./front_pocket/202302071704
./front_pocket/202302071704/source ./front_pocket/202302071704/target
./front_pocket/202302071724
./front_pocket/202302071724/source ./front_pocket/202302071724/target
./pocket/20230213210

In [13]:
print(len(front_pocket_pair_data), len(pocket_pair_data), len(swing_pair_data))

1439 1375 1346


# 建立dataloader

In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score

In [15]:
class ClassDataset(Dataset):
    def __init__(self, data, label):
        self.data = data
        self.label = label

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.label[idx]

In [16]:
class SimpleRNN(nn.Module):
    def __init__(self, seq_len=100, num_of_classes=2):
        super(SimpleRNN, self).__init__()
        
        self.seq_len = seq_len
        
        self.layer0 = nn.Sequential(
            nn.Linear(9, 16),
            nn.LeakyReLU(),
            nn.Linear(16, 16),
            nn.LeakyReLU(),
        )
        
        self.rnn = nn.RNN(input_size=16, hidden_size=16, num_layers=2, batch_first=True)
#         self.lstm = nn.LSTM(input_size=16, hidden_size=16, num_layers=2, batch_first=True, bidirectional=True)
        
        self.last = nn.Sequential(
            nn.Linear(16, 16),
            nn.LeakyReLU(),
            nn.Linear(16, num_of_classes),
            nn.Softmax(dim=1),
        )
        
    def forward(self, x):
        h = self.layer0(x)
        
        hz, _ = self.rnn(h)
        
        out = self.last(hz)
        
        return out

In [17]:
front_pocket_half = int(len(front_pocket_pair_data) / 2)
pocket_half = int(len(pocket_pair_data) / 2)
swing_half = int(len(swing_pair_data) / 2)

train_data = front_pocket_pair_data[:front_pocket_half] + pocket_pair_data[:pocket_half] + swing_pair_data[:swing_half]
valid_data = front_pocket_pair_data[front_pocket_half:] + pocket_pair_data[pocket_half:] + swing_pair_data[:swing_half]

# train
t_data = np.array([d[0] for d in train_data] + [d[2] for d in train_data[::3]])
t_label = [d[1] for d in train_data] + [d[3] for d in train_data[::3]]
train_dataset = ClassDataset(
                    data = torch.tensor(t_data, dtype=torch.float),
                    label = t_label,
                )
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# valid
v_data = np.array([d[0] for d in valid_data] + [d[2] for d in valid_data])
v_label = [d[1] for d in valid_data] + [d[3] for d in valid_data]
valid_dataset = ClassDataset(
                    data = torch.tensor(v_data, dtype=torch.float),
                    label = v_label,
                )
valid_loader = DataLoader(valid_dataset, batch_size=16)

np.unique(t_label, return_counts=True), np.unique(v_label, return_counts=True)

((array([0, 1, 2, 3]), array([693, 719, 687, 673])),
 (array([0, 1, 2, 3]), array([2081,  720,  688,  673])))

In [18]:
batch = next(iter(train_loader))

In [19]:
EPOCH = 30
num_of_classes = 4
device = torch.device("cuda" if (torch.cuda.is_available()) else "cpu")

In [37]:
model = SimpleRNN(seq_len=int(datapoint_per_second * duration), num_of_classes=num_of_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
ce_loss = torch.nn.CrossEntropyLoss()

In [38]:
def train(model, dataloader, optimizer):
    model.train()

    losses = []
    accuracies = []

    for sequences, labels in dataloader:
        optimizer.zero_grad()

        sequences = sequences.to(device)
#             labels = labels.to(device)
        one_hot = F.one_hot(labels, num_classes=num_of_classes).to(device)

        predict_probability = model(sequences[:, :, :9])
        _, predict_classes = torch.max(predict_probability, 1)

        loss = ce_loss(predict_probability, one_hot)

        # backward
        loss.backward()
        optimizer.step()
        
        losses.append(loss.item())
        #accuracies.append(accuracy_score(labels.cpu().detach().numpy(), predict_classes.cpu().detach().numpy()))
    
    return np.mean(losses)

In [39]:
def evalute(model, dataloader):
    model.eval()

    losses = []
    accuracies = []
    
    with torch.no_grad():
        for sequences, labels in dataloader:
            
            sequences = sequences.to(device)
#             labels = labels.to(device)
            one_hot = F.one_hot(labels, num_classes=num_of_classes).to(device)

            #############
            # generator #
            #############
            predict_probability = model(sequences[:, :, :9])
            _, predict_classes = torch.max(predict_probability, 1)

            loss = ce_loss(predict_probability, one_hot)

            losses.append(loss.item())
            #accuracies.append(accuracy_score(labels.cpu().detach().numpy(), predict_classes.cpu().detach().numpy()))
    
    return np.mean(losses)

In [40]:
for epoch in range(EPOCH):
    #####
    # 1. 用上半部訓練50epoch
    # 2. 隨機用上或下半部訓練Model
    # 3. 印出trajectory結果
    #####
    
    train_loss = train(model, train_loader, optimizer)
    valid_loss = evalute(model, valid_loader)
    
    ep = str(epoch).zfill(5)

    print(f'{ep}: train loss: {train_loss: 2.3f}, valid loss: {valid_loss: 2.3f}')

00000: train loss:  3.319, valid loss:  2.984
00001: train loss:  2.982, valid loss:  2.981
00002: train loss:  2.981, valid loss:  2.981
00003: train loss:  2.981, valid loss:  2.981
00004: train loss:  2.981, valid loss:  2.981
00005: train loss:  2.981, valid loss:  2.981
00006: train loss:  2.981, valid loss:  2.981


KeyboardInterrupt: 

In [None]:
def output_eval(model, dataloader):
    model.eval()

    losses = []
    accuracies = []
    
    with torch.no_grad():
        for i, (sequences, labels) in enumerate(dataloader):
            
            sequences = sequences.to(device)
#             labels = labels.to(device)
            one_hot = F.one_hot(labels, num_classes=num_of_classes).to(device).float()

            #############
            # generator #
            #############
            predict_probability = model(sequences[:, :, :9])
            _, predict_classes = torch.max(predict_probability, 1)
            
            print(f'{i: >3} predict class: {predict_classes.cpu().detach().numpy()}')
            print(f'{"": >3}  ground truth: {labels.numpy()}')

            loss = ce_loss(predict_probability, one_hot)

            losses.append(loss.item())
            accuracies.append(accuracy_score(labels.cpu().detach().numpy(), predict_classes.cpu().detach().numpy()))
            
    print(f'loss: {np.mean(losses): 2.3f}, accuracy: {np.mean(accuracies): 2.3f}')

In [None]:
output_eval(model, valid_loader)

In [41]:
a = [1,2,3]
a[:-1]

[1, 2]