In [36]:
import os

import numpy as np
import pandas as pd
import zipfile
import matplotlib.pyplot as plt

# 解壓縮資料

In [11]:
for folder, _, files in os.walk('./front_pocket/'):
    for file in files:
        if file.endswith('zip'):
            file_path = os.path.join(folder, file)
            print(file_path)

            sotre_path = os.path.join(folder, file.rsplit('.')[0])
            # 開啟 ZIP 壓縮檔 
            with zipfile.ZipFile(file_path, 'r') as zf:
                # 解壓縮所有檔案至 /my/folder 目錄
                zf.extractall(path=sotre_path)

./front_pocket/202302071628/target.zip
./front_pocket/202302071628/source.zip
./front_pocket/202302071652/target.zip
./front_pocket/202302071652/source.zip
./front_pocket/202302071523/target.zip
./front_pocket/202302071523/source.zip
./front_pocket/202302071531/target.zip
./front_pocket/202302071531/source.zip
./front_pocket/202302071715/target.zip
./front_pocket/202302071715/source.zip
./front_pocket/202302071641/target.zip
./front_pocket/202302071641/source.zip
./front_pocket/202302071541/target.zip
./front_pocket/202302071541/source.zip
./front_pocket/202302071619/target.zip
./front_pocket/202302071619/source.zip
./front_pocket/202302071704/target.zip
./front_pocket/202302071704/source.zip
./front_pocket/202302071724/target.zip
./front_pocket/202302071724/source.zip


# 讀檔

In [100]:
def rename_data(df):
    new_names = ['acc_times', 'acc_x', 'acc_y', 'acc_z', 'gyo_times', 'gyo_x', 'gyo_y', 'gyo_z', 'lin_acc_times', 'lin_acc_x', 'lin_acc_y', 'lin_acc_z', 'mag_times', 'mag_x', 'mag_y', 'mag_z']
    df.columns = new_names
    
    return df


def load_original_data(path):
    acc_df = pd.read_csv(os.path.join(path, 'Accelerometer.csv'), delimiter=';')
    gyo_df = pd.read_csv(os.path.join(path, 'Gyroscope.csv'), delimiter=';')
    linacc_df = pd.read_csv(os.path.join(path, 'Linear Accelerometer.csv'), delimiter=';')
    mag_df = pd.read_csv(os.path.join(path, 'Magnetometer.csv'), delimiter=';')
    
    total_df = pd.concat([acc_df, gyo_df, linacc_df, mag_df], axis=1)
    total_df = rename_data(total_df)
    
    return total_df

In [165]:
def bound_range(df, second=10):
    start = 20 * second
    end = len(df) - 20 * second
    
    return df.iloc[start:end]


def split_segments(df, second=5):
    length = 20 * second
    len_of_segs = int(np.floor(len(df) / length))
    
    segments = []
    for i in range(len_of_segs):
        segments.append(df.iloc[int(i * length):int((i + 1) * length)])
        
    return segments


def select_data(df):
    return df[['acc_times', 'acc_x', 'acc_y', 'acc_z', 'gyo_x', 'gyo_y', 'gyo_z', 'lin_acc_x', 'lin_acc_y', 'lin_acc_z', 'mag_x', 'mag_y', 'mag_z']]


def preprocess_data(df):
    pre_df = bound_range(df)
    pre_df = select_data(pre_df)
    segs = split_segments(pre_df)
    
    return segs

In [None]:
# test_df = load_original_data('./front_pocket/202302071724/source')
# segs = preprocess_data(test_df)

In [139]:
check_device('./front_pocket/202302071523/source').loc['deviceRelease']  # source version: 15.4

value    15.4
Name: deviceRelease, dtype: object

In [140]:
check_device('./front_pocket/202302071523/target').loc['deviceRelease']  # target version: 16.3

value    16.3
Name: deviceRelease, dtype: object

In [158]:
def device_version(path):
    device_df =  pd.read_csv(os.path.join(path, 'meta/device.csv'), delimiter=';', index_col=0)
    return device_df.loc['deviceRelease'].value

def load_pair_data(root_folder):
    pair_data = []

    for folder in os.listdir(root_folder):
        if folder.startswith('.'):
            continue

        folder_path = os.path.join(root_folder, folder)
        source_path = os.path.join(folder_path, 'source')
        target_path = os.path.join(folder_path, 'target')

        #########################
        ##### check devices #####
        #########################
        while True:
            source_version = device_version(source_path)
            target_version = device_version(target_path)
            
            print(source_path, target_path)
            
            if source_version == '15.4' and target_version == '16.3':
                break
            elif source_version == '16.3' and target_version == '15.4':
                source_path = os.path.join(folder_path, 'target')
                target_path = os.path.join(folder_path, 'source')
                print('--- GG ---')
                continue
            else:
                raise
        
        ####################################
        ##### load and preprocess data #####
        ####################################
        
        source_segs = preprocess_data(load_original_data(source_path))
        target_segs = preprocess_data(load_original_data(target_path))

        idx = min(len(source_segs), len(target_segs))

        pair_data.extend(zip(source_segs[:idx], target_segs[:idx]))
        
    return pair_data

In [166]:
pair_data = load_pair_data('./front_pocket')

./front_pocket/202302071628/source ./front_pocket/202302071628/target
./front_pocket/202302071652/source ./front_pocket/202302071652/target
./front_pocket/202302071523/source ./front_pocket/202302071523/target
./front_pocket/202302071531/source ./front_pocket/202302071531/target
./front_pocket/202302071715/source ./front_pocket/202302071715/target
./front_pocket/202302071641/source ./front_pocket/202302071641/target
./front_pocket/202302071541/source ./front_pocket/202302071541/target
./front_pocket/202302071619/source ./front_pocket/202302071619/target
./front_pocket/202302071704/source ./front_pocket/202302071704/target
./front_pocket/202302071724/source ./front_pocket/202302071724/target


# 建立dataloader

In [160]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

In [161]:
class PairDataset(Dataset):
    def __init__(self, pair_data):
        self.pair_data = pair_data

    def __len__(self):
        return len(self.pair_data)

    def __getitem__(self, idx):
        return self.pair_data[idx]

In [None]:
class SimpleRNN(nn.Module):
    def __init__(self, seq_len=100, classes=2):
        super(SimpleRNN, self).__init__()
        
        self.seq_len = seq_len
        
        self.layer0 = nn.Sequential(
            nn.Linear(12, 24),
            nn.LeakyReLU(),
            nn.Linear(24, 32),
            nn.LeakyReLU(),
        )
        
        self.rnn = nn.RNN(input_size=32, hidden_size=32, num_layers=2, batch_first=True, bidirectional=True)
#         self.lstm = nn.LSTM(input_size=16, hidden_size=16, num_layers=2, batch_first=True, bidirectional=True)
        
        self.last = nn.Sequential(
            nn.Linear(32, 32),
            nn.LeakyReLU(),
            nn.Linear(32, classes),
            nn.Softmax(),
        )
        
    def forward(self, x):
        h = self.layer0(x)
        
        hz, _ = self.lstm(h)
        
        out = self.last(hz)
        
        return out

In [162]:
train_data = pair_data[:-50]
valid_data = pair_data[-50:]

# train
train_dataset = PairDataset(
                    pair_data=torch.tensor(np.array(pair_data), dtype=torch.float),
                )
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# valid
valid_dataset = PairDataset(
                    pair_data=torch.tensor(np.array(pair_data), dtype=torch.float),
                )
valid_loader = DataLoader(valid_dataset, batch_size=16)

In [163]:
batch = next(iter(train_loader))

In [None]:
EPOCH = 1000
device = torch.device("cuda" if (torch.cuda.is_available()) else "cpu")

In [None]:
model = SimpleRNN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
bce_loss = torch.nn.BCELoss()

In [None]:
def train(model, dataloader, optimizer, epoch):
    model.train()

    train_losses = []

    for sources, source_labels, targets in tqdm(dataloader):
        optimizer.zero_grad()

        sources = sources.to(device)
        source_labels = source_labels.to(device)
        targets = targets.to(device)

        #############
        # generator #
        #############
        predict_classes = model(sources)

        loss = bce_loss()

        # backward

    ### mkae sequence save
    if epoch > Tepoch:
        seq_pos = seq_pos.detach().cpu().numpy().reshape(-1, seq_len, 2)
    else:
        seq_pos = np.zeros((len(target), seq_len, 2))

    train_loss = np.mean(train_losses, axis=0, dtype=np.float64)
    train_mag_loss = np.mean(train_mag_losses, axis=0, dtype=np.float64)

    print(f'time: {time.time() - ep_start_time:>6.0f}s, ep: {epoch + 1:>5}, train: ms loss: {train_loss[3]:>5.3f}, generator loss: {train_loss[0]:>5.3f}, dis* loss: {train_loss[1]:>5.3f}, lstm dis* loss: {train_loss[4]:>5.3f}, seq dis* loss: {train_loss[5]:>5.3f}, pos loss: {train_loss[6]:>5.3f}' + 
          f'\n{" ":>32} mag: {np.mean(train_mag_loss[:3]):>5.3f} ({train_loss[2]:>5.3f}), magN: {train_mag_loss[0]:>5.3f}, magE: {train_mag_loss[1]:>5.3f}, magD: {train_mag_loss[2]:>5.3f}, magT: {train_mag_loss[3]:>5.3f}')
    
    return train_loss, train_mag_loss, seq_pos