In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths used
# later in this notebook all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from natsort import natsorted

In [None]:
# Raw recordings: collect the file names and order them naturally
# (File_2 before File_10) so list positions follow the file numbers.
data_path = '/content/drive/My Drive/BUN_BO/Data.SHL/Raw_data'
data_name_files = natsorted(
    [entry.name for entry in os.scandir(data_path) if entry.is_file()]
)

# Generated data: names of the files currently present in the directory.
gen_data_path = '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data'
gen_data_name_files = [entry.name for entry in os.scandir(gen_data_path) if entry.is_file()]

# NOTE(review): the Gen_data paths are built from the Raw_data file names, not
# from gen_data_name_files (which is never used) -- this presumably assumes both
# directories hold identically named files; confirm.
gen_data_name_files_path = [
    os.path.join(gen_data_path, file_name) for file_name in data_name_files
]
print(gen_data_name_files_path)

['/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_1.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_2.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_3.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_4.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_5.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_6.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_7.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_8.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_9.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_10.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_11.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_12.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_13.csv']


In [None]:
def extract_features(data, dt=0.01, alpha=0.98, window_size=10):
    """Add engineered IMU features for sensors ``atr01``..``atr04`` to ``data``.

    The frame is modified in place (new columns are appended) and the same
    object is returned. For every sensor prefix ``atr0{i}`` (i = 1..4) the
    following columns are added:

    * ``{p}/acc``, ``{p}/gyro``, ``{p}/quat`` -- Euclidean norm of the
      accelerometer, gyroscope and quaternion components.
    * ``{p}/gyro_angle_{x,y,z}`` -- leaky integration of the gyroscope signal,
      ``angle[j] = alpha * (angle[j-1] + gyro[j] * dt)``. Row 0 is always 0,
      and a row with a NaN in any gyro axis keeps the previous angle.
    * ``{p}/tilt_angle_x``, ``{p}/tilt_angle_y`` -- tilt angles in degrees
      computed from the accelerometer with ``arctan2``.
    * ``{p}/{acc,gyro}_{x,y,z}_mean`` / ``_std`` -- rolling mean and standard
      deviation over ``window_size`` rows (``min_periods=1``, so the first
      row's std is NaN).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``atr0{i}/acc_{x,y,z}``, ``atr0{i}/gyro_{x,y,z}`` and
        ``atr0{i}/quat_{w,x,y,z}`` for i = 1..4. Any index is accepted; rows
        are processed in their stored order.
    dt : float, default 0.01
        Integration step between consecutive rows (presumably seconds, i.e. a
        100 Hz sampling rate -- TODO confirm against the recording setup).
    alpha : float, default 0.98
        Leak factor applied at every integration step.
    window_size : int, default 10
        Number of rows in the rolling-statistics window.

    Returns
    -------
    pandas.DataFrame
        ``data`` itself, with the feature columns added.

    Raises
    ------
    KeyError
        If one of the required input columns is missing.
    """
    sensors = [f'atr0{i}' for i in range(1, 5)]
    axes = ['x', 'y', 'z']

    # Vector magnitude of acc, gyro and quat.
    for prefix in sensors:
        acc_x, acc_y, acc_z = (f'{prefix}/acc_{a}' for a in axes)
        gyro_x, gyro_y, gyro_z = (f'{prefix}/gyro_{a}' for a in axes)
        quat_w, quat_x, quat_y, quat_z = (f'{prefix}/quat_{c}' for c in ['w'] + axes)

        data[f'{prefix}/acc'] = np.sqrt(data[acc_x]**2 + data[acc_y]**2 + data[acc_z]**2)
        data[f'{prefix}/gyro'] = np.sqrt(data[gyro_x]**2 + data[gyro_y]**2 + data[gyro_z]**2)
        data[f'{prefix}/quat'] = np.sqrt(
            data[quat_w]**2 + data[quat_x]**2 + data[quat_y]**2 + data[quat_z]**2
        )

    # Leaky integration of the gyroscope signal (gyro angle).
    for prefix in sensors:
        # Work on a positional float array: label-based ``data.loc[j]`` breaks
        # on any index that is not 0..n-1 and is very slow per row, and an
        # integer-typed input would otherwise truncate the integrated angle.
        gyro = data[[f'{prefix}/gyro_{a}' for a in axes]].to_numpy(dtype=float)
        increment = gyro * dt
        row_has_nan = np.isnan(gyro).any(axis=1)
        gyro_angle = np.zeros_like(gyro)

        # The recurrence is inherently sequential, so it stays a loop.
        for j in range(1, len(gyro)):
            if row_has_nan[j]:
                gyro_angle[j] = gyro_angle[j - 1]  # hold the previous value on NaN
            else:
                gyro_angle[j] = alpha * (gyro_angle[j - 1] + increment[j])

        for k, axis in enumerate(axes):
            data[f'{prefix}/gyro_angle_{axis}'] = gyro_angle[:, k]

    # Tilt angle from the accelerometer.
    for prefix in sensors:
        ax = data[f'{prefix}/acc_x'].to_numpy(dtype=float)
        ay = data[f'{prefix}/acc_y'].to_numpy(dtype=float)
        az = data[f'{prefix}/acc_z'].to_numpy(dtype=float)

        # arctan2 performs no division and is defined for zero arguments
        # (arctan2(0, 0) == 0), so no zero-guard is needed. The previous guard
        # zeroed the tilt whenever any single axis was exactly 0, discarding
        # valid angles.
        data[f'{prefix}/tilt_angle_x'] = np.degrees(np.arctan2(ay, az))
        data[f'{prefix}/tilt_angle_y'] = np.degrees(np.arctan2(ax, np.sqrt(ay**2 + az**2)))

    # Rolling-window statistics.
    for prefix in sensors:
        for axis in axes:
            for kind in ['acc', 'gyro']:
                col = f'{prefix}/{kind}_{axis}'
                window = data[col].rolling(window=window_size, min_periods=1)
                data[f'{col}_mean'] = window.mean()
                data[f'{col}_std'] = window.std()

    return data


In [None]:
def load_and_process_data(file_paths, indices):
    """Read the selected CSV files, add features to each, and stack the results.

    Parameters
    ----------
    file_paths : sequence of str
        Candidate CSV paths.
    indices : iterable of int
        Positions in ``file_paths`` of the files to load.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation (with a fresh index) of every file that was
        processed successfully, or an empty DataFrame if none was.

    Notes
    -----
    Loading is best-effort: a file that fails to read or process is reported
    with ``print`` and skipped, so the result may cover fewer files than
    requested.
    """
    processed = []
    for i in indices:
        try:
            frame = pd.read_csv(file_paths[i])
            # Drop the 'Unnamed: 0' column when present (presumably a row index
            # written out by an earlier to_csv call).
            if 'Unnamed: 0' in frame.columns:
                frame = frame.drop(columns=['Unnamed: 0'])
            # Features are computed per file, before the files are concatenated.
            processed.append(extract_features(frame))
        except Exception as e:
            print(f"Lỗi khi xử lý tệp {file_paths[i]}: {e}")

    if not processed:
        return pd.DataFrame()
    return pd.concat(processed, ignore_index=True)


# Training set: File_1.csv plus File_4.csv through File_13.csv
# (positions 0 and 3..12 in the naturally sorted path list).
train_indices = [0, *range(3, 13)]
data_train = load_and_process_data(gen_data_name_files_path, train_indices)

# Test set: File_2.csv and File_3.csv (positions 1 and 2).
test_indices = list(range(1, 3))
data_test = load_and_process_data(gen_data_name_files_path, test_indices)

print(f"Train shape: {data_train.shape}, Test shape: {data_test.shape}")


  data.fillna(method='ffill', inplace=True)
  data.fillna(method='ffill', inplace=True)
  data.fillna(method='ffill', inplace=True)
  data.fillna(method='ffill', inplace=True)
  data.fillna(method='ffill', inplace=True)
  data.fillna(method='ffill', inplace=True)
  data.fillna(method='ffill', inplace=True)
  data.fillna(method='ffill', inplace=True)
  data.fillna(method='ffill', inplace=True)
  data.fillna(method='ffill', inplace=True)
  data.fillna(method='ffill', inplace=True)
  data.fillna(method='ffill', inplace=True)


Train shape: (756499, 122), Test shape: (155878, 122)


  data.fillna(method='ffill', inplace=True)


In [None]:
# Write the feature-engineered train and test frames to the Data_using folder
# as CSV; index=False keeps the row index out of the files.
data_train.to_csv('/content/drive/My Drive/BUN_BO/Data.SHL/Data_using/data_train.csv', index=False)
data_test.to_csv('/content/drive/My Drive/BUN_BO/Data.SHL/Data_using/data_test.csv', index=False)