In [1]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [11]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from natsort import natsorted
import scipy.stats as stats
from scipy.fft import rfft
from scipy.integrate import cumulative_trapezoid as cumtrapz
import scipy.integrate as integrate
from itertools import combinations

# cumtrapz = integrate.cumulative_trapezoid



In [12]:
data_path = '/content/drive/My Drive/BUN_BO/Data.SHL/Raw_data'
data_name_files = [f.name for f in os.scandir(data_path) if f.is_file()]
data_name_files = natsorted(data_name_files)


gen_data_path = '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data'
gen_data_name_files = [f.name for f in os.scandir(gen_data_path) if f.is_file()]
gen_data_name_files_path = []

for i in data_name_files:
  gen_data_name_files_path.append(os.path.join(gen_data_path, i))
print(gen_data_name_files_path)

['/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_1.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_2.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_3.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_4.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_5.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_6.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_7.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_8.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_9.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_10.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_11.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_12.csv', '/content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_13.csv']


In [13]:

def extract_features(data):
    """
    Phiên bản đã sửa lỗi:
    - Thay thế x.median() bằng np.median(x)
    - Thêm xử lý cho cửa sổ dữ liệu trống
    - Tối ưu tính toán rolling features
    """
    original_columns = data.columns.tolist()
    new_features = pd.DataFrame(index=data.index)

    # ===== 1. Vector magnitude =====
    for i in range(1, 5):
        prefix = f'atr0{i}/'
        acc_cols = [f'{prefix}acc_x', f'{prefix}acc_y', f'{prefix}acc_z']
        gyro_cols = [f'{prefix}gyro_x', f'{prefix}gyro_y', f'{prefix}gyro_z']
        quat_cols = [f'{prefix}quat_w', f'{prefix}quat_x', f'{prefix}quat_y', f'{prefix}quat_z']

        new_features[f'{prefix}acc'] = np.linalg.norm(data[acc_cols], axis=1)
        new_features[f'{prefix}gyro'] = np.linalg.norm(data[gyro_cols], axis=1)
        new_features[f'{prefix}quat'] = np.linalg.norm(data[quat_cols], axis=1)

    # ===== 2. Gyro angle integration =====
    dt = 0.01
    alpha = 0.98
    for i in range(1, 5):
        prefix = f'atr0{i}/'
        gyro_cols = [f'{prefix}gyro_x', f'{prefix}gyro_y', f'{prefix}gyro_z']
        gyro_data = data[gyro_cols].fillna(0).values

        if gyro_data.shape[0] > 1:
            integrated = cumtrapz(gyro_data, dx=dt, axis=0, initial=0)
        else:
            integrated = np.zeros_like(gyro_data)

        for j, axis in enumerate(['x', 'y', 'z']):
            new_features[f'{prefix}gyro_angle_{axis}'] = alpha * integrated[:, j]

    # ===== 3. Tilt angle từ accelerometer =====
    for i in range(1, 5):
        prefix = f'atr0{i}/'
        acc_x, acc_y, acc_z = f'{prefix}acc_x', f'{prefix}acc_y', f'{prefix}acc_z'

        valid_mask = (data[acc_z] != 0) & (data[acc_x] != 0) & (data[acc_y] != 0)
        new_features[f'{prefix}tilt_angle_x'] = np.where(
            valid_mask,
            np.arctan2(data[acc_y], data[acc_z]) * 180 / np.pi,
            0
        )
        new_features[f'{prefix}tilt_angle_y'] = np.where(
            valid_mask,
            np.arctan2(data[acc_x], np.sqrt(data[acc_y]**2 + data[acc_z]**2)) * 180 / np.pi,
            0
        )

    # ===== 4. Đặc trưng thống kê và FFT (ĐÃ SỬA LỖI MEDIAN) =====
    window_size = 10
    stats_funcs = {
        'mean': lambda x: np.nanmean(x),
        'std': lambda x: np.nanstd(x),
        'median': lambda x: np.nanmedian(x),  # Sửa thành numpy median
        'max': lambda x: np.nanmax(x),
        'min': lambda x: np.nanmin(x),
        'rms': lambda x: np.sqrt(np.nanmean(x**2)),
        'kurtosis': lambda x: stats.kurtosis(x, nan_policy='omit'),
        'skew': lambda x: stats.skew(x, nan_policy='omit')
    }

    def safe_fft_energy(x):
        return np.sum(np.abs(rfft(x[~np.isnan(x)]))**2)/len(x) if len(x) > 1 else 0.0

    sensor_cols = [col for col in original_columns if any(s in col for s in ['acc', 'gyro'])]

    for col in sensor_cols:
        roller = data[col].rolling(window_size, min_periods=2)
        for stat, func in stats_funcs.items():
            new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
        new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)

    # ===== 5. Tương quan accelerometer =====
    for i in range(1, 5):
        prefix = f'atr0{i}/'
        acc_cols = [f'{prefix}acc_x', f'{prefix}acc_y', f'{prefix}acc_z']

        for col1, col2 in combinations(acc_cols, 2):
            new_features[f'{col1}_{col2}_corr'] = data[col1].rolling(window_size).corr(data[col2])

    data = pd.concat([data, new_features], axis=1)
    return data

def fill_nan_with_int(data):
    """
    Phiên bản cải tiến:
    - Sử dụng phương pháp nội suy cho dữ liệu sensor
    - Giữ nguyên giá trị 0 hợp lệ
    """
    sensor_cols = [col for col in data.columns if any(k in col for k in ['acc', 'gyro', 'quat'])]

    # Thay thế 0 bất thường bằng NaN (chỉ ở nơi có giá trị 0 liên tiếp)
    for col in sensor_cols:
        mask = (data[col] == 0) & (data[col].shift() != 0) & (data[col].shift(-1) != 0)
        data.loc[mask, col] = np.nan

    # Nội suy tuyến tính + fill giá trị biên
    data[sensor_cols] = data[sensor_cols].interpolate(method='linear', limit_area='inside', axis=0)
    data[sensor_cols] = data[sensor_cols].ffill(limit=3).bfill(limit=3)

    return data



In [14]:
def load_and_process_data(file_paths, indices):
    """
    Phiên bản cải tiến:
    - Thêm kiểm tra dữ liệu đầu vào
    - Xử lý lỗi chi tiết
    - Tự động phát hiện encoding file
    """
    data_list = []
    column_structure = None

    for idx in indices:
        file_path = file_paths[idx]
        try:
            # Thử các encoding phổ biến
            for encoding in ['utf-8', 'latin-1', 'cp1252']:
                try:
                    data_file = pd.read_csv(file_path, engine='c', encoding=encoding)
                    break
                except UnicodeDecodeError:
                    continue

            # Kiểm tra cấu trúc cột
            if column_structure is None:
                column_structure = set(data_file.columns)
            elif set(data_file.columns) != column_structure:
                print(f"File {file_path} bị bỏ qua do cấu trúc cột khác biệt")
                continue

            # Xử lý dữ liệu
            data_file = data_file.drop(columns=['Unnamed: 0'], errors='ignore')
            data_file = extract_features(data_file)
            data_file = fill_nan_with_int(data_file)

            if not data_file.empty:
                data_list.append(data_file)
                print(f"✅ Đã xử lý thành công {file_path}")
            else:
                print(f"⚠️ File {file_path} trống sau xử lý")

        except Exception as e:
            print(f"❌ Lỗi nghiêm trọng ở {file_path}: {str(e)}")
            continue

    return pd.concat(data_list, ignore_index=True) if data_list else pd.DataFrame()



# Train: File_1.csv và từ File_4.csv đến File_13.csv
train_indices = [0] + list(range(3, 13))
data_train = load_and_process_data(gen_data_name_files_path, train_indices)

# Test: File_2.csv và File_3.csv
test_indices = [1, 2]
data_test = load_and_process_data(gen_data_name_files_path, test_indices)

print(f"Train shape: {data_train.shape}, Test shape: {data_test.shape}")


  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{s

✅ Đã xử lý thành công /content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_1.csv


  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{s

✅ Đã xử lý thành công /content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_4.csv


  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{s

✅ Đã xử lý thành công /content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_5.csv


  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{s

✅ Đã xử lý thành công /content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_6.csv


  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{s

✅ Đã xử lý thành công /content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_7.csv


  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{s

✅ Đã xử lý thành công /content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_8.csv


  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{s

✅ Đã xử lý thành công /content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_9.csv


  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{s

✅ Đã xử lý thành công /content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_10.csv


  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{s

✅ Đã xử lý thành công /content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_11.csv


  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{s

✅ Đã xử lý thành công /content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_12.csv


  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{s

✅ Đã xử lý thành công /content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_13.csv


  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{s

✅ Đã xử lý thành công /content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_2.csv


  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_fft_energy'] = roller.apply(safe_fft_energy, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{stat}'] = roller.apply(func, raw=True)
  new_features[f'{col}_{s

✅ Đã xử lý thành công /content/drive/My Drive/BUN_BO/Data.SHL/Gen_data/File_3.csv
Train shape: (756499, 302), Test shape: (155878, 302)


In [15]:
data_train.to_csv('/content/drive/My Drive/BUN_BO/Data.SHL/Data_using/data_train.csv', index=False)
data_test.to_csv('/content/drive/My Drive/BUN_BO/Data.SHL/Data_using/data_test.csv', index=False)