In [None]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


class VibrationDataset:
    """Load per-category CSV data and expose it as train/val/test tf.data splits.

    ``data_dir`` is expected to contain one sub-directory per category, and
    each sub-directory one or more files readable by ``pandas.read_csv``.
    In every file the last ``condition_dim`` columns are the conditions and
    all preceding columns are the features.
    """

    def __init__(self, data_dir, condition_dim=5, batch_size=32, shuffle=True,
                 validation_split=0.1, test_split=0.1):
        """Store the configuration and discover the category directories.

        Args:
            data_dir: Root directory holding one sub-directory per category.
            condition_dim: Number of trailing columns treated as conditions.
            batch_size: Batch size of the produced datasets.
            shuffle: Whether the produced datasets are shuffled.
            validation_split: Fraction of the data remaining after the test
                split that is held out for validation.
            test_split: Fraction of all data held out for testing.
        """
        self.data_dir = data_dir
        self.condition_dim = condition_dim
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.validation_split = validation_split
        self.test_split = test_split
        # Only real, non-hidden sub-directories are categories; stray files
        # and entries such as ``.ipynb_checkpoints`` would otherwise be
        # picked up and break loading.
        self.categories = sorted(
            entry
            for entry in os.listdir(self.data_dir)
            if not entry.startswith('.')
            and os.path.isdir(os.path.join(self.data_dir, entry))
        )
        self.num_categories = len(self.categories)

    def _load_data(self, file_path):
        """Read one CSV file and split its columns into features and conditions.

        Returns:
            A ``(features, conditions)`` tuple of 2-D arrays that share the
            same number of rows.

        Raises:
            ValueError: If ``condition_dim`` is negative or exceeds the number
                of columns in the file.
        """
        # Assuming CSV format; adjust accordingly if using different file types.
        data = pd.read_csv(file_path).values
        n_cols = data.shape[1]
        if not 0 <= self.condition_dim <= n_cols:
            raise ValueError(
                f"condition_dim={self.condition_dim} is out of range for "
                f"{file_path!r}, which has {n_cols} columns"
            )
        # An explicit split index keeps condition_dim == 0 correct; negative
        # slicing with -0 would return no feature columns at all.
        split_at = n_cols - self.condition_dim
        return data[:, :split_at], data[:, split_at:]

    def _load_category_data(self, category):
        """Load and row-concatenate every data file of one category.

        The values are returned unscaled: standardization is done once in
        ``get_dataset_splits`` with statistics from the training split only,
        so no validation/test information leaks into training.

        Raises:
            ValueError: If the category directory contains no data files.
        """
        category_dir = os.path.join(self.data_dir, category)
        candidates = (
            os.path.join(category_dir, name)
            for name in sorted(os.listdir(category_dir))
            if not name.startswith('.')
        )
        file_paths = [path for path in candidates if os.path.isfile(path)]
        if not file_paths:
            raise ValueError(f"No data files found in category directory {category_dir!r}")

        loaded = [self._load_data(path) for path in file_paths]
        features = np.concatenate([feats for feats, _ in loaded], axis=0)
        conditions = np.concatenate([conds for _, conds in loaded], axis=0)
        return features, conditions

    def _load_all_data(self):
        """Load every category and stack the results row-wise.

        Raises:
            ValueError: If no category directories were found in ``data_dir``.
        """
        if not self.categories:
            raise ValueError(f"No category directories found in {self.data_dir!r}")

        loaded = [self._load_category_data(category) for category in self.categories]
        features = np.concatenate([feats for feats, _ in loaded], axis=0)
        conditions = np.concatenate([conds for _, conds in loaded], axis=0)
        return features, conditions

    def _preprocess_dataset(self, features, conditions):
        """Wrap arrays in a batched, prefetching ``tf.data.Dataset``.

        When ``self.shuffle`` is true the shuffle buffer spans the whole
        array, i.e. the elements are fully shuffled.
        """
        dataset = tf.data.Dataset.from_tensor_slices((features, conditions))

        if self.shuffle:
            dataset = dataset.shuffle(buffer_size=len(features))

        dataset = dataset.batch(self.batch_size)
        dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

        return dataset

    def get_dataset_splits(self):
        """Return ``(train_dataset, val_dataset, test_dataset)``.

        The test split is carved out first; ``validation_split`` is then
        applied to what remains. Both splits use a fixed random state, so
        the partition is reproducible. Features are standardized with a
        scaler fitted on the training split only.
        """
        features, conditions = self._load_all_data()

        # Split the full data into training, validation and test sets.
        x_train, x_test, y_train, y_test = train_test_split(
            features, conditions, test_size=self.test_split, random_state=42)
        x_train, x_val, y_train, y_val = train_test_split(
            x_train, y_train, test_size=self.validation_split, random_state=42)

        # Standardize the features using training-set statistics only.
        scaler = StandardScaler()
        x_train = scaler.fit_transform(x_train)
        x_val = scaler.transform(x_val)
        x_test = scaler.transform(x_test)

        # Build the tf.data.Dataset for each split.
        train_dataset = self._preprocess_dataset(x_train, y_train)
        val_dataset = self._preprocess_dataset(x_val, y_val)
        test_dataset = self._preprocess_dataset(x_test, y_test)

        return train_dataset, val_dataset, test_dataset

: 

In [None]:
# Run configuration: data location, column layout and split sizes.
data_dir = 'data'
condition_dim = 5
batch_size = 32
shuffle = True
validation_split = 0.1
test_split = 0.1

# Build the VibrationDataset instance from the configuration above.
vibration_dataset = VibrationDataset(
    data_dir=data_dir,
    condition_dim=condition_dim,
    batch_size=batch_size,
    shuffle=shuffle,
    validation_split=validation_split,
    test_split=test_split,
)

# Retrieve the training, validation and test datasets.
train_dataset, val_dataset, test_dataset = vibration_dataset.get_dataset_splits()
