## regression_normalization

-   回帰
-   正規化あり
-   シャッフルでの効果検証
-   sklearn の StandardScaler による正規化


### 準備


In [1]:
import os
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.metrics import mean_squared_error

from sklearn.preprocessing import StandardScaler

In [2]:
DATA_DIR = './dms_data'
N_TRIALS = 5

In [3]:

def load_and_preprocess_data(file_path):
  """CSVを読み込んで前処理する関数。"""
  df = pd.read_csv(file_path)
  df = df.drop(['timestamp'], axis=1)
  return df.dropna()


def get_data_from_directory(directory_path):
  """指定ディレクトリのCSV全部を読み込んで結合する関数。"""
  files = [f for f in os.listdir(directory_path) if f.endswith('.csv')]
  data_frames = [load_and_preprocess_data(os.path.join(directory_path, file)) for file in files]
  return pd.concat(data_frames, ignore_index=True)


# 特徴量を定義
features = [
    'm_speed', 'm_speed_var_480', 'm_speed_stddev_480', 'm_acceleration',
    'm_acceleration_var_480', 'm_acceleration_stddev_480', 'm_jerk',
    'm_jerk_var_480', 'm_jerk_stddev_480', 'oss', 'Sleepiness'
]


def evaluate_model(model, X, y):
  """モデルの評価。RMSE"""
  predictions = model.predict(X)
  return np.sqrt(mean_squared_error(y, predictions))


def printResults(rmse_results, title, if_print=True):
  mean_rmse = np.mean(rmse_results)
  median_rmse = np.median(rmse_results)
  variance_rmse = np.var(rmse_results)
  std_rmse = np.std(rmse_results)
  min_rmse = np.min(rmse_results)
  max_rmse = np.max(rmse_results)

  out = ''
  out += f"[{title}]RMSEの平均値: {mean_rmse}\n"
  # out += f"[{title}]RMSEの中央値: {median_rmse}\n"
  # out += f"[{title}]RMSEの分散: {variance_rmse}\n"
  # out += f"[{title}]RMSEの標準偏差: {std_rmse}\n"
  out += f"[{title}]RMSEの最小値: {min_rmse}\n"
  out += f"[{title}]RMSEの最大値: {max_rmse}\n"

  if if_print:
    print(out)
  return out

  # print(f"[{title}]RMSEの平均値: {mean_rmse}")
  # # print(f"[{title}]RMSEの中央値: {median_rmse}")
  # # print(f"[{title}]RMSEの分散: {variance_rmse}")
  # # print(f"[{title}]RMSEの標準偏差: {std_rmse}")
  # print(f"[{title}]RMSEの最小値: {min_rmse}")
  # print(f"[{title}]RMSEの最大値: {max_rmse}")


def get_data_from_directory_with_scaling(directory_path, scaler=None):
  """指定ディレクトリのCSV全部を読み込んで結合し、スケーリングする関数。"""
  files = [f for f in os.listdir(directory_path) if f.endswith('.csv')]
  data_frames = [load_and_preprocess_data(os.path.join(directory_path, file)) for file in files]
  combined_df = pd.concat(data_frames, ignore_index=True)

  if scaler is not None:
    # 既存のスケーラーを使用してデータを変換
    combined_df[features] = scaler.transform(combined_df[features])
  else:
    # 新しいスケーラーをフィットしてデータを変換
    scaler = StandardScaler()
    combined_df[features] = scaler.fit_transform(combined_df[features])

  return combined_df, scaler

In [4]:
# トレインデータとテストデータを読み込む
# train = get_data_from_directory(os.path.join(DATA_DIR, 'train'))
# test = get_data_from_directory(os.path.join(DATA_DIR, 'test'))
train, scaler = get_data_from_directory_with_scaling(os.path.join(DATA_DIR, 'train'))
test, _ = get_data_from_directory_with_scaling(os.path.join(DATA_DIR, 'test'), scaler=scaler)

# oss_variance = train_data['oss'].var()
# sleepiness_variance = train_data['Sleepiness'].var()

# print(f"ossの分散: {oss_variance}")
# print(f"Sleepinessの分散: {sleepiness_variance}")
# print(train_data.head())
# print(train_data.describe())

### LightGBM


#### 正規化導入前


In [5]:
def train_lightgbm_full(X, y):
  """Train LightGBM model on the full training data."""
  train_data = lgb.Dataset(X, label=y)
  params = {
      'objective': 'regression',
      'metric': 'rmse',
      'verbose': -1
  }
  model = lgb.train(params, train_data)
  return model


def evaluate_on_test(model, X_test, y_test):
  """Evaluate model on the test data."""
  predictions = model.predict(X_test)
  return np.sqrt(mean_squared_error(y_test, predictions))


def evaluate_trials_with_test(X, y, X_test, y_test):
  rmses_train = []
  rmses_test = []
  for _ in range(N_TRIALS):
    model = train_lightgbm_full(X, y)
    rmse_train = evaluate_model(model, X, y)
    rmse_test = evaluate_on_test(model, X_test, y_test)
    rmses_train.append(rmse_train)
    rmses_test.append(rmse_test)
  return rmses_train, rmses_test

In [6]:
train_data = get_data_from_directory(os.path.join(DATA_DIR, 'train'))
X = train_data.drop(['oss', 'Sleepiness'], axis=1)

test_data = get_data_from_directory(os.path.join(DATA_DIR, 'test'))
X_test = test_data.drop(['oss', 'Sleepiness'], axis=1)

In [7]:
oss_rmses_train, oss_rmses_test = evaluate_trials_with_test(X, train_data['oss'], X_test, test_data['oss'])
printResults(oss_rmses_train, 'lgbm-oss-train')
printResults(oss_rmses_test, 'lgbm-oss-test')

[lgbm-oss-train]RMSEの平均値: 0.12133256979736869
[lgbm-oss-train]RMSEの最小値: 0.12133256979736869
[lgbm-oss-train]RMSEの最大値: 0.12133256979736869

[lgbm-oss-test]RMSEの平均値: 0.5634352602546109
[lgbm-oss-test]RMSEの最小値: 0.5634352602546109
[lgbm-oss-test]RMSEの最大値: 0.5634352602546109



'[lgbm-oss-test]RMSEの平均値: 0.5634352602546109\n[lgbm-oss-test]RMSEの最小値: 0.5634352602546109\n[lgbm-oss-test]RMSEの最大値: 0.5634352602546109\n'

In [8]:
sleepiness_rmses_train, sleepiness_rmses_test = evaluate_trials_with_test(X, train_data['Sleepiness'], X_test, test_data['Sleepiness'])
printResults(sleepiness_rmses_train, 'lgbm-sleepiness-train')
printResults(sleepiness_rmses_test, 'lgbm-sleepiness-test')

[lgbm-sleepiness-train]RMSEの平均値: 0.21806987053339172
[lgbm-sleepiness-train]RMSEの最小値: 0.21806987053339172
[lgbm-sleepiness-train]RMSEの最大値: 0.21806987053339172

[lgbm-sleepiness-test]RMSEの平均値: 1.4501764251981017
[lgbm-sleepiness-test]RMSEの最小値: 1.4501764251981017
[lgbm-sleepiness-test]RMSEの最大値: 1.4501764251981017



'[lgbm-sleepiness-test]RMSEの平均値: 1.4501764251981017\n[lgbm-sleepiness-test]RMSEの最小値: 1.4501764251981017\n[lgbm-sleepiness-test]RMSEの最大値: 1.4501764251981017\n'

In [9]:
from sklearn.model_selection import KFold


def evaluate_with_kfold(X, y, n_splits=5):
  """5分割交差検証を使用してモデルを評価する関数"""
  kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
  rmses = []

  for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    model = train_lightgbm_full(X_train, y_train)
    rmse = evaluate_on_test(model, X_test, y_test)
    rmses.append(rmse)

  return rmses


all_data = pd.concat([train, test], ignore_index=True)

X_all = all_data.drop(["oss", "Sleepiness"], axis=1)
y_all_oss = all_data["oss"]
y_all_sleepiness = all_data["Sleepiness"]

oss_kfold_rmses = evaluate_with_kfold(X_all, y_all_oss)
printResults(oss_kfold_rmses, "lgbm-oss-kfold")

sleepiness_kfold_rmses = evaluate_with_kfold(X_all, y_all_sleepiness)
printResults(sleepiness_kfold_rmses, "lgbm-sleepiness-kfold")

[lgbm-oss-kfold]RMSEの平均値: 0.4242293079010827
[lgbm-oss-kfold]RMSEの最小値: 0.3997471486842964
[lgbm-oss-kfold]RMSEの最大値: 0.4558611393616859

[lgbm-sleepiness-kfold]RMSEの平均値: 0.3162353774008916
[lgbm-sleepiness-kfold]RMSEの最小値: 0.3035231749457593
[lgbm-sleepiness-kfold]RMSEの最大値: 0.336566900053838



'[lgbm-sleepiness-kfold]RMSEの平均値: 0.3162353774008916\n[lgbm-sleepiness-kfold]RMSEの最小値: 0.3035231749457593\n[lgbm-sleepiness-kfold]RMSEの最大値: 0.336566900053838\n'

#### 正規化導入


In [10]:
from sklearn.preprocessing import MinMaxScaler

In [11]:

train_data = get_data_from_directory(os.path.join(DATA_DIR, 'train'))
test_data = get_data_from_directory(os.path.join(DATA_DIR, 'test'))


def normalize_data(series):
  scaler = MinMaxScaler()
  series = series.values.reshape(-1, 1)
  series = scaler.fit_transform(series)
  series = series.ravel()
  return pd.Series(series)


def evaluate_trials_with_test_and_normalization(X, y, X_test, y_test):
  y = normalize_data(y)  # 正規化
  y_test = normalize_data(y_test)  # テストデータも正規化
  rmses_train = []
  rmses_test = []
  for _ in range(N_TRIALS):
    model = train_lightgbm_full(X, y)
    rmse_train = evaluate_model(model, X, y)
    rmse_test = evaluate_on_test(model, X_test, y_test)
    rmses_train.append(rmse_train)
    rmses_test.append(rmse_test)
  return rmses_train, rmses_test


X = train_data.drop(['oss', 'Sleepiness'], axis=1)
X_test = test_data.drop(['oss', 'Sleepiness'], axis=1)

oss_rmses_train_norm, oss_rmses_test_norm = evaluate_trials_with_test_and_normalization(X, train_data['oss'], X_test, test_data['oss'])
# printResults(oss_rmses_train_norm, 'lgbm-oss-train-normalized')
printResults(oss_rmses_test_norm, 'lgbm-oss-test-normalized')

sleepiness_rmses_train_norm, sleepiness_rmses_test_norm = evaluate_trials_with_test_and_normalization(X, train_data['Sleepiness'], X_test, test_data['Sleepiness'])
# printResults(sleepiness_rmses_train_norm, 'lgbm-sleepiness-train-normalized')
printResults(sleepiness_rmses_test_norm, 'lgbm-sleepiness-test-normalized')

[lgbm-oss-test-normalized]RMSEの平均値: 0.16889086825505312
[lgbm-oss-test-normalized]RMSEの最小値: 0.16889086825505312
[lgbm-oss-test-normalized]RMSEの最大値: 0.16889086825505312

[lgbm-sleepiness-test-normalized]RMSEの平均値: 0.18127205293997622
[lgbm-sleepiness-test-normalized]RMSEの最小値: 0.18127205293997622
[lgbm-sleepiness-test-normalized]RMSEの最大値: 0.18127205293997622



'[lgbm-sleepiness-test-normalized]RMSEの平均値: 0.18127205293997622\n[lgbm-sleepiness-test-normalized]RMSEの最小値: 0.18127205293997622\n[lgbm-sleepiness-test-normalized]RMSEの最大値: 0.18127205293997622\n'

#### 正規化 & 5 分割交差検証 & データシャッフル


In [12]:
def evaluate_with_kfold_and_normalization(X, y, n_splits=5):
  """5分割交差検証を使用してモデルを評価する関数（目的変数の正規化付き）"""
  y = normalize_data(y)
  kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
  rmses = []

  for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    model = train_lightgbm_full(X_train, y_train)
    rmse = evaluate_on_test(model, X_test, y_test)
    rmses.append(rmse)

  return rmses


# 正規化区別
outStr = ''


def shuffle_dataframe(df):
  return df.sample(frac=1.0).reset_index(drop=True)

In [13]:
outStr += "train,testを分けて正規化\n"
train = shuffle_dataframe(get_data_from_directory(os.path.join(DATA_DIR, 'train')))
test = shuffle_dataframe(get_data_from_directory(os.path.join(DATA_DIR, 'test')))

X_train = train.drop(['oss', 'Sleepiness'], axis=1)
y_train_oss = train['oss']
y_train_sleepiness = train['Sleepiness']
X_test = test.drop(['oss', 'Sleepiness'], axis=1)
y_test_oss = test['oss']
y_test_sleepiness = test['Sleepiness']

oss_kfold_rmses_norm_divided = evaluate_with_kfold_and_normalization(X_train, y_train_oss)
outStr += printResults(oss_kfold_rmses_norm_divided, "lgbm-oss-kfold-normalized-divided", False)

sleepiness_kfold_rmses_norm_divided = evaluate_with_kfold_and_normalization(X_train, y_train_sleepiness)
outStr += printResults(sleepiness_kfold_rmses_norm_divided, "lgbm-sleepiness-kfold-normalized-divided", False)

In [14]:
outStr += "\ntrain,testを分けずに正規化\n"
X_all = shuffle_dataframe(all_data.drop(['oss', 'Sleepiness'], axis=1))
y_all_oss = all_data['oss']
y_all_sleepiness = all_data['Sleepiness']

oss_kfold_rmses_norm = evaluate_with_kfold_and_normalization(X_all, y_all_oss)
sleepiness_kfold_rmses_norm = evaluate_with_kfold_and_normalization(X_all, y_all_sleepiness)

outStr += printResults(sleepiness_kfold_rmses_norm, 'lgbm-sleepiness-kfold-normalized-full', False)
outStr += printResults(oss_kfold_rmses_norm, 'lgbm-oss-kfold-normalized-full', False)

In [15]:
print(outStr)

train,testを分けて正規化
[lgbm-oss-kfold-normalized-divided]RMSEの平均値: 0.09519694108360775
[lgbm-oss-kfold-normalized-divided]RMSEの最小値: 0.08680661331097099
[lgbm-oss-kfold-normalized-divided]RMSEの最大値: 0.1020520511187413
[lgbm-sleepiness-kfold-normalized-divided]RMSEの平均値: 0.0698522376620578
[lgbm-sleepiness-kfold-normalized-divided]RMSEの最小値: 0.06342449327861141
[lgbm-sleepiness-kfold-normalized-divided]RMSEの最大値: 0.07627391987913379

train,testを分けずに正規化
[lgbm-sleepiness-kfold-normalized-full]RMSEの平均値: 0.26859956930851575
[lgbm-sleepiness-kfold-normalized-full]RMSEの最小値: 0.2607842522534266
[lgbm-sleepiness-kfold-normalized-full]RMSEの最大値: 0.2756251987352747
[lgbm-oss-kfold-normalized-full]RMSEの平均値: 0.22731717795051304
[lgbm-oss-kfold-normalized-full]RMSEの最小値: 0.22537494573499295
[lgbm-oss-kfold-normalized-full]RMSEの最大値: 0.22863433313686574



#### \_


In [16]:
# 元
oss_rmses_train, oss_rmses_test = evaluate_trials_with_test(X, train_data['oss'], X_test, test_data['oss'])
printResults(oss_rmses_train, 'lgbm-oss-train')
printResults(oss_rmses_test, 'lgbm-oss-test')
###
sleepiness_rmses_train, sleepiness_rmses_test = evaluate_trials_with_test(X, train_data['Sleepiness'], X_test, test_data['Sleepiness'])
printResults(sleepiness_rmses_train, 'lgbm-sleepiness-train')
printResults(sleepiness_rmses_test, 'lgbm-sleepiness-test')

[lgbm-oss-train]RMSEの平均値: 0.12133256979736869
[lgbm-oss-train]RMSEの最小値: 0.12133256979736869
[lgbm-oss-train]RMSEの最大値: 0.12133256979736869

[lgbm-oss-test]RMSEの平均値: 0.9940887616705869
[lgbm-oss-test]RMSEの最小値: 0.9940887616705868
[lgbm-oss-test]RMSEの最大値: 0.9940887616705868

[lgbm-sleepiness-train]RMSEの平均値: 0.21806987053339172
[lgbm-sleepiness-train]RMSEの最小値: 0.21806987053339172
[lgbm-sleepiness-train]RMSEの最大値: 0.21806987053339172

[lgbm-sleepiness-test]RMSEの平均値: 3.075028808969175
[lgbm-sleepiness-test]RMSEの最小値: 3.075028808969175
[lgbm-sleepiness-test]RMSEの最大値: 3.075028808969175



'[lgbm-sleepiness-test]RMSEの平均値: 3.075028808969175\n[lgbm-sleepiness-test]RMSEの最小値: 3.075028808969175\n[lgbm-sleepiness-test]RMSEの最大値: 3.075028808969175\n'

In [17]:
# 正規化を分けてみる
all_data = pd.concat([train, test], ignore_index=True)


def normalize_data(series):
  scaler = MinMaxScaler()
  series = series.values.reshape(-1, 1)
  series = scaler.fit_transform(series)
  series = series.ravel()
  return pd.Series(series)


def evaluate_with_kfold_and_normalization(X, y, n_splits=5):
  """5分割交差検証を使用してモデルを評価する関数（目的変数の正規化付き）"""
  y = normalize_data(y)
  kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
  rmses = []

  for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    model = train_lightgbm_full(X_train, y_train)
    rmse = evaluate_on_test(model, X_test, y_test)
    rmses.append(rmse)

  return rmses


X_all = all_data.drop(['oss', 'Sleepiness'], axis=1)
y_all_oss = all_data['oss']
y_all_sleepiness = all_data['Sleepiness']

oss_kfold_rmses_norm = evaluate_with_kfold_and_normalization(X_all, y_all_oss)

sleepiness_kfold_rmses_norm = evaluate_with_kfold_and_normalization(X_all, y_all_sleepiness)
printResults(sleepiness_kfold_rmses_norm, 'lgbm-sleepiness-kfold-normalized-full')

[lgbm-sleepiness-kfold-normalized-full]RMSEの平均値: 0.07520984690069096
[lgbm-sleepiness-kfold-normalized-full]RMSEの最小値: 0.06829121739978067
[lgbm-sleepiness-kfold-normalized-full]RMSEの最大値: 0.08426810056491758



'[lgbm-sleepiness-kfold-normalized-full]RMSEの平均値: 0.07520984690069096\n[lgbm-sleepiness-kfold-normalized-full]RMSEの最小値: 0.06829121739978067\n[lgbm-sleepiness-kfold-normalized-full]RMSEの最大値: 0.08426810056491758\n'

In [18]:
# 交差検証と正規化（逆変換付き）を組み合わせた関数
def evaluate_with_kfold_and_normalization_inverse(X, y, n_splits=5):
  """5分割交差検証を使用してモデルを評価する関数（目的変数の正規化と逆変換付き）"""
  kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
  rmses = []
  scaler = MinMaxScaler()

  for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # 正規化（訓練データでfitし、訓練データとテストデータ両方をtransform）
    y_train = y_train.values.reshape(-1, 1)
    scaler.fit(y_train)
    y_train = scaler.transform(y_train).ravel()
    y_test = scaler.transform(y_test.values.reshape(-1, 1)).ravel()

    model = train_lightgbm_full(X_train, pd.Series(y_train))  # 既存のtrain_lightgbm_full関数を使用

    # 予測値を正規化の逆変換
    predictions = model.predict(X_test)
    predictions = scaler.inverse_transform(predictions.reshape(-1, 1)).ravel()

    # RMSE計算（逆変換後の予測値と元の尺度のテストデータで）
    rmse = np.sqrt(mean_squared_error(scaler.inverse_transform(y_test.reshape(-1, 1)), predictions))
    rmses.append(rmse)

  return rmses


# 特徴量と目的変数に分割
X_all = all_data.drop(['oss', 'Sleepiness'], axis=1)
y_all_oss = all_data['oss']
y_all_sleepiness = all_data['Sleepiness']

# ossモデルの交差検証（正規化と逆変換付き）
oss_kfold_rmses_norm_inv = evaluate_with_kfold_and_normalization_inverse(X_all, y_all_oss)
printResults(oss_kfold_rmses_norm_inv, 'lgbm-oss-kfold-normalized-inverse')

# Sleepinessモデルの交差検証（正規化と逆変換付き）
sleepiness_kfold_rmses_norm_inv = evaluate_with_kfold_and_normalization_inverse(X_all, y_all_sleepiness)
printResults(sleepiness_kfold_rmses_norm_inv, 'lgbm-sleepiness-kfold-normalized-inverse')

[lgbm-oss-kfold-normalized-inverse]RMSEの平均値: 0.30742938171984724
[lgbm-oss-kfold-normalized-inverse]RMSEの最小値: 0.28849035391558886
[lgbm-oss-kfold-normalized-inverse]RMSEの最大値: 0.32946652693290873

[lgbm-sleepiness-kfold-normalized-inverse]RMSEの平均値: 0.6016787752055277
[lgbm-sleepiness-kfold-normalized-inverse]RMSEの最小値: 0.5463297391982453
[lgbm-sleepiness-kfold-normalized-inverse]RMSEの最大値: 0.6741448045193407



'[lgbm-sleepiness-kfold-normalized-inverse]RMSEの平均値: 0.6016787752055277\n[lgbm-sleepiness-kfold-normalized-inverse]RMSEの最小値: 0.5463297391982453\n[lgbm-sleepiness-kfold-normalized-inverse]RMSEの最大値: 0.6741448045193407\n'

### GNN


In [19]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data

In [20]:
def transform_to_graph_data(X, y):
  x = torch.tensor(X.values, dtype=torch.float)
  edge_index = torch.tensor([list(range(X.shape[0]-1)), list(range(1, X.shape[0]))], dtype=torch.long)
  y = torch.tensor(y.values, dtype=torch.float).view(-1, 1)
  data = Data(x=x, edge_index=edge_index, y=y)
  return data


class SimpleGNN(torch.nn.Module):
  def __init__(self, num_features):
    super(SimpleGNN, self).__init__()
    self.conv1 = GCNConv(num_features, 64)
    self.conv2 = GCNConv(64, 32)
    self.fc = torch.nn.Linear(32, 1)

  def forward(self, data):
    x, edge_index = data.x, data.edge_index
    x = self.conv1(x, edge_index)
    x = F.relu(x)
    x = self.conv2(x, edge_index)
    x = F.relu(x)
    x = self.fc(x)
    return x


def train_gnn(train_data, num_features):
  model = SimpleGNN(num_features=num_features)
  optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
  for epoch in range(10):
    model.train()
    optimizer.zero_grad()
    out = model(train_data)
    loss = torch.nn.MSELoss()(out, train_data.y)
    loss.backward()
    optimizer.step()
  return model


def evaluate_gnn_trials(X_train, y_train, X_test, y_test):
  train_data = transform_to_graph_data(X_train, y_train)
  test_data = transform_to_graph_data(X_test, y_test)
  gnn_rmses = []
  for _ in range(N_TRIALS):
    model = train_gnn(train_data, num_features=X_train.shape[1])
    model.eval()
    preds = model(test_data)
    rmse = np.sqrt(mean_squared_error(y_test, preds.detach().numpy()))
    gnn_rmses.append(rmse)
  return gnn_rmses

In [21]:
train = get_data_from_directory(os.path.join(DATA_DIR, 'train'))
test = get_data_from_directory(os.path.join(DATA_DIR, 'test'))

X_train = train.drop(['oss', 'Sleepiness'], axis=1)
X_test = test.drop(['oss', 'Sleepiness'], axis=1)

# ossモデル
oss_rmses = evaluate_gnn_trials(X_train, train['oss'], X_test, test['oss'])
# print(f"GNN ossモデルの平均RMSE: {np.mean(oss_rmses)}")
# print(f"GNN ossモデルのRMSEの標準偏差: {np.std(oss_rmses)}")
printResults(oss_rmses, 'gnn-oss')

# Sleepinessモデル
sleepiness_rmses = evaluate_gnn_trials(X_train, train['Sleepiness'], X_test, test['Sleepiness'])
# def compute_statistics(arr):
#     median = np.median(arr)
#     mean = np.mean(arr)
#     variance = np.var(arr)
#     std_dev = np.std(arr)
#     range_val = (np.min(arr), np.max(arr))

#     return {
#         'Median': median,
#         'Mean': mean,
#         'Variance': variance,
#         'Standard Deviation': std_dev,
#         'Range': range_val
#     }
# print(compute_statistics(sleepiness_rmses))
# print(f"GNN sleepinessモデルの平均RMSE: {np.mean(sleepiness_rmses)}")
# print(f"GNN sleepinessモデルのRMSEの標準偏差: {np.std(sleepiness_rmses)}")
printResults(sleepiness_rmses, 'gnn-sleepiness')

[gnn-oss]RMSEの平均値: 4.118733474757279
[gnn-oss]RMSEの最小値: 0.8064458504799236
[gnn-oss]RMSEの最大値: 7.796994657292529

[gnn-sleepiness]RMSEの平均値: 4.405072896981996
[gnn-sleepiness]RMSEの最小値: 2.190145127694566
[gnn-sleepiness]RMSEの最大値: 8.00019198528844



'[gnn-sleepiness]RMSEの平均値: 4.405072896981996\n[gnn-sleepiness]RMSEの最小値: 2.190145127694566\n[gnn-sleepiness]RMSEの最大値: 8.00019198528844\n'

| 中央値 | 平均   | 分散   | 標準偏差 | 範囲              |
| ------ | ------ | ------ | -------- | ----------------- |
| 4.5862 | 5.0127 | 4.3706 | 2.0906   | (2.0140, 10.5504) |
