In [12]:
import os
import pandas as pd
import numpy as np
import itertools
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.linear_model import LinearRegression

In [13]:
# Root folder of the dataset; the cells below read its 'train' and 'test' subfolders.
DATA_DIR = './dms_data'
# Number of repeated train/evaluate runs performed by the trial-evaluation helpers.
N_TRIALS = 2

In [14]:

def load_and_preprocess_data(file_path):
    """Read one CSV file, discard its ``timestamp`` column and drop incomplete rows.

    Args:
        file_path: Path of the CSV file to read. The file must contain a
            ``timestamp`` column.

    Returns:
        pandas.DataFrame: The remaining columns, restricted to rows that have
        no missing values. The original row index is kept (not reset).
    """
    raw_frame = pd.read_csv(file_path)
    without_timestamp = raw_frame.drop(columns=['timestamp'])
    complete_rows = without_timestamp.dropna()
    return complete_rows

def get_data_from_directory(directory_path):
    """Load every CSV file in a directory and stack them into one DataFrame.

    Each file is passed through ``load_and_preprocess_data`` and the results
    are concatenated with a fresh ``0..n-1`` index.

    Files are processed in sorted name order. ``os.listdir`` returns entries
    in arbitrary order, so without sorting the row order of the result (and
    anything derived from row adjacency) could differ between machines or runs.

    Args:
        directory_path: Directory to scan (non-recursively) for ``*.csv`` files.

    Returns:
        pandas.DataFrame: Concatenation of all preprocessed files.

    Raises:
        ValueError: If the directory contains no ``.csv`` files.
    """
    csv_names = sorted(name for name in os.listdir(directory_path) if name.endswith('.csv'))
    if not csv_names:
        # pd.concat would also raise ValueError on an empty list, but without
        # saying which directory was empty.
        raise ValueError(f"No CSV files found in directory: {directory_path!r}")
    frames = [load_and_preprocess_data(os.path.join(directory_path, name)) for name in csv_names]
    return pd.concat(frames, ignore_index=True)

# Load the training data and the test data
train = get_data_from_directory(os.path.join(DATA_DIR, 'train'))
test = get_data_from_directory(os.path.join(DATA_DIR, 'test'))

# Define the feature columns
# NOTE(review): `features` is not referenced by any other cell visible in this
# notebook; the model cells use every column except 'oss' and 'Sleepiness'
# instead — confirm which is intended.
features = [
    'm_speed', 'm_speed_var_480', 'm_speed_stddev_480', 'm_acceleration',
    'm_acceleration_var_480', 'm_acceleration_stddev_480', 'm_jerk',
    'm_jerk_var_480', 'm_jerk_stddev_480'
]
    
def evaluate_model(model, X, y):
    """Return the root-mean-squared error of ``model.predict(X)`` against ``y``.

    Args:
        model: Any object exposing ``predict(X)``.
        X: Inputs handed to ``model.predict``.
        y: Ground-truth targets to compare the predictions with.

    Returns:
        The square root of the mean squared error.
    """
    y_hat = model.predict(X)
    mse = mean_squared_error(y, y_hat)
    return np.sqrt(mse)

def printResults(rmse_results,title):
    """Print summary statistics of a collection of RMSE values.

    Six lines are written to stdout, each prefixed with ``[title]``: mean,
    median, variance, standard deviation, minimum and maximum (in that order).

    Args:
        rmse_results: Sequence of RMSE values, one per trial.
        title: Label placed in square brackets at the start of every line.
    """
    # All statistics are computed before anything is printed, so a failure in
    # any of them leaves stdout untouched.
    avg, mid, variance, spread, lowest, highest = (
        stat(rmse_results)
        for stat in (np.mean, np.median, np.var, np.std, np.min, np.max)
    )

    # Assemble the report and emit it in a single write.
    report_lines = [
        f"[{title}]RMSEの平均値: {avg}",
        f"[{title}]RMSEの中央値: {mid}",
        f"[{title}]RMSEの分散: {variance}",
        f"[{title}]RMSEの標準偏差: {spread}",
        f"[{title}]RMSEの最小値: {lowest}",
        f"[{title}]RMSEの最大値: {highest}",
    ]
    print("\n".join(report_lines))

In [15]:
# oss_variance = train_data['oss'].var()
# sleepiness_variance = train_data['Sleepiness'].var()

# print(f"ossの分散: {oss_variance}")
# print(f"Sleepinessの分散: {sleepiness_variance}")
# print(train_data.head())
# print(train_data.describe())


### LightGBM

In [16]:
# def train_lightgbm(X, y):
#     X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
#     train_data = lgb.Dataset(X_train, label=y_train)
#     eval_data = lgb.Dataset(X_val, label=y_val, reference=train_data)
#     params = {
#         'objective': 'regression',
#         'metric': 'rmse',
#         'verbose': 0
#     }
#     model = lgb.train(params, train_data, valid_sets=eval_data)
#     return model, X_val, y_val

# def evaluate_trials(X, y):
#     rmses = []
#     for _ in range(N_TRIALS):
#         model, X_val, y_val = train_lightgbm(X, y)
#         rmse = np.sqrt(mean_squared_error(y_val, model.predict(X_val)))
#         rmses.append(rmse)
#     return rmses
def train_lightgbm_full(X, y):
    """Fit a LightGBM regressor on all rows of ``X``/``y`` (no validation split).

    The parameters request the ``regression`` objective with ``rmse`` as the
    metric and ``verbose=-1``; every other setting is left at the library
    default.

    Args:
        X: Feature table used for training.
        y: Target values aligned with ``X``.

    Returns:
        The trained model returned by ``lgb.train``.
    """
    booster_params = dict(objective='regression', metric='rmse', verbose=-1)
    full_dataset = lgb.Dataset(X, label=y)
    return lgb.train(booster_params, full_dataset)

def evaluate_on_test(model, X_test, y_test):
    """Return the RMSE of ``model``'s predictions on the test data.

    This is the same computation as ``evaluate_model``. It delegates to that
    function so the metric is defined in exactly one place and the train and
    test scores cannot drift apart.

    Args:
        model: Any object exposing ``predict(X)``.
        X_test: Test inputs handed to ``model.predict``.
        y_test: Ground-truth test targets.

    Returns:
        The square root of the mean squared error on the test data.
    """
    return evaluate_model(model, X_test, y_test)

def evaluate_trials_with_test(X, y, X_test, y_test):
    """Train LightGBM ``N_TRIALS`` times and collect train/test RMSE per run.

    Every trial refits on the same ``X``/``y`` with the same parameters; the
    fitted model is scored on the training data and on the test data.

    Args:
        X: Training features.
        y: Training targets.
        X_test: Test features.
        y_test: Test targets.

    Returns:
        tuple: ``(train_rmses, test_rmses)``, two lists with one entry per trial.
    """
    train_scores, test_scores = [], []
    for _trial in range(N_TRIALS):
        booster = train_lightgbm_full(X, y)
        score_on_train = evaluate_model(booster, X, y)
        score_on_test = evaluate_on_test(booster, X_test, y_test)
        train_scores.append(score_on_train)
        test_scores.append(score_on_test)
    return train_scores, test_scores

In [17]:
# train_data = get_data_from_directory(os.path.join(DATA_DIR, 'train'))
# X = train_data.drop(['oss', 'Sleepiness'], axis=1)

# # ossモデル
# oss_rmses = evaluate_trials(X, train_data['oss'])
# printResults(oss_rmses,'lgbm-oss')

# # Sleepinessモデル
# sleepiness_rmses = evaluate_trials(X, train_data['Sleepiness'])
# printResults(sleepiness_rmses,'lgbm-sleepiness')
# Read the training split again and use every column except the two targets
# ('oss', 'Sleepiness') as model inputs.
train_data = get_data_from_directory(os.path.join(DATA_DIR, 'train'))
X = train_data.drop(['oss', 'Sleepiness'], axis=1)

# Same treatment for the test split.
test_data = get_data_from_directory(os.path.join(DATA_DIR, 'test'))
X_test = test_data.drop(['oss', 'Sleepiness'], axis=1)

In [18]:
# LightGBM on the 'oss' target: RMSE statistics over N_TRIALS runs, train vs. test.
oss_rmses_train, oss_rmses_test = evaluate_trials_with_test(X, train_data['oss'], X_test, test_data['oss'])
printResults(oss_rmses_train, 'lgbm-oss-train')
printResults(oss_rmses_test, 'lgbm-oss-test')

[lgbm-oss-train]RMSEの平均値: 0.12457814822031583
[lgbm-oss-train]RMSEの中央値: 0.12457814822031583
[lgbm-oss-train]RMSEの分散: 0.0
[lgbm-oss-train]RMSEの標準偏差: 0.0
[lgbm-oss-train]RMSEの最小値: 0.12457814822031583
[lgbm-oss-train]RMSEの最大値: 0.12457814822031583
[lgbm-oss-test]RMSEの平均値: 0.541056204141207
[lgbm-oss-test]RMSEの中央値: 0.541056204141207
[lgbm-oss-test]RMSEの分散: 0.0
[lgbm-oss-test]RMSEの標準偏差: 0.0
[lgbm-oss-test]RMSEの最小値: 0.541056204141207
[lgbm-oss-test]RMSEの最大値: 0.541056204141207


In [19]:
# LightGBM on the 'Sleepiness' target: RMSE statistics over N_TRIALS runs, train vs. test.
sleepiness_rmses_train, sleepiness_rmses_test = evaluate_trials_with_test(X, train_data['Sleepiness'], X_test, test_data['Sleepiness'])
printResults(sleepiness_rmses_train, 'lgbm-sleepiness-train')
printResults(sleepiness_rmses_test, 'lgbm-sleepiness-test')

[lgbm-sleepiness-train]RMSEの平均値: 0.22518856302682227
[lgbm-sleepiness-train]RMSEの中央値: 0.22518856302682227
[lgbm-sleepiness-train]RMSEの分散: 0.0
[lgbm-sleepiness-train]RMSEの標準偏差: 0.0
[lgbm-sleepiness-train]RMSEの最小値: 0.22518856302682227
[lgbm-sleepiness-train]RMSEの最大値: 0.22518856302682227
[lgbm-sleepiness-test]RMSEの平均値: 1.3758729110252772
[lgbm-sleepiness-test]RMSEの中央値: 1.3758729110252772
[lgbm-sleepiness-test]RMSEの分散: 0.0
[lgbm-sleepiness-test]RMSEの標準偏差: 0.0
[lgbm-sleepiness-test]RMSEの最小値: 1.3758729110252772
[lgbm-sleepiness-test]RMSEの最大値: 1.3758729110252772


### GNN

In [20]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data

Exception ignored in: <function _xla_gc_callback at 0x7f5e45ab3160>
Traceback (most recent call last):
  File "/home/buntin/jupyter-env/lib64/python3.9/site-packages/jax/_src/lib/__init__.py", line 103, in _xla_gc_callback
    def _xla_gc_callback(*args):
KeyboardInterrupt: 


In [None]:
def transform_to_graph_data(X, y):
    """Wrap a feature table and its targets as one chain-shaped graph.

    Every row of ``X`` becomes a node whose feature vector is that row.
    A directed edge connects each row to the next one (row ``i`` -> row
    ``i + 1``), so the graph is a path that follows the row order of ``X``.

    Args:
        X: Table of node features exposing ``.values`` and ``.shape``
            (used here with a pandas DataFrame).
        y: Targets exposing ``.values``, one per row of ``X``.

    Returns:
        torch_geometric.data.Data: ``x`` is a float tensor of the features,
        ``edge_index`` is a long tensor of shape ``(2, max(n - 1, 0))`` and
        ``y`` is a float tensor reshaped to ``(n, 1)``.
    """
    node_features = torch.tensor(X.values, dtype=torch.float)

    # Build the chain i -> i + 1 directly as tensors instead of materialising
    # two Python lists of length n - 1. max(..., 0) keeps the empty-table case
    # producing a (2, 0) edge index, as before.
    num_edges = max(X.shape[0] - 1, 0)
    sources = torch.arange(num_edges, dtype=torch.long)
    edge_index = torch.stack([sources, sources + 1])

    # Column vector so the targets line up with the (n, 1) model output.
    targets = torch.tensor(y.values, dtype=torch.float).view(-1, 1)
    return Data(x=node_features, edge_index=edge_index, y=targets)

class SimpleGNN(torch.nn.Module):
    """Two-layer graph convolutional regressor with a linear output head.

    Layer sizes are ``num_features -> 64 -> 32 -> 1``; a ReLU follows each of
    the two ``GCNConv`` layers, and the final linear layer emits one value per
    node.
    """

    def __init__(self, num_features):
        """Create the layers for inputs with ``num_features`` values per node."""
        super().__init__()
        self.conv1 = GCNConv(num_features, 64)
        self.conv2 = GCNConv(64, 32)
        self.fc = torch.nn.Linear(32, 1)

    def forward(self, data):
        """Run the network on a graph object exposing ``x`` and ``edge_index``."""
        hidden = data.x
        edges = data.edge_index
        # Both graph convolutions share the same edge structure and activation.
        for conv_layer in (self.conv1, self.conv2):
            hidden = F.relu(conv_layer(hidden, edges))
        return self.fc(hidden)

def train_gnn(train_data, num_features, epochs=10, lr=0.01):
    """Train a fresh ``SimpleGNN`` on one graph with full-batch gradient steps.

    Each epoch performs a single Adam update on the mean-squared error between
    the model output for the whole graph and ``train_data.y``.

    Args:
        train_data: Graph object accepted by ``SimpleGNN.forward`` that also
            carries the targets in ``.y``.
        num_features: Number of input features per node.
        epochs: Number of optimisation steps. Defaults to 10, the value that
            was previously hard-coded.
        lr: Adam learning rate. Defaults to 0.01, the value that was
            previously hard-coded.

    Returns:
        SimpleGNN: The trained model (left in training mode).
    """
    model = SimpleGNN(num_features=num_features)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # The loss module and the train-mode switch do not change between epochs,
    # so they are set up once outside the loop.
    criterion = torch.nn.MSELoss()
    model.train()
    for _ in range(epochs):
        optimizer.zero_grad()
        loss = criterion(model(train_data), train_data.y)
        loss.backward()
        optimizer.step()
    return model

def evaluate_gnn_trials(X_train, y_train, X_test, y_test):
    """Train the GNN ``N_TRIALS`` times and return the test RMSE of each run.

    The train and test tables are converted to graphs once; every trial then
    trains a new model on the training graph and scores it on the test graph.

    Args:
        X_train: Training features (table with ``.shape`` and ``.values``).
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.

    Returns:
        list: One RMSE value per trial.
    """
    train_graph = transform_to_graph_data(X_train, y_train)
    test_graph = transform_to_graph_data(X_test, y_test)
    gnn_rmses = []
    for _ in range(N_TRIALS):
        model = train_gnn(train_graph, num_features=X_train.shape[1])
        model.eval()
        # Inference only: skip autograd bookkeeping so no computation graph is
        # built and kept alive for the whole test set.
        with torch.no_grad():
            preds = model(test_graph)
        rmse = np.sqrt(mean_squared_error(y_test, preds.numpy()))
        gnn_rmses.append(rmse)
    return gnn_rmses

In [None]:
# Read both splits and use every column except the two targets as node features.
train = get_data_from_directory(os.path.join(DATA_DIR, 'train'))
test = get_data_from_directory(os.path.join(DATA_DIR, 'test'))

X_train = train.drop(['oss', 'Sleepiness'], axis=1)
X_test = test.drop(['oss', 'Sleepiness'], axis=1)

# oss model
oss_rmses = evaluate_gnn_trials(X_train, train['oss'], X_test, test['oss'])
# print(f"GNN ossモデルの平均RMSE: {np.mean(oss_rmses)}")
# print(f"GNN ossモデルのRMSEの標準偏差: {np.std(oss_rmses)}")
printResults(oss_rmses,'gnn-oss')

# Sleepiness model
sleepiness_rmses = evaluate_gnn_trials(X_train, train['Sleepiness'], X_test, test['Sleepiness'])
# def compute_statistics(arr):
#     median = np.median(arr)
#     mean = np.mean(arr)
#     variance = np.var(arr)
#     std_dev = np.std(arr)
#     range_val = (np.min(arr), np.max(arr))
    
#     return {
#         'Median': median,
#         'Mean': mean,
#         'Variance': variance,
#         'Standard Deviation': std_dev,
#         'Range': range_val
#     }
# print(compute_statistics(sleepiness_rmses))
# print(f"GNN sleepinessモデルの平均RMSE: {np.mean(sleepiness_rmses)}")
# print(f"GNN sleepinessモデルのRMSEの標準偏差: {np.std(sleepiness_rmses)}")
# Report the RMSE statistics of the GNN trials for the 'Sleepiness' target.
printResults(sleepiness_rmses,'gnn-sleepiness')

[gnn-oss]RMSEの平均値: 4.1393645076141805
[gnn-oss]RMSEの中央値: 4.013760194499868
[gnn-oss]RMSEの分散: 3.934743447058031
[gnn-oss]RMSEの標準偏差: 1.9836187756365968
[gnn-oss]RMSEの最小値: 1.12306073346855
[gnn-oss]RMSEの最大値: 7.972186727006109
[gnn-sleepiness]RMSEの平均値: 4.259584191602232
[gnn-sleepiness]RMSEの中央値: 4.034555438381602
[gnn-sleepiness]RMSEの分散: 2.77928866224355
[gnn-sleepiness]RMSEの標準偏差: 1.6671198703883143
[gnn-sleepiness]RMSEの最小値: 1.9850700341498457
[gnn-sleepiness]RMSEの最大値: 9.063698413915455


| 中央値 | 平均 | 分散 | 標準偏差 | 範囲 |
| --- | --- | --- | --- | --- |
| 4.5862 | 5.0127 | 4.3706 | 2.0906 | (2.0140, 10.5504) |

