In [25]:
import os
import pandas as pd
import numpy as np
import itertools
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.linear_model import LinearRegression

In [26]:
DATA_DIR = './dms_data'
N_TRIALS = 50

In [27]:

def load_and_preprocess_data(file_path):
    """CSVを読み込んで前処理する関数。"""
    df = pd.read_csv(file_path)
    df = df.drop(['timestamp'], axis=1)
    return df.dropna()

def get_data_from_directory(directory_path):
    """指定ディレクトリのCSV全部を読み込んで結合する関数。"""
    files = [f for f in os.listdir(directory_path) if f.endswith('.csv')]
    data_frames = [load_and_preprocess_data(os.path.join(directory_path, file)) for file in files]
    return pd.concat(data_frames, ignore_index=True)

# トレインデータとテストデータを読み込む
train = get_data_from_directory(os.path.join(DATA_DIR, 'train'))
test = get_data_from_directory(os.path.join(DATA_DIR, 'test'))

# 特徴量を定義
features = [
    'm_speed', 'm_speed_var_480', 'm_speed_stddev_480', 'm_acceleration',
    'm_acceleration_var_480', 'm_acceleration_stddev_480', 'm_jerk',
    'm_jerk_var_480', 'm_jerk_stddev_480'
]
    
def evaluate_model(model, X, y):
    """モデルの評価。RMSE"""
    predictions = model.predict(X)
    return np.sqrt(mean_squared_error(y, predictions))

def printResults(rmse_results,title):
    mean_rmse = np.mean(rmse_results)
    median_rmse = np.median(rmse_results)
    variance_rmse = np.var(rmse_results)
    std_rmse = np.std(rmse_results)
    min_rmse = np.min(rmse_results)
    max_rmse = np.max(rmse_results)

    # 統計量を表示
    print(f"[{title}]RMSEの平均値: {mean_rmse}")
    print(f"[{title}]RMSEの中央値: {median_rmse}")
    print(f"[{title}]RMSEの分散: {variance_rmse}")
    print(f"[{title}]RMSEの標準偏差: {std_rmse}")
    print(f"[{title}]RMSEの最小値: {min_rmse}")
    print(f"[{title}]RMSEの最大値: {max_rmse}")

In [28]:
oss_variance = train_data['oss'].var()
sleepiness_variance = train_data['Sleepiness'].var()

print(f"ossの分散: {oss_variance}")
print(f"Sleepinessの分散: {sleepiness_variance}")


ossの分散: 0.5037177545123203
Sleepinessの分散: 3.4521796373462967


### LightGBM

In [29]:
def train_lightgbm(X, y):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    train_data = lgb.Dataset(X_train, label=y_train)
    eval_data = lgb.Dataset(X_val, label=y_val, reference=train_data)
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'verbose': -1
    }
    model = lgb.train(params, train_data, valid_sets=eval_data)
    return model, X_val, y_val

def evaluate_trials(X, y):
    rmses = []
    for _ in range(N_TRIALS):
        model, X_val, y_val = train_lightgbm(X, y)
        rmse = np.sqrt(mean_squared_error(y_val, model.predict(X_val)))
        rmses.append(rmse)
    return rmses

In [30]:
train_data = get_data_from_directory(os.path.join(DATA_DIR, 'train'))
X = train_data.drop(['oss', 'Sleepiness'], axis=1)

# ossモデル
oss_rmses = evaluate_trials(X, train_data['oss'])
printResults(oss_rmses,'lgbm-oss')

# Sleepinessモデル
sleepiness_rmses = evaluate_trials(X, train_data['Sleepiness'])
printResults(sleepiness_rmse,'lgbm-sleepiness')

[1]	valid_0's rmse: 0.644743
[2]	valid_0's rmse: 0.60529
[3]	valid_0's rmse: 0.572186
[4]	valid_0's rmse: 0.54065
[5]	valid_0's rmse: 0.518605
[6]	valid_0's rmse: 0.497177
[7]	valid_0's rmse: 0.475992
[8]	valid_0's rmse: 0.461166
[9]	valid_0's rmse: 0.44502
[10]	valid_0's rmse: 0.431144
[11]	valid_0's rmse: 0.422217
[12]	valid_0's rmse: 0.412459
[13]	valid_0's rmse: 0.402994
[14]	valid_0's rmse: 0.394118
[15]	valid_0's rmse: 0.388302
[16]	valid_0's rmse: 0.382783
[17]	valid_0's rmse: 0.377448
[18]	valid_0's rmse: 0.372986
[19]	valid_0's rmse: 0.367962
[20]	valid_0's rmse: 0.363507
[21]	valid_0's rmse: 0.361
[22]	valid_0's rmse: 0.356155
[23]	valid_0's rmse: 0.352299
[24]	valid_0's rmse: 0.348463
[25]	valid_0's rmse: 0.345106
[26]	valid_0's rmse: 0.344176
[27]	valid_0's rmse: 0.342893
[28]	valid_0's rmse: 0.34163
[29]	valid_0's rmse: 0.339506
[30]	valid_0's rmse: 0.337213
[31]	valid_0's rmse: 0.336244
[32]	valid_0's rmse: 0.334792
[33]	valid_0's rmse: 0.332411
[34]	valid_0's rmse: 0.331

### GNN

In [31]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data

In [34]:
def transform_to_graph_data(X, y):
    x = torch.tensor(X.values, dtype=torch.float)
    edge_index = torch.tensor([list(range(X.shape[0]-1)), list(range(1, X.shape[0]))], dtype=torch.long)
    y = torch.tensor(y.values, dtype=torch.float).view(-1, 1)
    data = Data(x=x, edge_index=edge_index, y=y)
    return data

class SimpleGNN(torch.nn.Module):
    def __init__(self, num_features):
        super(SimpleGNN, self).__init__()
        self.conv1 = GCNConv(num_features, 64)
        self.conv2 = GCNConv(64, 32)
        self.fc = torch.nn.Linear(32, 1)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        x = self.fc(x)
        return x

def train_gnn(train_data, num_features):
    model = SimpleGNN(num_features=num_features)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    for epoch in range(10):
        model.train()
        optimizer.zero_grad()
        out = model(train_data)
        loss = torch.nn.MSELoss()(out, train_data.y)
        loss.backward()
        optimizer.step()
    return model

def evaluate_gnn_trials(X_train, y_train, X_test, y_test):
    train_data = transform_to_graph_data(X_train, y_train)
    test_data = transform_to_graph_data(X_test, y_test)
    gnn_rmses = []
    for _ in range(N_TRIALS):
        model = train_gnn(train_data, num_features=X_train.shape[1])
        model.eval()
        preds = model(test_data)
        rmse = np.sqrt(mean_squared_error(y_test, preds.detach().numpy()))
        gnn_rmses.append(rmse)
    return gnn_rmses

In [35]:
train = get_data_from_directory(os.path.join(DATA_DIR, 'train'))
test = get_data_from_directory(os.path.join(DATA_DIR, 'test'))

X_train = train.drop(['oss', 'Sleepiness'], axis=1)
X_test = test.drop(['oss', 'Sleepiness'], axis=1)

# ossモデル
oss_rmses = evaluate_gnn_trials(X_train, train['oss'], X_test, test['oss'])
print(f"GNN ossモデルの平均RMSE: {np.mean(oss_rmses)}")
print(f"GNN ossモデルのRMSEの標準偏差: {np.std(oss_rmses)}")

# Sleepinessモデル
sleepiness_rmses = evaluate_gnn_trials(X_train, train['Sleepiness'], X_test, test['Sleepiness'])
print(f"GNN sleepinessモデルの平均RMSE: {np.mean(sleepiness_rmses)}")
print(f"GNN sleepinessモデルのRMSEの標準偏差: {np.std(sleepiness_rmses)}")

GNN ossモデルの平均RMSE: 3.8703443606185886
GNN ossモデルのRMSEの標準偏差: 2.1431110409661858
GNN sleepinessモデルの平均RMSE: 4.859197111936291
GNN sleepinessモデルのRMSEの標準偏差: 2.1745646688642863
