In [8]:
import os
import pandas as pd
import lightgbm as lgb
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split


def read_and_process(file):
  """Load a single CSV and strip it down to complete, timestamp-free rows.

  The file is read with `pd.read_csv`, the `timestamp` column is removed,
  and every row that still contains a missing value is discarded.

  Args:
    file: Path or file-like object accepted by `pd.read_csv`.

  Returns:
    A DataFrame without the `timestamp` column and without rows containing
    missing values. Surviving rows keep their original index labels.

  Raises:
    KeyError: If the CSV has no `timestamp` column.
  """
  raw = pd.read_csv(file)
  without_timestamp = raw.drop(columns=['timestamp'])
  # Drop rows that contain missing values.
  return without_timestamp.dropna()


def train_model(X, y):
  """Fit a LightGBM regression model on an 80/20 split of `X` and `y`.

  The data is split with `train_test_split` (20% held out, fixed
  `random_state=42`). The held-out part is passed to `lgb.train` as the
  validation set, so its RMSE is the metric evaluated during training.
  All other training settings are left at the library defaults.

  Args:
    X: Feature table.
    y: Target values aligned with `X`.

  Returns:
    A tuple `(model, X_test, y_test)`: the trained booster and the held-out
    features and targets produced by the split.
  """
  params = {'objective': 'regression', 'metric': 'rmse'}

  X_fit, X_holdout, y_fit, y_holdout = train_test_split(
      X, y, test_size=0.2, random_state=42)

  fit_set = lgb.Dataset(X_fit, label=y_fit)
  # `reference` ties the validation set to the training Dataset.
  holdout_set = lgb.Dataset(X_holdout, label=y_holdout, reference=fit_set)

  booster = lgb.train(params, fit_set, valid_sets=holdout_set)
  return booster, X_holdout, y_holdout


# Directories holding the training and test CSV files.
train_data_path = './dms_data/train'
test_data_path = './dms_data/test'

# Prediction targets; both are removed from the feature matrix for either model.
TARGET_COLUMNS = ['oss', 'Sleepiness']


def load_csv_dir(data_path):
  """Read every `.csv` file in `data_path` and stack them into one DataFrame.

  Each file goes through `read_and_process`. File names are sorted before
  loading because `os.listdir` returns entries in arbitrary order; without
  sorting, the row order of the combined frame (and therefore the seeded
  train/validation split made from it) could differ between machines.

  Args:
    data_path: Directory to scan (non-recursive).

  Returns:
    The concatenation of all processed files with a fresh 0..n-1 index.

  Raises:
    ValueError: If the directory contains no `.csv` files.
  """
  csv_names = sorted(name for name in os.listdir(data_path) if name.endswith('.csv'))
  if not csv_names:
    raise ValueError(f"No .csv files found in {data_path!r}")
  frames = [read_and_process(os.path.join(data_path, name)) for name in csv_names]
  return pd.concat(frames, ignore_index=True)


# Load and combine the training data.
train_df = load_csv_dir(train_data_path)

# Split into features and target (oss).
X_oss = train_df.drop(columns=TARGET_COLUMNS)
y_oss = train_df['oss']

# Split into features and target (Sleepiness).
X_sleepiness = train_df.drop(columns=TARGET_COLUMNS)
y_sleepiness = train_df['Sleepiness']

# Train both models. The hold-out split returned by train_model is kept under
# *_val_* names so it is not silently overwritten by the test set loaded below.
model_oss, X_val_oss, y_val_oss = train_model(X_oss, y_oss)
model_sleepiness, X_val_sleepiness, y_val_sleepiness = train_model(X_sleepiness, y_sleepiness)

# Load and combine the test data.
test_df = load_csv_dir(test_data_path)

# Split into features and target (oss).
X_test_oss = test_df.drop(columns=TARGET_COLUMNS)
y_test_oss = test_df['oss']

# Split into features and target (Sleepiness).
X_test_sleepiness = test_df.drop(columns=TARGET_COLUMNS)
y_test_sleepiness = test_df['Sleepiness']

# Evaluate the oss model on the test set.
oss_preds = model_oss.predict(X_test_oss)
oss_rmse = np.sqrt(mean_squared_error(y_test_oss, oss_preds))
print(f"RMSE for oss model: {oss_rmse}")

# Evaluate the Sleepiness model on the test set.
sleepiness_preds = model_sleepiness.predict(X_test_sleepiness)
sleepiness_rmse = np.sqrt(mean_squared_error(y_test_sleepiness, sleepiness_preds))
print(f"RMSE for sleepiness model: {sleepiness_rmse}")


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5042
[LightGBM] [Info] Number of data points in the train set: 1390, number of used features: 22
[LightGBM] [Info] Start training from score 3.031342
[1]	valid_0's rmse: 0.655825
[2]	valid_0's rmse: 0.61167
[3]	valid_0's rmse: 0.574543
[4]	valid_0's rmse: 0.542637
[5]	valid_0's rmse: 0.516908
[6]	valid_0's rmse: 0.493443
[7]	valid_0's rmse: 0.470399
[8]	valid_0's rmse: 0.450852
[9]	valid_0's rmse: 0.436194
[10]	valid_0's rmse: 0.423399
[11]	valid_0's rmse: 0.413296
[12]	valid_0's rmse: 0.402987
[13]	valid_0's rmse: 0.393276
[14]	valid_0's rmse: 0.387414
[15]	valid_0's rmse: 0.380685
[16]	valid_0's rmse: 0.375928
[17]	valid_0's rmse: 0.369497
[18]	valid_0's rmse: 0.366759
[19]	valid_0's rmse: 0.360905
[20]	valid_0's rmse: 0.356972
[21]	valid_0's rmse: 0.354674
[22]	valid_0's rmse: 0.350765
[23]	valid_0's rmse: 0.348549
[24]	valid_0's rmse: 0.346686
[25]	valid_0's rmse: 0.343834
[26]	valid_0's rmse: 0.