In [1]:
import cv2
from matplotlib import pyplot as plt
import dlib
import numpy as np
import pandas as pd
import json
import os
import itertools
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

image_dir = 'face_data/pics/'
model = 'face_data/data/shape_predictor_68_face_landmarks.dat'

In [2]:

def load_and_preprocess_data(file_path):
  """CSVを読み込んで前処理する関数。"""
  df = pd.read_csv(file_path)
  df = df.drop(['timestamp'], axis=1)
  return df.dropna()


def get_data_from_directory(directory_path):
  """指定ディレクトリのCSV全部を読み込んで結合する関数。"""
  files = [f for f in os.listdir(directory_path) if f.endswith('.csv')]
  data_frames = [load_and_preprocess_data(os.path.join(directory_path, file)) for file in files]
  return pd.concat(data_frames, ignore_index=True)


def evaluate_model(model, X, y):
  """モデルの評価。RMSE"""
  predictions = model.predict(X)
  return np.sqrt(mean_squared_error(y, predictions))


def printResults(rmse_results, title, if_print=True):
  mean_rmse = np.mean(rmse_results)
  median_rmse = np.median(rmse_results)
  variance_rmse = np.var(rmse_results)
  std_rmse = np.std(rmse_results)
  min_rmse = np.min(rmse_results)
  max_rmse = np.max(rmse_results)

  out = ''
  out += f"[{title}]RMSEの平均値: {mean_rmse}\n"
  # out += f"[{title}]RMSEの中央値: {median_rmse}\n"
  # out += f"[{title}]RMSEの分散: {variance_rmse}\n"
  # out += f"[{title}]RMSEの標準偏差: {std_rmse}\n"
  out += f"[{title}]RMSEの最小値: {min_rmse}\n"
  out += f"[{title}]RMSEの最大値: {max_rmse}\n"

  if if_print:
    print(out)
  return out

  # print(f"[{title}]RMSEの平均値: {mean_rmse}")
  # # print(f"[{title}]RMSEの中央値: {median_rmse}")
  # # print(f"[{title}]RMSEの分散: {variance_rmse}")
  # # print(f"[{title}]RMSEの標準偏差: {std_rmse}")
  # print(f"[{title}]RMSEの最小値: {min_rmse}")
  # print(f"[{title}]RMSEの最大値: {max_rmse}")


def get_data_from_directory_with_scaling(directory_path, scaler=None):
  """指定ディレクトリのCSV全部を読み込んで結合し、スケーリングする関数。"""
  files = [f for f in os.listdir(directory_path) if f.endswith('.csv')]
  data_frames = [load_and_preprocess_data(os.path.join(directory_path, file)) for file in files]
  combined_df = pd.concat(data_frames, ignore_index=True)

  if scaler is not None:
    # 既存のスケーラーを使用してデータを変換
    combined_df[features] = scaler.transform(combined_df[features])
  else:
    # 新しいスケーラーをフィットしてデータを変換
    scaler = MinMaxScaler()
    combined_df[features] = scaler.fit_transform(combined_df[features])

  return combined_df, scaler

In [3]:
from sklearn.model_selection import KFold


def load_and_preprocess_data_save_timestamp(file_path):
  """CSVを読み込んで前処理する関数。"""
  df = pd.read_csv(file_path)
  return df.dropna()


def extract_image_name(timestamp):
  return timestamp.replace('-', '').replace(' ', '_').replace(':', '').replace('.', '_') + '.jpg'


def train_lightgbm_full(X, y):
  """Train LightGBM model on the full training data."""
  train_data = lgb.Dataset(X, label=y)
  params = {
      'objective': 'regression',
      'metric': 'rmse',
      'verbose': -1
  }
  model = lgb.train(params, train_data)
  return model


def evaluate_on_test(model, X_test, y_test):
  """Evaluate model on the test data."""
  predictions = model.predict(X_test)
  return np.sqrt(mean_squared_error(y_test, predictions))


def evaluate_with_kfold(X, y, n_splits=5):
  kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
  rmses = []

  for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    model = train_lightgbm_full(X_train, y_train)
    rmse = evaluate_on_test(model, X_test, y_test)
    rmses.append(rmse)

  return rmses


def add_eye_area_to_df(df, eye_areas):
  eye_areas_sum = []

  for _, row in df.iterrows():
    image_name = extract_image_name(row['timestamp'])
    eye_area = eye_areas.get(image_name, {})
    left_eye_area = eye_area.get('left_eye_area')
    right_eye_area = eye_area.get('right_eye_area')
    if left_eye_area is not None and right_eye_area is not None:
      try:
        eye_areas_sum.append(float(left_eye_area) + float(right_eye_area))
      except ValueError:
        eye_areas_sum.append(np.nan)
    else:
      eye_areas_sum.append(np.nan)
  df['eye_area_sum'] = eye_areas_sum
  return df

In [4]:
def solve_one_subject(csv_filepath, json_filepath, descrive=False, data_name=""):
  # 特徴量を定義
  features = [
      'm_speed', 'm_speed_var_480', 'm_speed_stddev_480', 'm_acceleration',
      'm_acceleration_var_480', 'm_acceleration_stddev_480', 'm_jerk',
      'm_jerk_var_480', 'm_jerk_stddev_480', 'oss', 'Sleepiness'
  ]
  outStr = "train,test同時に正規化\n"

  data = load_and_preprocess_data_save_timestamp(csv_filepath)
  with open(json_filepath, 'r') as f:
    eye_areas = json.load(f)
  data = add_eye_area_to_df(data, eye_areas)
  features.append('eye_area_sum')
  # 正規化
  scaler = MinMaxScaler()
  data[features] = scaler.fit_transform(data[features])

  X = data[features]
  Y_oss = data['oss']
  Y_Sleepiness = data['Sleepiness']
  if (descrive):
    print(data.describe())

  oss_kfold_rmses_norm_divided = evaluate_with_kfold(X, Y_oss)
  outStr += printResults(oss_kfold_rmses_norm_divided, "lgbm-oss-normalized-kfold-single-sbj", False)

  sleepiness_kfold_rmses_norm_divided = evaluate_with_kfold(X, Y_Sleepiness)
  outStr += printResults(sleepiness_kfold_rmses_norm_divided, "lgbm-sleepiness-normalized-kfold-single-sbj", False)
  return {'out': outStr, 'oss': oss_kfold_rmses_norm_divided, 'sleepiness': sleepiness_kfold_rmses_norm_divided}

In [5]:
DATA_DIR = 'dms_data_single'
EYE_DATA_DIR = 'eye_data_json_dir'
file_names = [
    "20201126_1546_0_y",
    "20201127_1432_7_y",
    "20201127_1548_2_y",
    "20201127_1701_7_y",
    "20201127_1840_5_y",
    "20201130_1122_5_y",
    "20201130_1808_6_y",
    "20201201_1230_0_y",
    "20201201_1429_5_y",
    "20201201_1555_0_y",
    "20201203_1022_7_y",
    "20201203_1244_5_y",
    "20201203_1404_6_y",
    "20201210_1112_2_y",
    "20201210_1354_2_y",
    "20201210_1610_6_y"
]
data_name = file_names[0]
csv_filename = f'{data_name}.csv'
json_filename = f'{data_name}_eye_areas.json'
csv_filepath = os.path.join(DATA_DIR, csv_filename)
json_filepath = os.path.join(EYE_DATA_DIR, json_filename)


print(solve_one_subject(csv_filepath, json_filepath)['out'])

train,test同時に正規化
[lgbm-oss-normalized-kfold-single-sbj]RMSEの平均値: 0.052189182576800365
[lgbm-oss-normalized-kfold-single-sbj]RMSEの最小値: 0.03686978680807165
[lgbm-oss-normalized-kfold-single-sbj]RMSEの最大値: 0.06516212156327408
[lgbm-sleepiness-normalized-kfold-single-sbj]RMSEの平均値: 0.0538552717810238
[lgbm-sleepiness-normalized-kfold-single-sbj]RMSEの最小値: 0.03667714228659573
[lgbm-sleepiness-normalized-kfold-single-sbj]RMSEの最大値: 0.07357452653473572



In [6]:
file_names = [
    "20201126_1546_0_y",
    "20201127_1432_7_y",
    "20201127_1548_2_y",
    "20201127_1701_7_y",
    "20201127_1840_5_y",
    "20201130_1122_5_y",
    "20201130_1808_6_y",
    "20201201_1230_0_y",
    "20201201_1429_5_y",
    "20201201_1555_0_y",
    "20201203_1022_7_y",
    "20201203_1244_5_y",
    "20201203_1404_6_y",
    "20201210_1112_2_y",
    "20201210_1354_2_y",
    "20201210_1610_6_y"
]
oss_rmses = []
Sleepiness_rmses = []

for data_name in file_names:
  print(data_name)
  csv_filename = f'{data_name}.csv'
  json_filename = f'{data_name}_eye_areas.json'
  csv_filepath = os.path.join(DATA_DIR, csv_filename)
  json_filepath = os.path.join(EYE_DATA_DIR, json_filename)
  result = solve_one_subject(csv_filepath, json_filepath, data_name=data_name)
  print(result['out'])
  oss_rmses.append(result['oss'])
  Sleepiness_rmses.append(result['sleepiness'])
print(printResults(oss_rmses, "lgbm-oss-normalized-kfold-all-sbj"))
print(printResults(Sleepiness_rmses, "lgbm-sleepiness-normalized-kfold-all-sbj"))

20201126_1546_0_y
train,test同時に正規化
[lgbm-oss-normalized-kfold-single-sbj]RMSEの平均値: 0.052189182576800365
[lgbm-oss-normalized-kfold-single-sbj]RMSEの最小値: 0.03686978680807165
[lgbm-oss-normalized-kfold-single-sbj]RMSEの最大値: 0.06516212156327408
[lgbm-sleepiness-normalized-kfold-single-sbj]RMSEの平均値: 0.0538552717810238
[lgbm-sleepiness-normalized-kfold-single-sbj]RMSEの最小値: 0.03667714228659573
[lgbm-sleepiness-normalized-kfold-single-sbj]RMSEの最大値: 0.07357452653473572

20201127_1432_7_y
train,test同時に正規化
[lgbm-oss-normalized-kfold-single-sbj]RMSEの平均値: 0.06465127581042733
[lgbm-oss-normalized-kfold-single-sbj]RMSEの最小値: 0.04558136893327791
[lgbm-oss-normalized-kfold-single-sbj]RMSEの最大値: 0.08557951487362439
[lgbm-sleepiness-normalized-kfold-single-sbj]RMSEの平均値: 0.029401891072867913
[lgbm-sleepiness-normalized-kfold-single-sbj]RMSEの最小値: 0.017283799508294524
[lgbm-sleepiness-normalized-kfold-single-sbj]RMSEの最大値: 0.04103543244844021

20201127_1548_2_y
train,test同時に正規化
[lgbm-oss-normalized-kfold-single