In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Masking, Flatten
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import KFold

2024-06-05 20:51:05.653612: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-05 20:51:05.697923: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import numpy as np
import pandas as pd
import cv2  # OpenCVライブラリ
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import glob
import os
import sys
import pathlib
from pathlib import Path

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [3]:
# Put the parent of the working directory on sys.path (presumably so the
# project package imported in a later cell can be found) and keep the same
# location around as a pathlib.Path for building dataset paths.
current_dir = os.getcwd()
current_file_path = pathlib.Path(current_dir)
parent_dir = current_file_path.parent
# sys.path gets the plain-string form of the parent directory.
sys.path.append(os.path.dirname(current_dir))

In [4]:
import fluid_intake_estimate
from fluid_intake_estimate import variable_data_set
from fluid_intake_estimate import cnn
from fluid_intake_estimate import mlp

/home/s2/Desktop/washino/swallowing
/home/s2/Desktop/washino/swallowing


In [5]:
# Root folder that holds one sub-folder per participant.
directory_path = parent_dir.joinpath('dataset')

# 鷲野のデータを学習

In [None]:
# Load only the 'washino' recordings and create a CNN with default settings.
path = directory_path.joinpath('washino')
csv_path = path.joinpath('washino.csv')
# NOTE(review): 300 is presumably the number of samples to allocate — confirm
# against VariableDataSet.
data = variable_data_set.VariableDataSet(300)
# The trailing 0 is presumably the index of the first row to fill — TODO confirm.
data.csv_to_dataset(path, csv_path, 0)
model = cnn.CNN()

In [None]:
# Checkpoint callback: each time the monitored validation MAE reaches a new
# minimum, the model is written to `filepath` (only the best one is kept).
model_checkpoint = ModelCheckpoint(
    filepath='cnn_20240516_model_best.keras',  # destination file
    monitor='val_mean_absolute_error',         # quantity to watch
    mode='min',                                # lower is better
    save_best_only=True,                       # keep only the best model
    verbose=1,                                 # print a message when saving
)

# Positional 50 and 32 are presumably epochs and batch size — TODO confirm
# against cnn.CNN.training.
model.training(data.X, data.y, 50, 32, model_checkpoint=model_checkpoint)
# Save the model as it stands after training, separately from the best checkpoint.
model.save('cnn_20240516_model_v1.keras')

# 全員のデータを学習して分割交差検証

In [11]:
# Participants whose recordings are pooled. Each one has a folder `<name>/`
# under the dataset directory containing .wav files and a `<name>.csv`.
names = [
    'washino', 'haruki', 'ezumi', 'hikaru', 'ibuki',
    'kanata', 'kazuki', 'kishimoto', 'kosasayama', 'mocchi',
    'nakaryo', 'ruku', 'shibata', 'soichiro', 'tsuji',
]

# 786 matches the cumulative .wav count printed after the last participant.
# NOTE(review): presumably the number of samples to allocate — confirm.
# Alternative configuration tried earlier: VariableDataSet(786, scale=0, time_range=20000)
data = variable_data_set.VariableDataSet(786, scale=1, time_range=70000)

# Running total of .wav files seen so far; passed to csv_to_dataset as the
# third argument (presumably the first row index to fill — TODO confirm).
count = 0

for name in names:
    path = directory_path / name
    csv_path = path / f'{name}.csv'

    # Number of recordings in this participant's folder.
    wav_files = glob.glob(str(path / '*.wav'))
    wav_file_count = len(wav_files)

    data.csv_to_dataset(path, csv_path, count, signal_processing='No')
    count += wav_file_count
    print(count)

352
382
413
444
474
504
538
571
601
631
661
694
724
755
786


In [12]:
# 分割交差検証の設定
# 5-fold cross-validation over the pooled samples; shuffling with a fixed
# random_state keeps the fold assignment the same on every run.
# NOTE(review): KFold splits by sample, so recordings of one participant can
# end up in both train and test folds — consider GroupKFold if a
# subject-independent evaluation is intended.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Materialise the (train, test) index pairs once, then separate them so that
# entry i of each list holds the row indices of fold i.
fold_indices = list(kf.split(data.X))
train_data_splits_index = [train_index for train_index, _ in fold_indices]
test_data_splits_index = [test_index for _, test_index in fold_indices]

In [13]:
# Accumulators for ground-truth values and predictions, filled one fold at a
# time by later cells. Re-running this cell discards what was collected.
y, predict = [], []

In [None]:
# Train and evaluate the CNN on one cross-validation fold.
#
# Fold to hold out as the test set (0-4 for the 5-fold split); change it and
# re-run this cell to process another fold. Both output file names are
# derived from it so they can never disagree with the fold actually used.
use_data = 4
train_idx = train_data_splits_index[use_data]
test_idx = test_data_splits_index[use_data]

model = cnn.CNN(scale=1, time_range=70000)
# MLP baseline alternative: model = mlp.MLP(time_range=20000)

# Keeps only the model with the lowest validation MAE seen during training.
model_checkpoint = ModelCheckpoint(
    filepath=f'nopro_cnn_20240605_model_best_{use_data}.keras',  # destination file
    monitor='val_mean_absolute_error',  # quantity to watch
    save_best_only=True,                # keep only the best model
    verbose=1,                          # print a message when saving
    mode='min',                         # lower is better
)

print(data.X.shape)
# The callback must be handed to training(); otherwise the "best" file above
# is never written. The keyword matches the one used for the washino-only run.
# Positional 50 is the epoch count (see the "Epoch n/50" log); 32 is
# presumably the batch size — TODO confirm against cnn.CNN.training.
model.training(
    data.X[train_idx],
    data.y[train_idx],
    50,
    32,
    model_checkpoint=model_checkpoint,
)
model.evaluate(data.X[test_idx], data.y[test_idx])
# Final-epoch model, saved separately from the best checkpoint.
model.save(f'nopro_cnn_20240605_model_{use_data}.keras')

(786, 70000, 1)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50


Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
 4/18 [=====>........................] - ETA: 0s - loss: 2.1873e-04 - mean_absolute_error: 0.0112 - mean_squared_error: 2.1873e-04

In [None]:
# Store the held-out targets and the model's predictions for the current fold.
# NOTE(review): each execution appends again, so run this exactly once per
# trained fold. model.predictions is presumably filled by model.evaluate —
# TODO confirm.
held_out_index = test_data_splits_index[use_data]
y.append(data.y[held_out_index])
predict.append(model.predictions)

In [None]:
# Number of prediction arrays appended so far (one per execution of the
# collecting cell).
print(len(predict))

In [None]:
# Join the per-fold pieces along the first axis into one array each.
array_y = np.concatenate(y, axis=0)
array_pre = np.concatenate(predict, axis=0)

In [None]:
# Scatter of measured values (x) against predictions (y) for the collected folds.
plt.scatter(array_y, array_pre)
plt.title('Scatter Plot of y vs predict')
plt.xlabel('y')
plt.ylabel('predict')
plt.show()

# Regression metrics: R2, MAE, and RMSE (square root of the MSE).
r2 = r2_score(array_y, array_pre)
mae = mean_absolute_error(array_y, array_pre)
rmse = np.sqrt(mean_squared_error(array_y, array_pre))

print(f"R2 Score: {r2}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

In [None]:
# Collapse the predictions to one dimension; the following cell stacks them
# row-wise next to array_y.
array_pre = array_pre.flatten()

In [None]:
# Two-column table (y, predict), one row per sample, written as CSV with a
# header line and no comment prefix.
# NOTE(review): the file name says "mlp" and has no fold suffix, while a later
# cell reads stft_cnn_20240520_<fold>.csv — confirm the intended name before
# running.
combined_array = np.column_stack((array_y, array_pre))
np.savetxt(
    "mlp_20240520.csv",
    combined_array,
    delimiter=",",
    header="y,predict",
    comments='',
)

In [None]:
# Per-fold result files, in fold order.
fold_csv_files = (
    "stft_cnn_20240520_0.csv",
    "stft_cnn_20240520_1.csv",
    "stft_cnn_20240520_2.csv",
    "stft_cnn_20240520_3.csv",
    "stft_cnn_20240520_4.csv",
)

# Read and echo each file in turn, as before, then expose the frames under
# the names the following cells use.
fold_frames = []
for csv_file in fold_csv_files:
    fold_frame = pd.read_csv(csv_file)
    print(fold_frame)
    fold_frames.append(fold_frame)

df0, df1, df2, df3, df4 = fold_frames

In [None]:
# Stack the five per-fold tables row-wise; ignore_index renumbers the rows
# from 0 instead of keeping each file's own index.
df = [df0, df1, df2, df3, df4]
combined_df = pd.concat(objs=df, axis=0, ignore_index=True)

# Show the combined DataFrame.
print(combined_df)

In [None]:
# Scatter of measured values (x) against predictions (y) over all loaded folds.
plt.scatter(combined_df['y'], combined_df['predict'])
plt.title('Scatter Plot of y vs predict')
plt.xlabel('y')
plt.ylabel('predict')
plt.show()

# Regression metrics: R2, MAE, and RMSE (square root of the MSE).
r2 = r2_score(combined_df['y'], combined_df['predict'])
mae = mean_absolute_error(combined_df['y'], combined_df['predict'])
rmse = np.sqrt(mean_squared_error(combined_df['y'], combined_df['predict']))

print(f"R2 Score: {r2}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")