In [1]:
# '''
# Written by Tamas Gabor Csapo <csapot@tmit.bme.hu>
# First version Jan 21, 2019
# Restructured Jan 21, 2020 - for MRI data
# Keras implementation of Csapó T.G., ,,Speaker dependent acoustic-to-articulatory inversion using real-time MRI of the vocal tract'', accepted at Interspeech 2020
# code for training FC-DNN
# '''

# import numpy as np
# import matplotlib.pyplot as plt
# import scipy.io.wavfile as io_wav
# import os
# import os.path
# import datetime
# import pickle
# import cv2
# import random

# # import vocoder_LSP_sptk

# # do not use all GPU memory
# import tensorflow as tf
# from tensorflow import keras
# from keras.backend import set_session
# import librosa


# from keras.models import Sequential
# from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, UpSampling2D, Reshape, LSTM, TimeDistributed

# from keras.callbacks import EarlyStopping, CSVLogger, ModelCheckpoint
# from keras.metrics import mean_squared_error

# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler, MinMaxScaler

# import subprocess
# from subprocess import run
# import numpy as np
# import scipy.io.wavfile as io_wav

import numpy as np
import matplotlib.pyplot as plt
import scipy.io.wavfile as io_wav
import os
import datetime
import pickle
import cv2
import random

# TensorFlow and Keras imports
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, UpSampling2D, Reshape, LSTM, TimeDistributed
from tensorflow.keras.callbacks import EarlyStopping, CSVLogger, ModelCheckpoint
from tensorflow.keras.metrics import MeanSquaredError
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import librosa

import json

# Ask TensorFlow to grow GPU memory on demand instead of reserving all of it
# up front (TensorFlow 2.x API).
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # With no visible GPU the list is empty and this loop is a no-op.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # The error is reported but not re-raised, so the notebook keeps running.
    print(e)




# from LipReading with slight modifications
# https://github.com/hassanhub/LipReading/blob/master/codes/data_integration.py
################## VIDEO INPUT ##################
def load_video_3D(path, framesPerSec):
    """Load a video as a stack of min-max normalised grayscale frames.

    Args:
        path: Path of the video file, passed to ``cv2.VideoCapture``.
        framesPerSec: Expected frame rate. The video's reported FPS must match
            it to within 0.01.

    Returns:
        A float32 array of shape (frameHeight, frameWidth, n_frames). Each
        frame is scaled independently: its minimum is subtracted and, unless
        the result is all zero, it is divided by its maximum.

        If the frame rate does not match, the mismatch is printed and an
        all-zero array with the reported frame count is returned.
        NOTE(review): callers cannot tell this apart from a black video
        without inspecting the printed message - consider raising instead.

        If the file yields fewer frames than it reports, only the frames that
        were actually decoded are returned.
    """
    cap = cv2.VideoCapture(path)
    fc = 0
    try:
        frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frameHeight = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frameWidth = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        fps = cap.get(cv2.CAP_PROP_FPS)

        # Zero-initialised so that no uninitialised memory can ever be returned.
        buf = np.zeros((frameHeight, frameWidth, frameCount), np.dtype('float32'))

        # make sure that all the videos are the same FPS
        if np.abs(fps - framesPerSec) > 0.01:
            print('fps:', fps, '(' + path + ')')
            return buf

        while fc < frameCount:
            ret, frame = cap.read()
            # The reported frame count can exceed what is decodable; stop
            # before handing an invalid frame to cvtColor.
            if not ret:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY).astype('float32')
            # min-max scaling to [0-1]
            frame = frame - np.amin(frame)
            peak = np.amax(frame)
            # make sure not to divide by zero (constant frame)
            if peak != 0:
                frame = frame / peak
            buf[:, :, fc] = frame
            fc += 1
    finally:
        # Always free the capture handle, including on the early return above.
        cap.release()

    # Drop any trailing slots that were never filled.
    return buf[:, :, :fc]

# convert an array of values into a dataset matrix
# code with modifications from
# https://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/
def create_dataset_img_inverse(data_in_X, data_in_Y, look_back=1):
    """Build sliding-window (sequence -> image) training pairs.

    Sample ``i`` consists of the ``look_back`` consecutive input rows
    ``data_in_X[i : i + look_back]`` and, as target, the image aligned with the
    last row of that window, ``data_in_Y[i + look_back - 1]``.

    Args:
        data_in_X: 2-D array (n_frames, n_features) of input features.
        data_in_Y: 4-D array (n_frames, d2, d3, d4) of target images, with the
            same number of frames as ``data_in_X``.
        look_back: Window length in frames (>= 1).

    Returns:
        Tuple ``(data_out_X, data_out_Y)`` of float64 arrays with shapes
        (n_frames - look_back - 1, look_back, n_features) and
        (n_frames - look_back - 1, d2, d3, d4). The sample count is the one the
        original implementation produced; it is two fewer than the number of
        windows that would fit.

    Raises:
        ValueError: If ``look_back`` < 1, if the inputs have different frame
            counts, or if there are too few frames for a single window count
            of zero or more.
    """
    (dim1_X, dim2_X) = data_in_X.shape
    (dim1_Y, dim2_Y, dim3_Y, dim4_Y) = data_in_Y.shape

    if look_back < 1:
        raise ValueError('look_back must be >= 1, got %r' % (look_back,))
    if dim1_X != dim1_Y:
        raise ValueError(
            'data_in_X and data_in_Y must have the same number of frames '
            '(%d != %d)' % (dim1_X, dim1_Y))

    n_samples = dim1_X - look_back - 1
    if n_samples < 0:
        raise ValueError(
            'need at least look_back + 1 = %d frames, got %d'
            % (look_back + 1, dim1_X))

    data_out_X = np.empty((n_samples, look_back, dim2_X))
    # Position `offset` of every window is the input shifted by `offset` frames.
    for offset in range(look_back):
        data_out_X[:, offset] = data_in_X[offset:offset + n_samples]

    # Target = frame aligned with the last element of each window. The index
    # is explicit rather than taken from a leaked inner-loop variable.
    target_start = look_back - 1
    data_out_Y = np.empty((n_samples, dim2_Y, dim3_Y, dim4_Y))
    data_out_Y[:] = data_in_Y[target_start:target_start + n_samples]

    return data_out_X, data_out_Y

# load vocoder features,
# or calculate, if they are not available
def get_mgc_lsp_coeff(basefilename):
    """Return the ``(mgc_lsp_coeff, lf0)`` pair computed by ``mgc_encode``.

    The analysis settings (samplingFrequency, frameLength, frameShift, order,
    alpha, stage) are read from module-level variables, so they must be
    defined before this function is called.

    An earlier variant (now removed from the body) loaded cached
    ``.mgclsp`` / ``.lf0`` files when present and otherwise ran a
    sox + SPTK command line to produce them.

    NOTE(review): ``mgc_encode`` is neither defined nor imported in the
    visible part of this file (the ``vocoder_LSP_sptk`` import is commented
    out) - confirm where it comes from before calling this function.

    Args:
        basefilename: Value forwarded as the first argument of ``mgc_encode``;
            presumably the audio file path without its extension - verify.

    Returns:
        Tuple ``(mgc_lsp_coeff, lf0)`` exactly as produced by ``mgc_encode``.
    """
    print('I am in get mgc')

    encoded = mgc_encode(
        basefilename,
        samplingFrequency,
        frameLength,
        frameShift,
        order,
        alpha,
        stage,
    )
    # Unpack so that anything other than a 2-item result fails here.
    mgc_lsp_coeff, lf0 = encoded
    return (mgc_lsp_coeff, lf0)


# ---------------------------------------------------------------------------
# Experiment configuration (identical for every speaker, so defined once)
# ---------------------------------------------------------------------------

# Parameters of vocoder / acoustic analysis
samplingFrequency = 20000
frameLength = 1024  # analysis window: 51.2 ms at 20000 Hz
frameShift = 863  # 43.14 ms at 20000 Hz sampling, corresponding to 23.18 fps (MRI video)
order = 24
alpha = 0.42
stage = 3
n_mgc = order + 1  # number of acoustic coefficients per frame

# properties of MRI videos
framesPerSec = 23.18
n_width = 68
n_height = 68

# context window of LSTM (number of consecutive acoustic frames per sample)
n_sequence = 3

# train-valid-test split sizes (in files) and buffer sizing
N_VALID_FILES = 4
N_TEST_FILES = 10
MAX_FRAMES_PER_FILE = 1000  # upper bound used to pre-allocate the buffers

# number of validation samples used for the quick error estimate after training
N_EVAL_FRAMES = 465


for speaker in ['M2', 'M3', 'F1', 'F2']:
    # TODO: modify this according to your data path
    dir_mri = '/content/drive/MyDrive/backup_PhD/database/MRI_USC/data/' + speaker + '/avi/'

    # USC-TIMIT contains 92 files (460 sentences) for each speaker
    # train-valid-test split (random, fixed seed):
    # - 4 files for valid
    # - 10 files for test
    # - the remaining files for training
    files_mri = dict()
    mri = dict()
    mgc = dict()
    files_mri['all'] = []
    if os.path.isdir(dir_mri):
        files_mri['all'] = [file for file in sorted(os.listdir(dir_mri))
                            if file.endswith('.avi')]

    # Without at least one training file the scaler fitting below would fail
    # with an obscure error, so report the problem and move on.
    if len(files_mri['all']) <= N_VALID_FILES + N_TEST_FILES:
        print('not enough .avi files for speaker', speaker, 'in', dir_mri,
              '(found', len(files_mri['all']), ') - skipping')
        continue

    # randomize file order (seeded, so the split is reproducible)
    random.seed(17)
    random.shuffle(files_mri['all'])

    files_mri['valid'] = files_mri['all'][0:N_VALID_FILES]
    files_mri['test'] = files_mri['all'][N_VALID_FILES:N_VALID_FILES + N_TEST_FILES]
    files_mri['train'] = files_mri['all'][N_VALID_FILES + N_TEST_FILES:]

    print('valid files', files_mri['valid'])
    print('test files', files_mri['test'])

    # ------------------------------------------------------------------
    # Load MRI frames and the time-aligned acoustic features
    # ------------------------------------------------------------------
    for train_valid in ['train', 'valid']:
        n_files = len(files_mri[train_valid])
        n_max_mri_frames = n_files * MAX_FRAMES_PER_FILE
        mri[train_valid] = np.empty((n_max_mri_frames, n_width, n_height))
        mgc[train_valid] = np.empty((n_max_mri_frames, n_mgc))
        mri_size = 0
        mgc_size = 0

        for file in files_mri[train_valid]:
            try:
                print('starting', train_valid, file)
                mri_data = load_video_3D(dir_mri + file, framesPerSec)

                # The audio is stored next to the video, under /wav/.
                dir_mri_wav = dir_mri.replace('/avi/', '/wav/') + file[:-4] + '.wav'
                x, sr = librosa.load(dir_mri_wav, sr=samplingFrequency)

                # MFCCs stand in for the MGC-LSP features; the hop length is
                # chosen so that one acoustic frame corresponds to one MRI frame.
                mgc_lsp_coeff = librosa.feature.mfcc(
                    y=x, sr=sr, n_mfcc=n_mgc,
                    hop_length=frameShift, n_fft=frameLength).transpose()
            except ValueError as e:
                print("wrong data, check manually!", e)
                continue

            # Keep only the part covered by both modalities.
            mgc_mri_len = min(mri_data.shape[2], len(mgc_lsp_coeff))
            if mgc_mri_len == 0:
                print("wrong data, check manually!", 'no usable frames in ' + file)
                continue

            print('minmax:', np.min(mri_data), np.max(mri_data))

            mri_data = mri_data[:, :, 0:mgc_mri_len]
            mgc_lsp_coeff = mgc_lsp_coeff[0:mgc_mri_len]

            if mri_size + mgc_mri_len > n_max_mri_frames:
                raise RuntimeError(
                    'pre-allocated buffer too small: %d frames needed, %d available'
                    % (mri_size + mgc_mri_len, n_max_mri_frames))

            # Frames come as (height, width, time); the buffer is time-major.
            mri[train_valid][mri_size:mri_size + mgc_mri_len] = np.moveaxis(mri_data, 2, 0)  # original, 68x68
            mgc[train_valid][mgc_size:mgc_size + mgc_mri_len] = mgc_lsp_coeff

            mri_size += mgc_mri_len
            mgc_size += mgc_mri_len

            print('n_frames_all: ', mri_size, 'mgc_size: ', mgc_size)

        # Drop the unused tail of the pre-allocated buffers.
        mri[train_valid] = mri[train_valid][0:mri_size].reshape(-1, n_width, n_height, 1)
        mgc[train_valid] = mgc[train_valid][0:mgc_size]

    # target: min max scaler to [0,1] range
    # already scaled in load_video

    # input: normalization to zero mean, unit variance
    # feature by feature (statistics are fitted on the training set only);
    # the scalers are kept as a list, one per coefficient, and pickled below
    mgc_scalers = []
    for i in range(n_mgc):
        mgc_scaler = StandardScaler(with_mean=True, with_std=True)
        mgc['train'][:, i] = mgc_scaler.fit_transform(mgc['train'][:, i].reshape(-1, 1)).ravel()
        mgc['valid'][:, i] = mgc_scaler.transform(mgc['valid'][:, i].reshape(-1, 1)).ravel()
        mgc_scalers.append(mgc_scaler)

    # restructure for LSTM: (samples, n_sequence, n_mgc) -> flattened image
    for train_valid in ['train', 'valid']:
        mgc[train_valid], mri[train_valid] = create_dataset_img_inverse(
            mgc[train_valid], mri[train_valid], look_back=n_sequence)
        mri[train_valid] = mri[train_valid].reshape(-1, n_width * n_height)

    # ------------------------------------------------------------------
    # Model: 3 time-distributed dense layers -> 2 LSTM layers -> linear output
    # ------------------------------------------------------------------
    # The input shape is derived from the data layout produced above, so the
    # declared architecture always matches what the network is trained on.
    model = Sequential([
        Input(shape=(n_sequence, n_mgc)),
        TimeDistributed(Dense(575, activation='relu', kernel_initializer='normal')),
        TimeDistributed(Dense(575, activation='relu', kernel_initializer='normal')),
        TimeDistributed(Dense(575, activation='relu', kernel_initializer='normal')),
        LSTM(575, activation='relu', return_sequences=True),
        LSTM(575, activation='relu', return_sequences=False),
        Dense(n_width * n_height, activation='linear', kernel_initializer='normal')
    ])

    model.compile(loss='mean_squared_error', optimizer='adam')

    current_date = '{date:%Y-%m-%d_%H-%M-%S}'.format(date=datetime.datetime.now())
    # NOTE(review): the file name says "5frame" while n_sequence is 3; the name
    # is left unchanged in case other notebooks look for it - confirm and align.
    model_name = '/content/drive/MyDrive/GAN_based_models/speech2mri/models_speech2mri_2024/SPEECH2MRI_LSTM_5frame_' + speaker + '_' + current_date
    os.makedirs(os.path.dirname(model_name), exist_ok=True)

    print('starting training', speaker, current_date)

    # serialize scalers to pickle (before training, so that they are available
    # even if training is interrupted)
    scaler_filename = model_name + '_mgc_scalers.sav'
    with open(scaler_filename, 'wb') as scaler_file:
        pickle.dump(mgc_scalers, scaler_file)

    # early stopping to avoid over-training
    # csv logging of loss
    # save best model (lowest validation loss)
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=5),
        CSVLogger(model_name + '.csv', append=True, separator=';'),
        ModelCheckpoint(model_name + '_weights.keras', monitor='val_loss', save_best_only=True)
    ]

    # run training
    history = model.fit(mgc['train'], mri['train'],
                        epochs=100, batch_size=128, shuffle=True,
                        validation_data=(mgc['valid'], mri['valid']),
                        callbacks=callbacks)

    # summary() prints the table itself and returns None, so it is not wrapped
    # in print()
    model.summary()

    # ------------------------------------------------------------------
    # Quick error estimate on the first validation samples
    # ------------------------------------------------------------------
    n_eval = min(N_EVAL_FRAMES, len(mgc['valid']))
    y_pred = model.predict(mgc['valid'][0:n_eval])
    y_true = mri['valid'][0:n_eval]

    # per-frame mean squared error, then the average over the frames
    FrameErr = np.mean(np.square(y_pred - y_true), axis=1).reshape(-1, 1)
    MSErr = np.mean(FrameErr)
    print(MSErr)

    # save model architecture
    model_json = model.to_json()
    with open(model_name + '_model.json', "w") as json_file:
        json_file.write(model_json)

    # save test files
    with open(model_name + '_test_files.txt', 'w') as txt_file:
        for file in files_mri['test']:
            txt_file.write(file + '\n')

valid files ['usctimit_mri_m2_081_085.avi', 'usctimit_mri_m2_171_175.avi', 'usctimit_mri_m2_251_255.avi', 'usctimit_mri_m2_401_405.avi']
test files ['usctimit_mri_m2_441_445.avi', 'usctimit_mri_m2_146_150.avi', 'usctimit_mri_m2_261_265.avi', 'usctimit_mri_m2_121_125.avi', 'usctimit_mri_m2_316_320.avi', 'usctimit_mri_m2_411_415.avi', 'usctimit_mri_m2_101_105.avi', 'usctimit_mri_m2_061_065.avi', 'usctimit_mri_m2_091_095.avi', 'usctimit_mri_m2_396_400.avi']
starting train usctimit_mri_m2_166_170.avi
minmax: 0.0 1.0
n_frames_all:  482 mgc_size:  482
starting train usctimit_mri_m2_306_310.avi
minmax: 0.0 1.0
n_frames_all:  1080 mgc_size:  1080
starting train usctimit_mri_m2_051_055.avi
minmax: 0.0 1.0
n_frames_all:  1614 mgc_size:  1614
starting train usctimit_mri_m2_056_060.avi
minmax: 0.0 1.0
n_frames_all:  2163 mgc_size:  2163
starting train usctimit_mri_m2_276_280.avi
minmax: 0.0 1.0
n_frames_all:  2706 mgc_size:  2706
starting train usctimit_mri_m2_241_245.avi
minmax: 0.0 1.0
n_frames_

  super().__init__(**kwargs)


starting training M2 2024-10-30_08-49-22
Epoch 1/100
[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 26ms/step - loss: 0.0108 - val_loss: 0.0035
Epoch 2/100
[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 16ms/step - loss: 0.0036 - val_loss: 0.0033
Epoch 3/100
[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - loss: 0.0033 - val_loss: 0.0032
Epoch 4/100
[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - loss: 0.0031 - val_loss: 0.0032
Epoch 5/100
[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 15ms/step - loss: 0.0029 - val_loss: 0.0031
Epoch 6/100
[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 16ms/step - loss: 0.0027 - val_loss: 0.0030
Epoch 7/100
[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step - loss: 0.0026 - val_loss: 0.0031
Epoch 8/100
[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step - loss: 0.00

None


None
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 62ms/step
0.002644635371273108
valid files ['usctimit_mri_m3_081_085.avi', 'usctimit_mri_m3_171_175.avi', 'usctimit_mri_m3_251_255.avi', 'usctimit_mri_m3_406_410.avi']
test files ['usctimit_mri_m3_441_445.avi', 'usctimit_mri_m3_146_150.avi', 'usctimit_mri_m3_261_265.avi', 'usctimit_mri_m3_121_125.avi', 'usctimit_mri_m3_316_320.avi', 'usctimit_mri_m3_416_420.avi', 'usctimit_mri_m3_101_105.avi', 'usctimit_mri_m3_061_065.avi', 'usctimit_mri_m3_091_095.avi', 'usctimit_mri_m3_401_405.avi']
starting train usctimit_mri_m3_166_170.avi
minmax: 0.0 1.0
n_frames_all:  443 mgc_size:  443
starting train usctimit_mri_m3_306_310.avi
minmax: 0.0 1.0
n_frames_all:  985 mgc_size:  985
starting train usctimit_mri_m3_051_055.avi
minmax: 0.0 1.0
n_frames_all:  1460 mgc_size:  1460
starting train usctimit_mri_m3_056_060.avi
minmax: 0.0 1.0
n_frames_all:  1944 mgc_size:  1944
starting train usctimit_mri_m3_276_280.avi
minmax: 0.0 1.0
n_fram

  super().__init__(**kwargs)


starting training M3 2024-10-30_08-53-07
Epoch 1/100
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 34ms/step - loss: 0.0096 - val_loss: 0.0031
Epoch 2/100
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 16ms/step - loss: 0.0034 - val_loss: 0.0029
Epoch 3/100
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - loss: 0.0032 - val_loss: 0.0029
Epoch 4/100
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - loss: 0.0031 - val_loss: 0.0028
Epoch 5/100
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step - loss: 0.0029 - val_loss: 0.0028
Epoch 6/100
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - loss: 0.0029 - val_loss: 0.0028
Epoch 7/100
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - loss: 0.0028 - val_loss: 0.0029
Epoch 8/100
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - loss: 0.0027

None


None
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 59ms/step
0.0024143756733620734
valid files ['usctimit_mri_f1_081_085.avi', 'usctimit_mri_f1_171_175.avi', 'usctimit_mri_f1_251_255.avi', 'usctimit_mri_f1_401_405.avi']
test files ['usctimit_mri_f1_441_445.avi', 'usctimit_mri_f1_146_150.avi', 'usctimit_mri_f1_261_265.avi', 'usctimit_mri_f1_121_125.avi', 'usctimit_mri_f1_316_320.avi', 'usctimit_mri_f1_411_415.avi', 'usctimit_mri_f1_101_105.avi', 'usctimit_mri_f1_061_065.avi', 'usctimit_mri_f1_091_095.avi', 'usctimit_mri_f1_396_400.avi']
starting train usctimit_mri_f1_166_170.avi
minmax: 0.0 1.0
n_frames_all:  489 mgc_size:  489
starting train usctimit_mri_f1_306_310.avi
minmax: 0.0 1.0
n_frames_all:  1085 mgc_size:  1085
starting train usctimit_mri_f1_051_055.avi
minmax: 0.0 1.0
n_frames_all:  1620 mgc_size:  1620
starting train usctimit_mri_f1_056_060.avi
minmax: 0.0 1.0
n_frames_all:  2150 mgc_size:  2150
starting train usctimit_mri_f1_276_280.avi
minmax: 0.0 1.0
n_f

  super().__init__(**kwargs)


starting training F1 2024-10-30_08-55-32
Epoch 1/100
[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 28ms/step - loss: 0.0070 - val_loss: 0.0033
Epoch 2/100
[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 15ms/step - loss: 0.0023 - val_loss: 0.0030
Epoch 3/100
[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - loss: 0.0021 - val_loss: 0.0031
Epoch 4/100
[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 15ms/step - loss: 0.0020 - val_loss: 0.0030
Epoch 5/100
[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - loss: 0.0019 - val_loss: 0.0030
Epoch 6/100
[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - loss: 0.0018 - val_loss: 0.0030
Epoch 7/100
[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - loss: 0.0018 - val_loss: 0.0032
Epoch 8/100
[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - loss: 0.0017

None


None
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 69ms/step
0.0020238683232015895
valid files ['usctimit_mri_f2_081_085.avi', 'usctimit_mri_f2_171_175.avi', 'usctimit_mri_f2_251_255.avi', 'usctimit_mri_f2_401_405.avi']
test files ['usctimit_mri_f2_441_445.avi', 'usctimit_mri_f2_146_150.avi', 'usctimit_mri_f2_261_265.avi', 'usctimit_mri_f2_121_125.avi', 'usctimit_mri_f2_316_320.avi', 'usctimit_mri_f2_411_415.avi', 'usctimit_mri_f2_101_105.avi', 'usctimit_mri_f2_061_065.avi', 'usctimit_mri_f2_091_095.avi', 'usctimit_mri_f2_396_400.avi']
starting train usctimit_mri_f2_166_170.avi
minmax: 0.0 1.0
n_frames_all:  485 mgc_size:  485
starting train usctimit_mri_f2_306_310.avi
minmax: 0.0 1.0
n_frames_all:  1080 mgc_size:  1080
starting train usctimit_mri_f2_051_055.avi
minmax: 0.0 1.0
n_frames_all:  1608 mgc_size:  1608
starting train usctimit_mri_f2_056_060.avi
minmax: 0.0 1.0
n_frames_all:  2137 mgc_size:  2137
starting train usctimit_mri_f2_276_280.avi
minmax: 0.0 1.0
n_f

  super().__init__(**kwargs)


starting training F2 2024-10-30_08-59-10
Epoch 1/100
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 25ms/step - loss: 0.0101 - val_loss: 0.0045
Epoch 2/100
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16ms/step - loss: 0.0039 - val_loss: 0.0042
Epoch 3/100
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - loss: 0.0037 - val_loss: 0.0040
Epoch 4/100
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 17ms/step - loss: 0.0035 - val_loss: 0.0041
Epoch 5/100
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step - loss: 0.0034 - val_loss: 0.0042
Epoch 6/100
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - loss: 0.0032 - val_loss: 0.0041
Epoch 7/100
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 16ms/step - loss: 0.0031 - val_loss: 0.0044
Epoch 8/100
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - loss: 0.00

None


None
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step
0.003542302968431925


In [None]:
def mri2vid(mri_data, dir_file, filename_no_ext, n_width, n_height, FramesPerSec):
    """Write a sequence of grayscale frames to ``<dir_file><filename_no_ext>.avi``.

    Args:
        mri_data: Sequence of frames; each item must hold
            ``n_width * n_height`` values, nominally in the [0, 1] range.
            Values outside that range (e.g. raw outputs of a linear layer)
            are clipped so that they cannot wrap around in the 8-bit cast.
        dir_file: Output directory, concatenated directly with the file name
            (so it should end with a path separator).
        filename_no_ext: Output file name without extension.
        n_width: Frame width in pixels.
        n_height: Frame height in pixels.
        FramesPerSec: Frame rate of the written video.

    Returns:
        None. Progress messages are printed.
    """
    print(filename_no_ext + ' - MRI video started')
    output_file_no_ext = dir_file + filename_no_ext
    n_frames = len(mri_data)

    # Uncompressed 8-bit (use cv2.VideoWriter_fourcc instead of just VideoWriter_fourcc)
    fourcc = cv2.VideoWriter_fourcc(*'Y800')
    # OpenCV takes the frame size as (width, height); the final 0 = grayscale.
    video = cv2.VideoWriter(output_file_no_ext + '.avi', fourcc, float(FramesPerSec), (n_width, n_height), 0)

    try:
        if not video.isOpened():
            # A writer that failed to open silently drops every frame.
            print('WARNING: could not open ' + output_file_no_ext + '.avi for writing')

        for n in range(n_frames):
            # Clip before the cast: uint8 conversion of values outside
            # [0, 255] would wrap around instead of saturating.
            scaled = 255 * np.clip(np.asarray(mri_data[n]), 0.0, 1.0)
            # Image arrays are (rows, cols) = (height, width).
            frame = np.uint8(scaled).reshape(n_height, n_width, 1)
            video.write(frame)
            print('frame ', n, ' done', end='\r')
    finally:
        # Finalise the file even if writing a frame fails.
        video.release()

    print(filename_no_ext + ' - MRI video finished')

# Export the predicted frames as a video for visual inspection.
# NOTE: this relies on y_pred, n_width, n_height and framesPerSec being left
# in the kernel by the training cell; y_pred holds the predictions of the
# last speaker processed there.
basefilename_name_only = 'temp_file'
dir_mri_test = '/content/drive/MyDrive/GAN_based_models/speech2mri/temp_outputs/'
mri_data = y_pred

# Disabled follow-up step (mrividwav2demo is not defined in this notebook):
# mrividwav2demo(dir_mri_test, basefilename_name_only + '.avi', os.path.dirname(dir_mri_wav) + '/', os.path.basename(dir_mri_wav))
mri2vid(
    mri_data=mri_data,
    dir_file=dir_mri_test,
    filename_no_ext=basefilename_name_only,
    n_width=n_width,
    n_height=n_height,
    FramesPerSec=framesPerSec,
)

temp_file - MRI video started
temp_file - MRI video finished
