In [None]:
import librosa
import pandas as pd
import numpy as np

In [None]:
from google.colab import drive
# Mount Google Drive under /content/gdrive. Later cells read the spreadsheet and
# the audio clips from paths below this mount point.
drive.mount('/content/gdrive', force_remount=True) # force_remount=True is passed so a stale mount is presumably replaced

In [None]:
# Load the clip metadata spreadsheet. pd.read_excel already returns a DataFrame,
# so no extra pd.DataFrame(...) wrapper is needed. Later cells read the
# "file_name" column and the rating column named by `quality`.
audio_files = pd.read_excel("/content/gdrive/MyDrive/222 proj/data.xlsx")
audio_files

In [None]:
import tensorflow as tf
!pip install tensorflow_io
import tensorflow_io as tfio
def get_spectrogram(file_path, target_rate=16000, frame_length=512, frame_step=32):
  """Load a WAV file and return its magnitude spectrogram.

  The file is decoded with a single channel, resampled to `target_rate`, and
  passed through a short-time Fourier transform whose magnitude is returned.

  Args:
    file_path: Path of the WAV file to read.
    target_rate: Sample rate (Hz) the audio is resampled to before the STFT.
    frame_length: STFT window length in samples.
    frame_step: STFT hop size in samples.

  Returns:
    A tensor of STFT magnitudes with a trailing axis of size 1 appended
    (frames, frequency bins, 1).
  """
  raw_bytes = tf.io.read_file(file_path) # read file
  waveform, sample_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1) # decode wav
  waveform = tf.squeeze(waveform, axis=1) # drop the single channel axis
  # tfio's resampler is given rate_in as int64, hence the cast of the decoded rate.
  waveform = tfio.audio.resample(
      waveform,
      rate_in=tf.cast(sample_rate, dtype=tf.int64),
      rate_out=target_rate,
  )
  stft = tf.signal.stft(waveform, frame_length=frame_length, frame_step=frame_step)
  magnitude = tf.abs(stft) # complex STFT -> magnitude
  return tf.expand_dims(magnitude, axis=2) # add the trailing channel axis

In [None]:
import tensorflow as tf
# Build one magnitude spectrogram per row of audio_files, in row order.
clip_dir = "/content/gdrive/MyDrive/222 proj/audio_clips/"
max_frames = 15000 # 30 seconds: 16000 Hz / 32-sample hop = 500 frames per second

spectrograms = [] # list of spectrograms
for file_name in audio_files["file_name"]:
  print(file_name) # progress indicator
  spectrogram = get_spectrogram(clip_dir + file_name)
  # NOTE(review): clips shorter than 30 seconds yield fewer than max_frames
  # frames and are not padded here - confirm every clip is long enough.
  spectrogram = spectrogram[0:max_frames] # cut to 30 seconds
  spectrograms.append(spectrogram) # after the loop, spectrograms holds one entry per row of audio_files

# DataFrame.insert raises if the column already exists, so drop a column left
# over from a previous run of this cell to keep the cell re-runnable.
if "spectrogram" in audio_files.columns:
  audio_files = audio_files.drop(columns="spectrogram")
# Kept as the first column so positional lookups of column 0 still find it.
audio_files.insert(0, "spectrogram", spectrograms)


In [None]:
audio_files.head() # preview the first rows, including the "spectrogram" column inserted above

In [None]:
import seaborn
from scipy import stats

# Name of the rating column to train on; it is used as a column key of
# audio_files and as the saved model's directory name.
# NOTE(review): "enthusiasim" differs from the "enthusiasm" spelling accepted by
# test_speech further down - confirm which spelling the spreadsheet uses.
quality = "enthusiasim" # THIS IS WHERE TO CHOOSE WHICH QUALITY WE WANT TO TRAIN THE MODEL ON

# Standardise the ratings once, then reuse the result for both the plot and the
# column overwrite instead of computing the z-score twice.
quality_z = stats.zscore(audio_files[quality])
seaborn.histplot(data=quality_z)
audio_files[quality] = quality_z

In [None]:
from matplotlib import pyplot as plt
# Show a 250-frame excerpt (frames 3000-3249) of the first clip's spectrogram.
excerpt = audio_files['spectrogram'][0][3000:3250]
# tf.transpose without `perm` reverses the axes, so the size-1 channel axis
# comes first; indexing [0] then leaves a 2-D (frequency bins, frames) image.
excerpt_image = tf.transpose(excerpt)[0]
plt.imshow(excerpt_image)
plt.show()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the clips for testing (test_size=0.25), i.e. a 75% / 25% split;
# random_state fixes the shuffle. Features are selected by column name rather
# than by position so the split does not depend on column order.
x_train, x_test, y_train, y_test = train_test_split(
    audio_files["spectrogram"],
    stats.zscore(audio_files[quality]),
    test_size=0.25,
    random_state=48,
)


In [None]:
from tensorflow.python.framework import ops
# Convert the split outputs into plain NumPy arrays for Keras.
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)
# Each feature entry is a TensorFlow tensor: turn each into a NumPy array and
# stack them along a new leading (sample) axis. np.stack raises if the
# spectrograms do not all share one shape, instead of building an object array.
x_train = np.stack([tensor.numpy() for tensor in x_train])
x_test = np.stack([tensor.numpy() for tensor in x_test])

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D

# Build the CNN regressor: two single-filter 8x8 convolutions, each followed by
# 8x8 max pooling, then a flatten and two dense layers ending in one output.
# The input is a (15000, 257, 1) spectrogram.
# NOTE(review): both dense layers use a linear activation - confirm that the
# 10-unit layer is intended to have no non-linearity.
model = Sequential([
  Conv2D(1, (8,8), activation='relu', input_shape=[15000, 257, 1]),
  MaxPooling2D(pool_size=(8, 8)),
  Conv2D(1, (8,8), activation='relu'),
  MaxPooling2D(pool_size=(8, 8)),
  Flatten(),
  Dense(10, activation='linear'),
  Dense(1, activation='linear'),
])


In [None]:
# Mean of the held-out targets, shown as the cell output.
np.mean(y_test)

In [None]:
# Optimise mean squared error with Adam; MAE and MSE are reported as metrics.
model.compile(loss='mse', optimizer='adam', metrics=['mae', 'mse'])

# train the model
# validation_data is passed as the (x_val, y_val) tuple that Model.fit documents;
# a list is not a documented form for it.
# NOTE(review): the held-out test split doubles as the validation set here.
history = model.fit(x_train, y_train, epochs=3, validation_data=(x_test, y_test))

In [None]:
# Predict on the held-out clips; the model's final layer is Dense(1), so there is one value per clip.
y_pred = model.predict(x_test)

In [None]:
from sklearn.metrics import r2_score
# R^2 of the predictions against the held-out targets.
print(r2_score(y_test, y_pred))

In [None]:
from scipy.stats import spearmanr
# Spearman rank correlation between the held-out targets and the predictions.
# Only the coefficient is printed; the second returned value is kept in `p` but not used here.
coef, p = spearmanr(y_test, y_pred)
print(coef)

In [None]:
!mkdir saved_model
model.save("saved_models/" + quality) # create ability to save model for any quality

In [None]:
# after saving all different models...

In [None]:
def test_speech(file_name, characteristic):
  if characteristic not in ("assertiveness", "enthusiasm", "clarity", "engagement"):
    print("Error: test_speech called with bad characteristic. Please use one of the following: assertiveness, enthusiasm, clarity, engagement")
    return -1

  model = tf.keras.models.load_model('saved_models/' + quality) # load in saved model

  spectrogram = get_spectrogram("/content/gdrive/MyDrive/222 proj/audio_clips/" + file_name)
  spectrogram = spectrogram[0:15000] # cut to 30 seconds
  return model.predict(spectrogram) # return prediction