# Download Files

In [None]:
#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2021 Apple Inc. All Rights Reserved.
#

"""
For each podcast episode:
* Download the raw mp3/m4a file
* Convert it to a 16k mono wav file
* Remove the original file
"""

import os
import pathlib
import subprocess

import numpy as np

# import argparse

# parser = argparse.ArgumentParser(description='Download raw audio files for SEP-28k or FluencyBank and convert to 16k hz mono wavs.')
# parser.add_argument('--episodes', type=str, required=True,
#                    help='Path to the labels csv files (e.g., SEP-28k_episodes.csv)')
# parser.add_argument('--wavs', type=str, default="wavs",
#                    help='Path where audio files from download_audio.py are saved')


# args = parser.parse_args()
# episode_uri = args.episodes
# wav_dir = args.wavs

episode_uri = '../input/fluencybank_episodes.csv'
wav_dir = '../input/fluencybank_rawWAVs'

# Load episode data. atleast_2d keeps the column indexing below valid when the
# CSV holds a single episode (genfromtxt then returns a 1-D row).
table = np.atleast_2d(np.genfromtxt(episode_uri, dtype=str, delimiter=", "))
urls = table[:,2]
n_items = len(urls)

audio_types = [".mp3", ".m4a", ".mp4"]


for i in range(n_items):
	# Show/episode IDs are the last two columns, the download URL the third
	show_abrev = table[i,-2]
	ep_idx = table[i,-1]
	episode_url = table[i,2]

	# First known extension found in the URL; '' when none matches
	ext = next((audio_type for audio_type in audio_types if audio_type in episode_url), '')

	# Ensure the base folder exists for this episode
	episode_dir = pathlib.Path(f"{wav_dir}/{show_abrev}/")
	os.makedirs(episode_dir, exist_ok=True)

	# Get file paths
	audio_path_orig = pathlib.Path(f"{episode_dir}/{ep_idx}{ext}")
	wav_path = pathlib.Path(f"{episode_dir}/{ep_idx}.wav")

	# Check if this file has already been downloaded
	if os.path.exists(wav_path):
		continue

	print("Processing", show_abrev, ep_idx)
	# Download raw audio file. This could be parallelized.
	# Arguments are passed as a list (no shell), so characters such as '&' or
	# '?' in the URL reach wget untouched instead of being parsed by a shell.
	if not os.path.exists(audio_path_orig):
		download = subprocess.run(["wget", "-O", str(audio_path_orig), episode_url])
		if download.returncode != 0:
			print("Download failed", show_abrev, ep_idx)
			# wget -O may leave an empty or partial file behind; drop it so the
			# next run downloads again instead of converting a broken file.
			if os.path.exists(audio_path_orig):
				os.remove(audio_path_orig)
			continue

	# Convert to 16khz mono wav file
	conversion = subprocess.run(
		["ffmpeg", "-i", str(audio_path_orig), "-ac", "1", "-ar", "16000", str(wav_path)]
	)
	if conversion.returncode != 0:
		print("Conversion failed", show_abrev, ep_idx)
		# A partial wav would make the "already downloaded" check above skip
		# this episode forever, so remove it.
		if os.path.exists(wav_path):
			os.remove(wav_path)

	# Remove the original mp3/m4a file
	os.remove(audio_path_orig)


Processing HeStutters 0
Processing HeStutters 1
Processing HeStutters 2
Processing HeStutters 3
Processing HeStutters 4
Processing HeStutters 5
Processing HeStutters 6
Processing HeStutters 7
Processing HeStutters 8
Processing HeStutters 9
Processing HeStutters 10
Processing HeStutters 11
Processing HeStutters 12
Processing HeStutters 13
Processing HeStutters 14
Processing HeStutters 15
Processing HeStutters 16
Processing HeStutters 17
Processing HeStutters 18
Processing HeStutters 19
Processing HeStutters 20
Processing HeStutters 21
Processing HeStutters 22
Processing HeStutters 23
Processing HVSA 0
Processing HVSA 1
Processing HVSA 2
Processing HVSA 3
Processing IStutterSoWhat 0
Processing IStutterSoWhat 1
Processing IStutterSoWhat 2
Processing IStutterSoWhat 3
Processing IStutterSoWhat 4
Processing MyStutteringLife 0
Processing MyStutteringLife 1
Processing MyStutteringLife 2
Processing MyStutteringLife 3
Processing MyStutteringLife 4
Processing MyStutteringLife 5
Processing MyStutt

In [None]:
#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2021 Apple Inc. All Rights Reserved.
#

"""
For each podcast episode:
* Get all clip information for that episode
* Save each clip as a new wav file.
"""

import os
import pathlib
import subprocess

import numpy as np
import pandas as pd
from scipy.io import wavfile

# import argparse

# parser = argparse.ArgumentParser(description='Extract clips from SEP-28k or FluencyBank.')
# parser.add_argument('--labels', type=str, required=True,
#                    help='Path to the labels csv files (e.g., SEP-28k_labels.csv)')
# parser.add_argument('--wavs', type=str, default="wavs",
#                    help='Path where audio files from download_audio.py are saved')
# parser.add_argument('--clips', type=str, default="clips",
#                    help='Path where clips should be extracted')
# parser.add_argument("--progress", action="store_true",
#                     help="Show progress")

# args = parser.parse_args()
# label_file = args.labels
# data_dir = args.wavs
# output_dir = args.clips

# label_file = '../input/fluencybank_labels.csv'
# data_dir = '../input/fluencybank_rawWAVs'
# output_dir = '../input/fluencybank_clippedWAVs'

label_file = '../input/SEP-28k_labels.csv'
data_dir = '../input/SEP-28k_rawWAVs'
output_dir = '../input/SEP-28k_clippedWAVs'

# Read the clip table; EpId is read as str
data = pd.read_csv(label_file, dtype={"EpId":str})

# Per-clip columns used below (everything from the sixth column on is a label)
shows = data.Show
episodes = data.EpId
clip_idxs = data.ClipId
starts = data.Start
stops = data.Stop
labels = data.iloc[:,5:].values

n_items = len(shows)

loaded_wav = ""
cur_iter = range(n_items)
# Optional progress bar:
#         from tqdm import tqdm
#         cur_iter = tqdm(cur_iter)

for i in cur_iter:
	show_abrev = shows[i]
	episode = episodes[i].strip()
	clip_idx = clip_idxs[i]

	# Source episode; rows whose episode wav is absent are reported and skipped
	wav_path = f"{data_dir}/{show_abrev}/{episode}.wav"
	if not os.path.exists(wav_path):
		print("Missing", wav_path)
		continue

	# Destination: <output_dir>/<show>/<episode>/<show>_<episode>_<clip>.wav
	clip_dir = pathlib.Path(f"{output_dir}/{show_abrev}/{episode}/")
	clip_path = f"{clip_dir}/{show_abrev}_{episode}_{clip_idx}.wav"
	os.makedirs(clip_dir, exist_ok=True)

	# Only read the episode from disk when it differs from the one in memory
	if loaded_wav != wav_path:
		sample_rate, audio = wavfile.read(wav_path)
		assert sample_rate == 16000, "Sample rate must be 16 khz"
		loaded_wav = wav_path

	# Start/Stop are used directly as sample indices into the episode audio
	first_sample, last_sample = starts[i], stops[i]
	wavfile.write(clip_path, sample_rate, audio[first_sample:last_sample])


# Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns

from glob import glob

import librosa
import librosa.display
import IPython.display as ipd

from itertools import cycle

# Plain white seaborn theme; palette=None leaves the colour cycle untouched.
sns.set_theme(style="white", palette=None)

# Colours of the active matplotlib property cycle: once as a list, once as an
# endless iterator over the same colours.
color_pal = plt.rcParams["axes.prop_cycle"].by_key()["color"]
color_cycle = cycle(list(color_pal))

# Spectrogram

In [None]:
# Compute one flattened dB spectrogram per extracted clip and store it as .npy.
# The clip tree is walked from its root so every show and episode folder is
# covered; the DataFrame cell later loads one .npy per row of the label file.
# source_directory = '../input/fluencybank_clippedWAVs/'
# destination_directory = '../output/fluencybank_spectograms'

source_directory = '../input/SEP-28k_clippedWAVs/'
destination_directory = '../output/SEP-28k_spectograms'

# os.walk yields nothing for a missing root, so fail loudly instead
if not os.path.isdir(source_directory):
  raise FileNotFoundError(f'Clip directory not found: {source_directory}')

# np.save does not create missing parent folders
os.makedirs(destination_directory, exist_ok=True)

for folder_path, _subfolders, files in os.walk(source_directory):
  for file in sorted(files):
    if not file.endswith('.wav'):
      continue
    file_path = os.path.join(folder_path, file)

    # Magnitude STFT in dB relative to the clip's own maximum, flattened to 1-D
    y, sr = librosa.load(file_path)
    D = librosa.stft(y)
    S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
    flatten_S_db = S_db.flatten()

    # Same base name as the clip, with .npy in place of .wav
    clip_name = os.path.splitext(file)[0]
    new_file_path = os.path.join(destination_directory, f'{clip_name}.npy')

    # Save the processed data
    np.save(new_file_path, flatten_S_db)

# Create a DataFrame from the labels CSV and the .npy spectrogram files

In [None]:
# Step 1: Read the label CSV into a DataFrame.
# Same label file as the clip-extraction cell. EpId and ClipId are read as text
# and only stripped of surrounding whitespace, so they reproduce the ids that
# the extraction cell puts into the clip file names (it uses the stripped text
# of EpId as well, without any zero-padding).

csv_file_path = '../input/SEP-28k_labels.csv'
df = pd.read_csv(csv_file_path, delimiter=',', dtype={'EpId': str, 'ClipId': str})

# Vectorized clean-up instead of rewriting the frame row by row
df['EpId'] = df['EpId'].str.strip()
df['ClipId'] = df['ClipId'].str.strip()

def load_npy(file_path):
  """Read the array stored in the .npy file at `file_path` and return it."""
  loaded_array = np.load(file_path)
  return loaded_array

# Folder containing the .npy files; this is the folder the spectrogram cell
# writes to (destination_directory).
npy_folder = '../output/SEP-28k_spectograms'

# NOTE(review): the assignment at the end of this loop replaces the ENTIRE
# 'NumpyData' column on every pass. After the loop each row therefore holds a
# single scalar taken from the LAST file's array (aligned by row index), not
# the spectrogram of its own clip. It is left as is because the training cell
# reshapes this column to (-1, 1); storing one array per row needs a
# coordinated change of the training and prediction cells.
for index, row in df.iterrows():
  show, ep_id, clip_id = row['Show'], row['EpId'], row['ClipId']
  npy_file_path = os.path.join(npy_folder, f'{show}_{ep_id}_{clip_id}.npy')

  # Load the numpy array from the .npy file
  numpy_array = load_npy(npy_file_path)

  numpy_series = pd.Series(numpy_array)
  # Assign the Series as the 'NumpyData' column (see NOTE above)
  df['NumpyData'] = numpy_series






In [None]:
# Remove the clip-identification and timing columns; the remaining columns
# are kept for training.
columns_to_drop = ['Show', 'EpId', 'ClipId', 'Start', 'Stop']
df = df.drop(columns_to_drop, axis=1)

print(df)

# Train from dataframe

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense

# Names of the 12 label columns used as training targets
label_columns = ['Unsure', 'PoorAudioQuality', 'Prolongation', 'Block', 'SoundRep', 'WordRep',
                 'DifficultToUnderstand', 'Interjection', 'NoStutteredWords', 'NaturalPause',
                 'Music', 'NoSpeech']

# Features: the 'NumpyData' column as a single-feature 2D array; targets: the label columns
X = df['NumpyData'].values.reshape(-1, 1)
y = df[label_columns].values

# Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize with statistics learned from the training rows only
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Two hidden ReLU layers followed by 12 independent sigmoid outputs, one per label
model = Sequential([
    Dense(64, activation='relu', input_shape=(1,)),
    Dense(64, activation='relu'),
    Dense(12, activation='sigmoid'),
])

# Binary cross-entropy treats each of the 12 outputs as its own yes/no decision
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 10 epochs, with 10% of the training rows set aside for validation
model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_split=0.1)

# Report loss and accuracy on the held-out test rows
loss, accuracy = model.evaluate(X_test_scaled, y_test)
print('Test Loss:', loss)
print('Test Accuracy:', accuracy)

# Persist the trained model
model.save('../output/model.h5')


In [None]:
!pip3 install tensorflowjs
import tensorflowjs as tfjs

tfjs.converters.save_keras_model(model, '../output/tensorflowjs')

# Predict from model

In [None]:
from keras.models import load_model

# Load the saved model
loaded_model = load_model('../output/model.h5')

# Same feature extraction as the spectrogram cell. The signal gets its own
# name so the training labels `y` are not overwritten.
test_audio, test_sr = librosa.load('../input/test4.mp3')
D = librosa.stft(test_audio)
S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
flatten_S_db = S_db.flatten()

# Build the model input (previously `X_new` was never defined, so predict()
# raised NameError). The layout mirrors the training cell: one feature per
# sample, standardized with the StandardScaler fitted there (`scaler`).
X_new = scaler.transform(flatten_S_db.reshape(-1, 1))

predictions = loaded_model.predict(X_new)
print(predictions.shape)

In [None]:
# Cut-off above which a predicted probability counts as a positive label
# (you can experiment with different values)
threshold = 0.0001

# 1 where the probability exceeds the threshold, 0 otherwise
binary_predictions = (predictions > threshold).astype(int)

# Label names, in the order of the model's 12 outputs
categories = ['Unsure', 'PoorAudioQuality', 'Prolongation', 'Block', 'SoundRep', 'WordRep',
              'DifficultToUnderstand', 'Interjection', 'NoStutteredWords', 'NaturalPause',
              'Music', 'NoSpeech']

# Pair each label with a flag; zip stops after the 12 names, so only the first
# 12 flattened values are used.
result_dict = {name: flag for name, flag in zip(categories, binary_predictions.flatten())}

# Display the results, one label per line
for category in result_dict:
    print(f'{category}: {result_dict[category]}')

In [None]:
# Mean of the predictions along the first axis (one value per output column)
average_prediction = predictions.mean(axis=0)

print(average_prediction)