In [1]:
import numpy as np
import pandas as pd
import os
import librosa
import librosa.display
from pydub.silence import split_on_silence
from pydub import AudioSegment, effects 
from scipy.io.wavfile import read, write



def preprocess_audio(audio_file_name):

    audio_file, sr = librosa.load(audio_file_name)
    
    audio_file = librosa.effects.preemphasis(audio_file)
    audio_file = librosa.effects.trim(audio_file, top_db=20)[0]
    audio_file = librosa.util.normalize(audio_file)
    
    return sr, audio_file

In [2]:
DATA_PATH = r"D:/Datasets/Speech Processing/Data/"

def get_file_names(path = DATA_PATH):
    file_names = os.listdir(path)
    return file_names

def process_audio(audio_file_name):
    rate, audio = read(f"./Data/{audio_file_name}")

    aud = AudioSegment(audio.tobytes(), frame_rate = rate,
                         sample_width = audio.dtype.itemsize, channels = 1)

    audio_chunks = split_on_silence(
        aud,
        min_silence_len = 1000,
        silence_thresh = -50,
        keep_silence = 500,)
    
    # audio chunks are combined here
    audio_processed = sum(audio_chunks)
    audio_processed = np.array(audio_processed.get_array_of_samples())
    audio_file = audio_file_name.split(".")[0]
    write(f"./Processed Data/{audio_file}_PROCESSED.wav", rate, audio_processed)

file_names = get_file_names()
for file_name in file_names:
    process_audio(file_name)
print("Done")

In [3]:
def get_label_dict(df, file_list, test=False):
    label_dict = {}
    
    if test:
        df_list = df['PHQ_Score']
    else:
        df_list = df['PHQ8_Score']

    for file in file_list:
        patient_num = int(file.split("/")[-1].split("_")[0])

        patient_list = list(df['Participant_ID'])

        idx = patient_list.index(patient_num)

        phq8_score = int(df_list[idx])
        
        if phq8_score in range(0, 6):
            score = 0
        elif phq8_score in range(6, 15):
            score = 1
        elif phq8_score in range(15, 22):
            score = 2
        elif phq8_score in range(22, 28):
            score = 3

        label_dict[f"{file}"] = score
    
    return label_dict

In [7]:
def get_set(df):
    df_files = [f"D:/Datasets/Speech Processing/Processed Data/{x}_AUDIO_PROCESSED.wav" for x in df['Participant_ID']]

    return list(set(df_files))

In [8]:
train_df, test_df, val_df = pd.read_csv(f"./train.csv"), pd.read_csv(f"./test.csv"), pd.read_csv(f"./val.csv")

train, test, val = get_set(train_df), get_set(test_df), get_set(val_df)

In [9]:
train_dict, test_dict, val_dict = get_label_dict(train_df, train), get_label_dict(test_df, test, True), get_label_dict(val_df, val)
data_dict = {**train_dict, **test_dict, **val_dict}

In [10]:
PROCESSED_DATA_PATH = r"D:/Datasets/Speech Processing/Processed Data/"

data_list = list(data_dict.keys())
label_list = list(data_dict.values())

new_data_list = []
new_label_list = []
for data in data_list:
    audios = os.listdir(PROCESSED_DATA_PATH)
    data_2 = data.split('/')[2]
    if data_2 in audios:
        new_data_list.append(data)
        new_label_list.append(label_list[data_list.index(data)])

In [11]:
csv_path = "./Data Labels.csv"
data_df = pd.read_csv(csv_path)
new_data = list(data_df["New Data"])
new_labels = list(data_df["New Labels"])

In [12]:
csv_path = "./Combined Features.csv"
combined_df = pd.read_csv(csv_path)
combined = list(combined_df["Combined"])
combined_features = []
for i in combined:
    new_i = i.replace('\n', ' ')
    new_i = new_i.replace('[', '')
    new_i = new_i.replace(']', '')
    array_data = np.fromstring(new_i, dtype=np.float64, sep=' ')
    combined_features.append(array_data)
print(combined_features)
print(len(combined_features[0]))

[array([-4.85231260e+02,  8.07786790e+01,  2.83636800e+01,  1.87462650e+01,
        1.07295020e+01,  8.54391570e+00,  2.91498260e+00,  1.88066350e+00,
        1.48002710e-01,  2.13182590e+00,  4.51385350e+00,  8.51442220e-02,
       -1.12895550e+00,  1.52461820e-03,  6.07694620e-04, -8.09413790e-04,
       -2.01551130e-04,  2.23417700e-05, -3.35316550e-04, -1.79129260e-04,
        8.98939090e-04,  1.65928610e-03,  6.26254770e-04, -2.45515550e-04,
        3.70653460e-04,  8.09512220e-04, -3.28687110e-06,  1.65747700e-04,
        2.58554300e-04,  1.11684170e-04,  1.93315350e-04,  1.35246960e-04,
       -6.41282240e-05, -3.55471570e-05, -2.41795280e-04, -3.06375940e-04,
       -8.69797590e-05,  2.59585890e-05,  9.24054180e-05,  4.81180000e-01,
        4.78933330e-01,  4.77413120e-01,  4.67788040e-01,  4.42175570e-01,
        4.30359540e-01,  4.18223300e-01,  4.53993950e-01,  5.26944100e-01,
        5.94910860e-01,  5.77035250e-01,  5.28009400e-01,  1.00000000e+00,
       -3.15106724e+00, 

In [13]:
X = np.array(combined_features)

In [14]:
Y = np.array(new_labels)

In [15]:
from sklearn.model_selection import train_test_split
train_X, test_X, train_Y, test_Y = train_test_split(X, Y, test_size = 0.2)

In [16]:
from sklearn import svm
from sklearn.metrics import accuracy_score

clf = svm.SVC()

clf.fit(train_X, train_Y)

# Make predictions on the test set
y_pred = clf.predict(test_X)

# Evaluate the model
accuracy = accuracy_score(test_Y, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.5100671140939598
