In [1]:
import numpy as np
import pandas as pd
import os
import librosa
import librosa.display
from pydub.silence import split_on_silence
from pydub import AudioSegment, effects 
from scipy.io.wavfile import read, write



In [2]:
DATA_PATH = r"D:/Datasets/Speech Processing/Data/"

def get_file_names(path = DATA_PATH):
    file_names = os.listdir(path)
    return file_names

def process_audio(audio_file_name):
    rate, audio = read(f"./Data/{audio_file_name}")

    aud = AudioSegment(audio.tobytes(), frame_rate = rate,
                         sample_width = audio.dtype.itemsize, channels = 1)

    audio_chunks = split_on_silence(
        aud,
        min_silence_len = 1000,
        silence_thresh = -50,
        keep_silence = 500,)
    
    # audio chunks are combined
    audio_processed = sum(audio_chunks)
    audio_processed = np.array(audio_processed.get_array_of_samples())
    audio_file = audio_file_name.split(".")[0]
    write(f"./Processed Data/{audio_file}_PROCESSED.wav", rate, audio_processed)

file_names = get_file_names()
for file_name in file_names:
    process_audio(file_name)
print("Done")

In [3]:
def get_label_dict(df, file_list, test=False):
    label_dict = {}
    
    if test:
        df_list = df['PHQ_Score']
    else:
        df_list = df['PHQ8_Score']

    for file in file_list:
        patient_num = int(file.split("/")[-1].split("_")[0])

        patient_list = list(df['Participant_ID'])

        idx = patient_list.index(patient_num)

        phq8_score = int(df_list[idx])
        
        if phq8_score in range(0, 6):
            score = 0
        elif phq8_score in range(6, 15):
            score = 1
        elif phq8_score in range(15, 22):
            score = 2
        elif phq8_score in range(22, 28):
            score = 3

        label_dict[f"{file}"] = score
    
    return label_dict

In [4]:
def get_set(df):
    df_files = [f"D:/Datasets/Speech Processing/Processed Data/{x}_AUDIO_PROCESSED.wav" for x in df['Participant_ID']]

    return list(set(df_files))

In [5]:
train_df, test_df, val_df = pd.read_csv(f"./train.csv"), pd.read_csv(f"./test.csv"), pd.read_csv(f"./val.csv")

train, test, val = get_set(train_df), get_set(test_df), get_set(val_df)

In [6]:
train_dict, test_dict, val_dict = get_label_dict(train_df, train), get_label_dict(test_df, test, True), get_label_dict(val_df, val)
data_dict = {**train_dict, **test_dict, **val_dict}

In [7]:
PROCESSED_DATA_PATH = "D:/Datasets/Speech Processing/Processed Data/"

In [8]:
data_list = list(data_dict.keys())
label_list = list(data_dict.values())

In [11]:
new_data_list = []
new_label_list = []
for data in data_list:
    audios = os.listdir(PROCESSED_DATA_PATH)
    data_2 = data.split('/')[4]
    if data_2 in audios:
        new_data_list.append(data)
        new_label_list.append(label_list[data_list.index(data)])

In [12]:
new_data_list

['D:/Datasets/Speech Processing/Processed Data/426_AUDIO_PROCESSED.wav',
 'D:/Datasets/Speech Processing/Processed Data/376_AUDIO_PROCESSED.wav',
 'D:/Datasets/Speech Processing/Processed Data/338_AUDIO_PROCESSED.wav',
 'D:/Datasets/Speech Processing/Processed Data/304_AUDIO_PROCESSED.wav',
 'D:/Datasets/Speech Processing/Processed Data/401_AUDIO_PROCESSED.wav',
 'D:/Datasets/Speech Processing/Processed Data/317_AUDIO_PROCESSED.wav',
 'D:/Datasets/Speech Processing/Processed Data/434_AUDIO_PROCESSED.wav',
 'D:/Datasets/Speech Processing/Processed Data/486_AUDIO_PROCESSED.wav',
 'D:/Datasets/Speech Processing/Processed Data/409_AUDIO_PROCESSED.wav',
 'D:/Datasets/Speech Processing/Processed Data/336_AUDIO_PROCESSED.wav',
 'D:/Datasets/Speech Processing/Processed Data/353_AUDIO_PROCESSED.wav',
 'D:/Datasets/Speech Processing/Processed Data/479_AUDIO_PROCESSED.wav',
 'D:/Datasets/Speech Processing/Processed Data/321_AUDIO_PROCESSED.wav',
 'D:/Datasets/Speech Processing/Processed Data/327_

In [15]:
import numpy as np
import librosa
import soundfile as sf

def time_stretching(data, rate=0.8):
    data = librosa.effects.time_stretch(data, rate)
    return data

def pitch_shifting(data, sr, n_steps=2.0):
    data = librosa.effects.pitch_shift(data, sr, n_steps)
    return data

def add_noise(data, noise_factor=0.02):
    noise = np.random.randn(len(data))
    augmented_data = data + noise_factor * noise
    # Cast back to same data type
    augmented_data = augmented_data.astype(type(data[0]))
    return augmented_data

def augment_data(new_data_list, new_label_list):
    new_data = []
    new_labels = []
    for i in range(len(new_data_list)):
        audio, sr = librosa.load(new_data_list[i])
        time_stretched_data = time_stretching(audio)
        time_stretched_filename = str(new_data_list[i].split('.')[0]) + '_TIME_STRETCHED.wav'
        sf.write(time_stretched_filename, time_stretched_data, sr)
        pitch_shifted_data = pitch_shifting(audio, sr)
        pitch_shifted_filename = str(new_data_list[i].split('.')[0]) + '_PITCH_SHIFTED.wav'
        sf.write(pitch_shifted_filename, pitch_shifted_data, sr)
        noisy_data = add_noise(audio)
        noisy_data_filename = str(new_data_list[i].split('.')[0]) + '_NOISY.wav'
        sf.write(noisy_data_filename, noisy_data, sr)
        new_data.append(new_data_list[i])
        new_data.append(time_stretched_filename)
        new_data.append(pitch_shifted_filename)
        new_data.append(noisy_data_filename)
        new_labels.append(new_label_list[i])
        new_labels.append(new_label_list[i])
        new_labels.append(new_label_list[i])
        new_labels.append(new_label_list[i])
    return new_data, new_labels

In [16]:
new_data, new_label = augment_data(new_data_list, new_label_list)

In [44]:
data_df = pd.DataFrame({"New Data": new_data, "New Labels":new_label})

In [45]:
data_df.to_csv("Data Labels.csv")

## MFCC

In [17]:
MFCC_Features_list = []
num_mfcc = 13
for i in new_data:
    audio, sr = librosa.load(i)
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=num_mfcc)
    delta = librosa.feature.delta(mfcc)
    delta_delta = librosa.feature.delta(delta)
    concatenated_features = np.concatenate((mfcc, delta, delta_delta), axis=0)
    mfcc_mean = np.mean(concatenated_features, axis=1)        
    
    MFCC_Features_list.append(mfcc_mean)

In [18]:
MFCC_Features_list

[array([-4.8523126e+02,  8.0778679e+01,  2.8363680e+01,  1.8746265e+01,
         1.0729502e+01,  8.5439157e+00,  2.9149826e+00,  1.8806635e+00,
         1.4800271e-01,  2.1318259e+00,  4.5138535e+00,  8.5144222e-02,
        -1.1289555e+00,  1.5246182e-03,  6.0769462e-04, -8.0941379e-04,
        -2.0155113e-04,  2.2341770e-05, -3.3531655e-04, -1.7912926e-04,
         8.9893909e-04,  1.6592861e-03,  6.2625477e-04, -2.4551555e-04,
         3.7065346e-04,  8.0951222e-04, -3.2868711e-06,  1.6574770e-04,
         2.5855430e-04,  1.1168417e-04,  1.9331535e-04,  1.3524696e-04,
        -6.4128224e-05, -3.5547157e-05, -2.4179528e-04, -3.0637594e-04,
        -8.6979759e-05,  2.5958589e-05,  9.2405418e-05], dtype=float32),
 array([-5.1060071e+02,  7.7560661e+01,  2.8469296e+01,  1.7857534e+01,
         1.0098147e+01,  7.8813014e+00,  2.5998821e+00,  7.4673498e-01,
        -3.4680423e-01,  2.1251647e+00,  4.1607304e+00,  1.1738070e-01,
        -1.3615457e+00,  3.5139034e-04, -4.3327313e-05, -5.7271

In [19]:
len(MFCC_Features_list)

744

In [20]:
import pandas as pd

MFCC_df = pd.DataFrame({"MFCC": MFCC_Features_list})

MFCC_df.to_csv("MFCC Features.csv")

## Chroma

In [21]:
Chroma_Features_list = []
for i in new_data:
    audio, sr = librosa.load(i)
    chroma_features = librosa.feature.chroma_stft(y=audio, sr=sr)
    chroma_features_mean = np.mean(chroma_features, axis=1)            
    
    Chroma_Features_list.append(chroma_features_mean)

In [22]:
Chroma_Features_list

[array([0.48118   , 0.47893333, 0.47741312, 0.46778804, 0.44217557,
        0.43035954, 0.4182233 , 0.45399395, 0.5269441 , 0.59491086,
        0.57703525, 0.5280094 ], dtype=float32),
 array([0.44600317, 0.44529277, 0.44075444, 0.43590906, 0.4128726 ,
        0.3987354 , 0.3882369 , 0.4304896 , 0.5091542 , 0.5747403 ,
        0.55635315, 0.48978567], dtype=float32),
 array([0.56071734, 0.5067145 , 0.4525639 , 0.44358647, 0.43587372,
        0.42378423, 0.3912663 , 0.37438074, 0.36776587, 0.41287634,
        0.512152  , 0.5746907 ], dtype=float32),
 array([0.7748532 , 0.7825676 , 0.7934703 , 0.80738395, 0.81456494,
        0.7498319 , 0.663937  , 0.68506765, 0.7219654 , 0.7624385 ,
        0.7773217 , 0.7763969 ], dtype=float32),
 array([0.3407259 , 0.3363515 , 0.3245712 , 0.3511129 , 0.4178941 ,
        0.5029327 , 0.52935874, 0.488288  , 0.42150962, 0.37291417,
        0.35017875, 0.33496654], dtype=float32),
 array([0.31370872, 0.30854836, 0.29540268, 0.31754577, 0.3796139 ,
       

In [23]:
len(Chroma_Features_list)

744

In [24]:
import pandas as pd

Chroma_df = pd.DataFrame({"Chroma": Chroma_Features_list})

Chroma_df.to_csv("Chroma Features.csv")

## LogFBank Features

In [25]:
import scipy.io.wavfile as wav
from python_speech_features import mfcc, logfbank

fbank_features = []
for i in new_data:
# Load the speech file
    (rate, signal) = wav.read(i)

# Extract log filter bank energies
    fbank_feat = logfbank(signal, rate, nfilt=13)

    fbank_features_mean = np.mean(fbank_feat, axis=0) 
    fbank_features.append(fbank_features_mean)
    
fbank_features

















[array([7.20811674, 7.49507759, 7.3350198 , 7.02014909, 6.73024783,
        6.92604641, 6.77872483, 6.98961871, 7.06728445, 6.81816634,
        6.76192268, 7.42088699, 7.13010997]),
 array([6.72060557, 6.83537485, 6.63635712, 6.24198132, 6.19684815,
        6.22569279, 6.26010133, 6.44857274, 6.1893183 , 6.3748045 ,
        6.92064946, 6.13675829, 3.65777588]),
 array([6.74191714, 6.97547758, 6.83186563, 6.54610566, 6.2416522 ,
        6.50111991, 6.28899175, 6.64016918, 6.52097542, 6.2381957 ,
        6.92477573, 6.91807359, 5.20432112]),
 array([ 9.81093963, 10.68534551, 11.51983418, 12.30470061, 13.09082164,
        13.84584642, 14.57390641, 15.26565587, 15.90815482, 16.52132045,
        17.11503747, 17.61402274, 18.02905419]),
 array([8.20686092, 9.17901463, 9.22417392, 9.01978841, 8.79881738,
        8.50325737, 8.86051839, 8.69469785, 8.58466734, 7.99402735,
        8.51588189, 8.96049593, 8.59486047]),
 array([7.72694084, 8.66294483, 8.63388342, 8.47794666, 7.95864404,
        8

In [26]:
print(len(fbank_features))

744


In [27]:
import pandas as pd

logfbank_df = pd.DataFrame({"LogFbank": fbank_features})

logfbank_df.to_csv("LogFbank Features.csv")

## LPC Features

In [28]:
import numpy as np
import librosa

lpc_features = []
for j in new_data:

    audio, sr = librosa.load(j)

    # Pre-emphasis
    pre_emphasis = 0.97
    emphasized_audio = np.append(audio[0], audio[1:] - pre_emphasis * audio[:-1])

    # Framing the audio signal
    frame_size = 0.025  # 25 milliseconds
    frame_stride = 0.01  # 10 milliseconds
    frame_length = int(round(frame_size * sr))
    frame_step = int(round(frame_stride * sr))
    num_frames = int(np.ceil(float(np.abs(len(emphasized_audio) - frame_length)) / frame_step))

    # Padding the audio signal to make sure all frames have equal length
    pad_audio_length = num_frames * frame_step + frame_length
    padded_audio = np.pad(emphasized_audio, (0, pad_audio_length - len(emphasized_audio)), 'constant')

    # Splitting the audio signal into frames
    indices = np.tile(np.arange(0, frame_length), (num_frames, 1)) + np.tile(
        np.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T
    frames = padded_audio[indices.astype(np.int32, copy=False)]
    
    # Applying the LPC analysis
    order = 12  # Number of LPC coefficients (higher values give more precise modeling but require more computational resources)
    lpc_coeffs = np.zeros((num_frames, order + 1))
    for k in range(num_frames):
        temp = frames[k, :]
        all_zeros = not np.any(temp)
        if all_zeros:
            continue
        else:
            lpc_coeffs[k, :] = librosa.lpc(temp, order)
    
    lpc_coeffs_mean = np.mean(lpc_coeffs, axis=0)  
    lpc_features.append(lpc_coeffs_mean)
print(lpc_features)
print(len(lpc_features))

[array([  1.        ,  -3.15106724,   6.63629226, -10.99552315,
        15.21734849, -17.96603846,  18.40312277, -16.45486987,
        12.69329305,  -8.32934001,   4.51758276,  -1.87463963,
         0.47781587]), array([ 1.00000000e+00, -1.40044194e+00,  1.58721493e+00, -1.52444121e+00,
        1.06700823e+00, -4.43364643e-01,  1.65475366e-03,  1.29378474e-01,
       -1.96805486e-01,  5.99091589e-02,  3.17543949e-02, -2.99627317e-02,
       -6.19425074e-04]), array([ 1.        , -1.20921356,  1.35680899, -1.49184889,  1.42778144,
       -1.09646857,  0.70237922, -0.49247489,  0.21055185, -0.04345522,
        0.02356321, -0.03285426,  0.0247917 ]), array([1.        , 0.89137861, 0.79257106, 0.69868429, 0.61280006,
       0.53067825, 0.45512972, 0.38111725, 0.31325365, 0.24677796,
       0.18445794, 0.12174629, 0.0615056 ]), array([  1.        ,  -3.10676075,   6.38366799, -10.43909598,
        14.34245201, -16.83704355,  17.26306352, -15.48581526,
        12.01869237,  -7.97721286,   4.

In [29]:
import pandas as pd

LPC_df = pd.DataFrame({"LPC": lpc_features})

LPC_df.to_csv("LPC Features.csv")

In [None]:
import librosa
import numpy as np

plpc_features = []
for j in new_data:
    print(j)
    speech_signal, sample_rate = librosa.load(j)
    problem_signal = speech_signal
    frame_length = int(0.02 * sample_rate)
    hop_length = int(0.01 * sample_rate)
    speech_frames = librosa.util.frame(speech_signal, frame_length=frame_length, hop_length=hop_length).T

    windowed_frames = speech_frames * np.hamming(frame_length)

    model_order = 12
    lpc_coefficients = np.zeros((len(windowed_frames), model_order + 1))
    for i, frame in enumerate(windowed_frames):
        all_zeros = not np.any(frame)
        if all_zeros:
            continue
        else:
            lpc_coefficients[i] = librosa.lpc(frame, model_order)

    lsf_coefficients = np.zeros_like(lpc_coefficients)
    for i, lpc_coeffs in enumerate(lpc_coefficients):
        lpc_coeffs = np.append(lpc_coeffs, 0)  
        roots = np.roots(lpc_coeffs)  
        angles = np.angle(roots)
        sorted_indices = np.argsort(angles)
        lsf_coefficients[i] = np.sort(angles[sorted_indices])

    perceptual_coefficients = np.mean(np.array(lsf_coefficients), axis = 0)
    plpc_features.append(perceptual_coefficients)
print(len(perceptual_coefficients))

In [None]:
def normalize_list(lst):
    min_val = min(lst)
    max_val = max(lst)

    # Protect against division by zero
    if max_val == min_val:
        return [0 for _ in lst]

    return [float((x - min_val) / (max_val - min_val)) for x in lst]

In [None]:
print(10/10)

In [None]:
print("Max:", np.max(problem_frame))
print("Min:", np.min(problem_frame))
print("Any NaN:", np.any(np.isnan(problem_frame)))
print("Any Inf:", np.any(np.isinf(problem_frame)))

In [None]:
print("Max:", np.max(problem_f))
print("Min:", np.min(problem_f))
print("Any NaN:", np.any(np.isnan(problem_f)))
print("Any Inf:", np.any(np.isinf(problem_f)))

In [None]:
problem_f

## GFCC

In [30]:
import numpy as np
from python_speech_features import logfbank, fbank
from scipy.fftpack import dct

def compute_gfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13, nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97):
    """Compute GFCC features from an audio signal."""
    feat, energy = fbank(signal, samplerate, winlen, winstep, nfilt, nfft, lowfreq, highfreq, preemph)
    feat = np.log(feat)
    feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep]
    return feat

In [33]:
import scipy.io.wavfile as wav

gfcc_features = []
for i in new_data:
# Load the speech file
    (rate, signal) = wav.read(i)

    gfcc = compute_gfcc(signal, rate)
    
    gfcc_mean = np.mean(gfcc, axis= 0)
    
    gfcc_features.append(gfcc_mean)

print(len(gfcc_features))
print(len(gfcc_features[0]))

















744
13


In [34]:
import pandas as pd

GFCC_df = pd.DataFrame({"GFCC": gfcc_features})

GFCC_df.to_csv("GFCC Features.csv")

## Combined Features

In [35]:
csv_path = "./MFCC Features.csv"
MFCC_df = pd.read_csv(csv_path)
MFCC_Features = list(MFCC_df["MFCC"])
MFCC_Features_list = []
for i in MFCC_Features:
    new_i = i.replace('\n', ' ')
    new_i = new_i.replace('[', '')
    new_i = new_i.replace(']', '')
    array_data = np.fromstring(new_i, dtype=np.float64, sep=' ')
    MFCC_Features_list.append(array_data)
print(MFCC_Features_list)

[array([-4.8523126e+02,  8.0778679e+01,  2.8363680e+01,  1.8746265e+01,
        1.0729502e+01,  8.5439157e+00,  2.9149826e+00,  1.8806635e+00,
        1.4800271e-01,  2.1318259e+00,  4.5138535e+00,  8.5144222e-02,
       -1.1289555e+00,  1.5246182e-03,  6.0769462e-04, -8.0941379e-04,
       -2.0155113e-04,  2.2341770e-05, -3.3531655e-04, -1.7912926e-04,
        8.9893909e-04,  1.6592861e-03,  6.2625477e-04, -2.4551555e-04,
        3.7065346e-04,  8.0951222e-04, -3.2868711e-06,  1.6574770e-04,
        2.5855430e-04,  1.1168417e-04,  1.9331535e-04,  1.3524696e-04,
       -6.4128224e-05, -3.5547157e-05, -2.4179528e-04, -3.0637594e-04,
       -8.6979759e-05,  2.5958589e-05,  9.2405418e-05]), array([-5.1060071e+02,  7.7560661e+01,  2.8469296e+01,  1.7857534e+01,
        1.0098147e+01,  7.8813014e+00,  2.5998821e+00,  7.4673498e-01,
       -3.4680423e-01,  2.1251647e+00,  4.1607304e+00,  1.1738070e-01,
       -1.3615457e+00,  3.5139034e-04, -4.3327313e-05, -5.7271484e-04,
       -1.8691452e-

In [36]:
print(len(MFCC_Features_list))
print(len(MFCC_Features_list[0]))

744
39


In [37]:
csv_path = "./Chroma Features.csv"
Chroma_df = pd.read_csv(csv_path)
Chroma_Features = list(Chroma_df["Chroma"])
Chroma_Features_list = []
for i in Chroma_Features:
    new_i = i.replace('\n', ' ')
    new_i = new_i.replace('[', '')
    new_i = new_i.replace(']', '')
    array_data = np.fromstring(new_i, dtype=np.float64, sep=' ')
    Chroma_Features_list.append(array_data)
print(Chroma_Features_list)

[array([0.48118   , 0.47893333, 0.47741312, 0.46778804, 0.44217557,
       0.43035954, 0.4182233 , 0.45399395, 0.5269441 , 0.59491086,
       0.57703525, 0.5280094 ]), array([0.44600317, 0.44529277, 0.44075444, 0.43590906, 0.4128726 ,
       0.3987354 , 0.3882369 , 0.4304896 , 0.5091542 , 0.5747403 ,
       0.55635315, 0.48978567]), array([0.56071734, 0.5067145 , 0.4525639 , 0.44358647, 0.43587372,
       0.42378423, 0.3912663 , 0.37438074, 0.36776587, 0.41287634,
       0.512152  , 0.5746907 ]), array([0.7748532 , 0.7825676 , 0.7934703 , 0.80738395, 0.81456494,
       0.7498319 , 0.663937  , 0.68506765, 0.7219654 , 0.7624385 ,
       0.7773217 , 0.7763969 ]), array([0.3407259 , 0.3363515 , 0.3245712 , 0.3511129 , 0.4178941 ,
       0.5029327 , 0.52935874, 0.488288  , 0.42150962, 0.37291417,
       0.35017875, 0.33496654]), array([0.31370872, 0.30854836, 0.29540268, 0.31754577, 0.3796139 ,
       0.47188812, 0.5145426 , 0.47186375, 0.39961556, 0.34890914,
       0.32489496, 0.3094523 ]

In [46]:
csv_path = "./LogFbank Features.csv"
fbank_df = pd.read_csv(csv_path)
fbank_Features = list(fbank_df["LogFbank"])
fbank_features = []
for i in fbank_Features:
    new_i = i.replace('\n', ' ')
    new_i = new_i.replace('[', '')
    new_i = new_i.replace(']', '')
    array_data = np.fromstring(new_i, dtype=np.float64, sep=' ')
    fbank_features.append(array_data)
print(fbank_features)
print(len(fbank_features))

[array([7.20811674, 7.49507759, 7.3350198 , 7.02014909, 6.73024783,
       6.92604641, 6.77872483, 6.98961871, 7.06728445, 6.81816634,
       6.76192268, 7.42088699, 7.13010997]), array([6.72060557, 6.83537485, 6.63635712, 6.24198132, 6.19684815,
       6.22569279, 6.26010133, 6.44857274, 6.1893183 , 6.3748045 ,
       6.92064946, 6.13675829, 3.65777588]), array([6.74191714, 6.97547758, 6.83186563, 6.54610566, 6.2416522 ,
       6.50111991, 6.28899175, 6.64016918, 6.52097542, 6.2381957 ,
       6.92477573, 6.91807359, 5.20432112]), array([ 9.81093963, 10.68534551, 11.51983418, 12.30470061, 13.09082164,
       13.84584642, 14.57390641, 15.26565587, 15.90815482, 16.52132045,
       17.11503747, 17.61402274, 18.02905419]), array([8.20686092, 9.17901463, 9.22417392, 9.01978841, 8.79881738,
       8.50325737, 8.86051839, 8.69469785, 8.58466734, 7.99402735,
       8.51588189, 8.96049593, 8.59486047]), array([7.72694084, 8.66294483, 8.63388342, 8.47794666, 7.95864404,
       8.27388925, 8.276

In [39]:
csv_path = "./LPC Features.csv"
lpc_df = pd.read_csv(csv_path)
lpc_Features = list(lpc_df["LPC"])
lpc_features = []
for i in lpc_Features:
    new_i = i.replace('\n', ' ')
    new_i = new_i.replace('[', '')
    new_i = new_i.replace(']', '')
    array_data = np.fromstring(new_i, dtype=np.float64, sep=' ')
    lpc_features.append(array_data)
print(lpc_features)

[array([  1.        ,  -3.15106724,   6.63629226, -10.99552315,
        15.21734849, -17.96603846,  18.40312277, -16.45486987,
        12.69329305,  -8.32934001,   4.51758276,  -1.87463963,
         0.47781587]), array([ 1.00000000e+00, -1.40044194e+00,  1.58721493e+00, -1.52444121e+00,
        1.06700823e+00, -4.43364643e-01,  1.65475366e-03,  1.29378474e-01,
       -1.96805486e-01,  5.99091589e-02,  3.17543949e-02, -2.99627317e-02,
       -6.19425074e-04]), array([ 1.        , -1.20921356,  1.35680899, -1.49184889,  1.42778144,
       -1.09646857,  0.70237922, -0.49247489,  0.21055185, -0.04345522,
        0.02356321, -0.03285426,  0.0247917 ]), array([1.        , 0.89137861, 0.79257106, 0.69868429, 0.61280006,
       0.53067825, 0.45512972, 0.38111725, 0.31325365, 0.24677796,
       0.18445794, 0.12174629, 0.0615056 ]), array([  1.        ,  -3.10676075,   6.38366799, -10.43909598,
        14.34245201, -16.83704355,  17.26306352, -15.48581526,
        12.01869237,  -7.97721286,   4.

In [40]:
csv_path = "./GFCC Features.csv"
GFCC_df = pd.read_csv(csv_path)
GFCC_Features = list(GFCC_df["GFCC"])
GFCC_Features_list = []
for i in GFCC_Features:
    new_i = i.replace('\n', ' ')
    new_i = new_i.replace('[', '')
    new_i = new_i.replace(']', '')
    array_data = np.fromstring(new_i, dtype=np.float64, sep=' ')
    GFCC_Features_list.append(array_data)
print(GFCC_Features_list)

[array([31.70005535,  0.28016246,  0.6131695 , -0.08965673, -0.194027  ,
       -0.48391482, -0.40178852,  0.18741896, -0.87323761,  0.48741782,
       -0.55984138, -0.1857372 , -0.56146376]), array([27.01687731,  2.02015738, -1.45515087,  2.07550269, -2.06747044,
        1.12641311, -2.02033616,  0.49390723, -0.80275038, -0.64105025,
        0.36889197, -0.47858184, -0.05844208]), array([28.37070006,  1.06734479, -0.90720335,  1.15075324, -1.76451858,
        0.58713216, -2.09444602,  0.8509254 , -1.67485893,  0.36838793,
       -0.46984719, -0.29539135, -0.54131554]), array([ 69.08167949, -14.05522725,  -1.38316607,  -1.53560296,
        -0.55886241,  -0.74290801,  -0.41998754,  -0.47851886,
        -0.36450566,  -0.34670711,  -0.21876517,  -0.17650491,
        -0.14894681]), array([39.8151896 ,  0.23435448, -0.32486194, -1.29827488, -0.56656553,
       -0.99825358, -1.11925607,  0.23926782, -1.02486513, -0.3440787 ,
       -0.6379277 , -0.42077231,  0.12159725]), array([35.19841564,

In [48]:
combined_features = []
for feature_id in range(744):
    combined_feature = np.append(MFCC_Features_list[feature_id], Chroma_Features_list[feature_id])
    combined_feature = np.append(combined_feature, lpc_features[feature_id])
    combined_feature = np.append(combined_feature, fbank_features[feature_id])
    combined_feature = np.append(combined_feature, GFCC_Features_list[feature_id])
    combined_features.append(combined_feature)

In [49]:
len(combined_features)

744

In [50]:
import pandas as pd

combined_df = pd.DataFrame({"Combined": combined_features})

combined_df.to_csv("Combined Features.csv")