In [1]:
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Excel workbook and sheet to read; later cells use the 'File Path' and
# 'Emotion' columns of the resulting frame.
file_path = 'B_Data_Frame.xlsx'
sheet_name = 'Banspemo'

# Load the sheet and also write a CSV copy of it (relative path, no index column).
df = pd.read_excel(file_path, sheet_name=sheet_name)
df.to_csv('Banspemo.csv', index=False)

# Uncomment to restrict the run to a few rows while debugging:
#df = df.iloc[1:5]


In [None]:
# Display the loaded sheet for a quick visual check.
df

In [4]:
def noise(data):
    """Return a copy of ``data`` with random Gaussian noise added.

    The noise amplitude is a random fraction (at most 3.5%) of the largest
    sample value in ``data``; one noise value is drawn per element along the
    first axis. The input array is not modified.
    """
    # Draw the amplitude first, then the noise samples, so seeded runs are reproducible.
    scale = 0.035 * np.random.uniform() * np.amax(data)
    jitter = np.random.normal(size=data.shape[0])
    return data + scale * jitter

def stretch(data, rate=0.8):
    """Time-stretch ``data`` with ``librosa.effects.time_stretch``.

    Args:
        data: audio samples to stretch.
        rate: stretch factor forwarded unchanged to librosa (default 0.8).

    Returns:
        The stretched signal as returned by librosa.
    """
    stretched = librosa.effects.time_stretch(data, rate=rate)
    return stretched

def shift(data):
    """Circularly rotate ``data`` by a random whole number of samples.

    The offset is drawn uniformly from [-5, 5), scaled by 1000 and truncated
    to an integer; samples rolled off one end re-enter at the other
    (``np.roll`` semantics).
    """
    offset = np.random.uniform(low=-5, high=5) * 1000
    return np.roll(data, int(offset))

def pitch(data, sampling_rate, pitch_factor=0.7):
    """Pitch-shift ``data`` with ``librosa.effects.pitch_shift``.

    Args:
        data: audio samples, passed to librosa as ``y``.
        sampling_rate: sampling rate of ``data``, passed as ``sr``.
        pitch_factor: passed to librosa as ``n_steps`` (default 0.7).

    Returns:
        The pitch-shifted signal as returned by librosa.
    """
    shift_kwargs = dict(y=data, sr=sampling_rate, n_steps=pitch_factor)
    return librosa.effects.pitch_shift(**shift_kwargs)



In [5]:
def extract_features(data, sample_rate):
    """Build one flat feature vector for an audio signal.

    Several librosa features are averaged and concatenated in this order:
    zero-crossing rate, chroma (STFT), chroma (CQT), MFCC, RMS and mel
    spectrogram.

    Args:
        data: audio samples, passed to librosa as ``y``.
        sample_rate: sampling rate forwarded to the librosa calls that take ``sr``.

    Returns:
        1-D NumPy array holding the concatenated averages.
    """
    # Zero-crossing rate.
    zcr_mean = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0)

    # Chroma computed from the STFT magnitude.
    magnitude = np.abs(librosa.stft(data))
    chroma_stft_mean = np.mean(
        librosa.feature.chroma_stft(S=magnitude, sr=sample_rate).T, axis=0
    )

    # Chroma computed from the constant-Q transform.
    # NOTE(review): no axis is given here, so this is a single scalar averaged
    # over the whole chroma matrix, unlike the other features -- confirm intended.
    chroma_cqt_mean = np.mean(librosa.feature.chroma_cqt(y=data, sr=sample_rate))

    # MFCCs.
    mfcc_mean = np.mean(librosa.feature.mfcc(y=data, sr=sample_rate).T, axis=0)

    # Root-mean-square energy.
    rms_mean = np.mean(librosa.feature.rms(y=data).T, axis=0)

    # Mel spectrogram.
    mel_mean = np.mean(
        librosa.feature.melspectrogram(y=data, sr=sample_rate).T, axis=0
    )

    pieces = [
        np.array([]),  # empty float seed; keeps the stacked dtype unchanged
        zcr_mean,
        chroma_stft_mean,
        chroma_cqt_mean,
        mfcc_mean,
        rms_mean,
        mel_mean,
    ]
    return np.hstack(pieces)

def get_features(path):
    """Load an audio file and extract features for it and its augmentations.

    The file is read with ``librosa.load`` using its default settings.
    Features are then extracted from five versions of the signal, in this
    order: original, added noise, time-stretched, shifted, pitch-shifted.

    Args:
        path: location of the audio file, passed to ``librosa.load``.

    Returns:
        2-D NumPy array with one row of features per signal version
        (five rows in total).
    """
    data, sample_rate = librosa.load(path)

    # Augmentations are generated in a fixed order so seeded runs stay reproducible.
    variants = [
        data,                                    # without augmentation
        noise(data),                             # data with noise
        stretch(data),                           # data with stretching
        shift(data),                             # data with shift
        pitch(data, sampling_rate=sample_rate),  # data with pitch
    ]

    # np.vstack returns a new array, so every variant -- including the
    # pitch-shifted one -- must be part of the stack that is returned.
    rows = [
        extract_features(data=variant, sample_rate=sample_rate)
        for variant in variants
    ]
    return np.vstack(rows)

In [None]:
import time

from tqdm import tqdm

# Parallel lists: one feature vector and one emotion label per entry.
extracted_feature, emotion = [], []

# Start of the wall-clock measurement that the timing cell reads later.
start_time = time.time()

# zip() has no length, so give tqdm the row count explicitly to get a
# proper progress bar with a total and an ETA.
for audio_path, label in tqdm(zip(df['File Path'], df['Emotion']), total=len(df)):
    feature = get_features(audio_path)
    # get_features returns one row per signal variant; each row gets the file's label.
    extracted_feature.extend(feature)
    emotion.extend([label] * len(feature))

In [None]:
# Number of values in the first extracted feature vector.
len(extracted_feature[0])

In [None]:
# Wall-clock time elapsed since `start_time` was set in the feature-extraction cell.
# NOTE(review): this runs in a separate cell, so the figure also includes any
# idle time between the extraction cell finishing and this cell being run.
end_time = time.time()

execution_time = end_time - start_time
print(f"Execution Time: {execution_time} seconds")

In [None]:
# Preview only the first few feature vectors; displaying the whole list
# floods the cell output and bloats the saved notebook.
extracted_feature[:5]

In [None]:
# First extracted feature vector.
extracted_feature[0]

In [None]:
# First value of the first feature vector.
extracted_feature[0][0]

In [None]:
# Type of a single feature value.
type(extracted_feature[0][0])

In [None]:
# Total number of feature vectors collected by the extraction loop.
len(extracted_feature)

In [None]:
# Preview only the first few labels rather than dumping the whole list.
emotion[:10]

In [None]:
# Number of labels collected; the extraction loop adds one label per feature
# vector, so this should equal len(extracted_feature).
len(emotion)

In [None]:
#df['Features'] = extracted_feature
feature_set = pd.DataFrame(extracted_feature)
feature_set['Emotion_Label'] = emotion
feature_set

In [17]:
# Write the labelled feature table to CSV without the index column.
feature_set.to_csv('B-Ser.csv', index=False)