In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **Extracting the statistical summary and target features**

In [80]:
import numpy as np
import os
import librosa
import pandas as pd
import zipfile
import shutil

def process_audio_from_zip(zip_path):
    extract_path = zip_path.split('.')[0]

    if not os.path.exists(extract_path):
        os.makedirs(extract_path)
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_path)

    feature_dict = {
        'tempo': [],
        'onset_env_mean': [], 'onset_env_std': [], 'onset_env_median': [], 'onset_env_min': [], 'onset_env_max': [],
        'spectral_centroid_mean': [], 'spectral_centroid_std': [], 'spectral_centroid_median': [], 'spectral_centroid_min': [], 'spectral_centroid_max': [],
        'spectral_bandwidth_mean': [], 'spectral_bandwidth_std': [], 'spectral_bandwidth_median': [], 'spectral_bandwidth_min': [], 'spectral_bandwidth_max': [],
        'spectral_rolloff_mean': [], 'spectral_rolloff_std': [], 'spectral_rolloff_median': [], 'spectral_rolloff_min': [], 'spectral_rolloff_max': [],
        'zero_crossing_rate_mean': [], 'zero_crossing_rate_std': [], 'zero_crossing_rate_median': [], 'zero_crossing_rate_min': [], 'zero_crossing_rate_max': [],
        'chroma_stft_mean': [], 'chroma_stft_std': [], 'chroma_stft_median': [], 'chroma_stft_min': [], 'chroma_stft_max': [],
        'mfcc_mean': [], 'mfcc_std': [], 'mfcc_median': [], 'mfcc_min': [], 'mfcc_max': [],
        'rmse_mean': [], 'rmse_std': [], 'rmse_median': [], 'rmse_min': [], 'rmse_max': [],
        'emotion': [], 'emotional_intensity': []
    }

    for actor_dir in os.listdir(extract_path):
        if not actor_dir.startswith('Actor'):
            continue
        for wav_file in os.listdir(os.path.join(extract_path, actor_dir)):
            if not wav_file.endswith('.wav'):
                continue

            y, sr = librosa.load(os.path.join(extract_path, actor_dir, wav_file), sr=None)
            tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
            onset_env = librosa.onset.onset_strength(y=y, sr=sr)
            spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
            spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
            spec_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
            zcr = librosa.feature.zero_crossing_rate(y)
            chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
            mfcc = librosa.feature.mfcc(y=y, sr=sr)
            rmse = librosa.feature.rms(y=y)

            feature_dict['tempo'].append(tempo)

            # Onset Strength
            feature_dict['onset_env_mean'].append(np.mean(onset_env))
            feature_dict['onset_env_std'].append(np.std(onset_env))
            feature_dict['onset_env_median'].append(np.median(onset_env))
            feature_dict['onset_env_min'].append(np.min(onset_env))
            feature_dict['onset_env_max'].append(np.max(onset_env))

            # Spectral Centroid
            feature_dict['spectral_centroid_mean'].append(np.mean(spec_cent))
            feature_dict['spectral_centroid_std'].append(np.std(spec_cent))
            feature_dict['spectral_centroid_median'].append(np.median(spec_cent))
            feature_dict['spectral_centroid_min'].append(np.min(spec_cent))
            feature_dict['spectral_centroid_max'].append(np.max(spec_cent))

            # Spectral Bandwidth
            feature_dict['spectral_bandwidth_mean'].append(np.mean(spec_bw))
            feature_dict['spectral_bandwidth_std'].append(np.std(spec_bw))
            feature_dict['spectral_bandwidth_median'].append(np.median(spec_bw))
            feature_dict['spectral_bandwidth_min'].append(np.min(spec_bw))
            feature_dict['spectral_bandwidth_max'].append(np.max(spec_bw))

            # Spectral Rolloff
            feature_dict['spectral_rolloff_mean'].append(np.mean(spec_rolloff))
            feature_dict['spectral_rolloff_std'].append(np.std(spec_rolloff))
            feature_dict['spectral_rolloff_median'].append(np.median(spec_rolloff))
            feature_dict['spectral_rolloff_min'].append(np.min(spec_rolloff))
            feature_dict['spectral_rolloff_max'].append(np.max(spec_rolloff))

            # Zero Crossing Rate
            feature_dict['zero_crossing_rate_mean'].append(np.mean(zcr))
            feature_dict['zero_crossing_rate_std'].append(np.std(zcr))
            feature_dict['zero_crossing_rate_median'].append(np.median(zcr))
            feature_dict['zero_crossing_rate_min'].append(np.min(zcr))
            feature_dict['zero_crossing_rate_max'].append(np.max(zcr))

            # Chroma STFT
            feature_dict['chroma_stft_mean'].append(np.mean(chroma_stft))
            feature_dict['chroma_stft_std'].append(np.std(chroma_stft))
            feature_dict['chroma_stft_median'].append(np.median(chroma_stft))
            feature_dict['chroma_stft_min'].append(np.min(chroma_stft))
            feature_dict['chroma_stft_max'].append(np.max(chroma_stft))

            # MFCC
            mfcc_mean = np.mean(mfcc, axis=1)
            mfcc_std = np.std(mfcc, axis=1)
            mfcc_median = np.median(mfcc, axis=1)
            mfcc_min = np.min(mfcc, axis=1)
            mfcc_max = np.max(mfcc, axis=1)

            feature_dict['mfcc_mean'].append(mfcc_mean.tolist())
            feature_dict['mfcc_std'].append(mfcc_std.tolist())
            feature_dict['mfcc_median'].append(mfcc_median.tolist())
            feature_dict['mfcc_min'].append(mfcc_min.tolist())
            feature_dict['mfcc_max'].append(mfcc_max.tolist())

            # RMSE
            feature_dict['rmse_mean'].append(np.mean(rmse))
            feature_dict['rmse_std'].append(np.std(rmse))
            feature_dict['rmse_median'].append(np.median(rmse))
            feature_dict['rmse_min'].append(np.min(rmse))
            feature_dict['rmse_max'].append(np.max(rmse))

            # Inherent Emotion Features
            identifiers_only = wav_file.split('.')[0].split('-')
            feature_dict['emotion'].append(identifiers_only[2])
            feature_dict['emotional_intensity'].append(identifiers_only[3])

        print("Finished processing the actor directory: " + actor_dir)
    print("Finished processing all the audio files in the zip file " + zip_path.split('/')[1])

    actor_audio_df = pd.DataFrame(feature_dict)
    return actor_audio_df

# Example usage
zip_path = '/content/drive/MyDrive/Data_Mining_CMPE_255/ravdess_data.zip'
if os.path.exists(zip_path):
    actor_audio_df = process_audio_from_zip(zip_path)
    print('Saved the dataframe to actors_meta_df.csv in your Google Drive')
else:
    print('Zip file not found. Please check the path.')
print('Finished processing all the zip files in the directory')
actor_audio_df.to_csv('Audio_Summary_full.csv', index=False)
print('Saved the dataframe to actors_meta_df.csv')

Finished processing the actor directory: Actor_01
Finished processing the actor directory: Actor_02
Finished processing the actor directory: Actor_03
Finished processing the actor directory: Actor_04
Finished processing the actor directory: Actor_05
Finished processing the actor directory: Actor_06
Finished processing the actor directory: Actor_07
Finished processing the actor directory: Actor_08
Finished processing the actor directory: Actor_09
Finished processing the actor directory: Actor_10
Finished processing the actor directory: Actor_11
Finished processing the actor directory: Actor_12
Finished processing the actor directory: Actor_13
Finished processing the actor directory: Actor_14
Finished processing the actor directory: Actor_15
Finished processing the actor directory: Actor_16
Finished processing the actor directory: Actor_17
Finished processing the actor directory: Actor_18
Finished processing the actor directory: Actor_19
Finished processing the actor directory: Actor_20


In [81]:
actor_audio_df.head()

Unnamed: 0,tempo,onset_env_mean,onset_env_std,onset_env_median,onset_env_min,onset_env_max,spectral_centroid_mean,spectral_centroid_std,spectral_centroid_median,spectral_centroid_min,...,mfcc_median,mfcc_min,mfcc_max,rmse_mean,rmse_std,rmse_median,rmse_min,rmse_max,emotion,emotional_intensity
0,187.5,0.78633,1.61718,0.217788,0.0,14.334495,7416.297747,4428.027506,9147.820676,0.0,...,"[-838.5758056640625, 13.969961166381836, 0.016...","[-861.5325927734375, -85.97047424316406, -59.6...","[-432.15478515625, 236.70388793945312, 57.2677...",0.00212,0.003391,2.9e-05,0.0,0.015828,1,1
1,106.132075,0.868278,1.772298,0.286859,0.0,17.95422,7135.571471,4196.796438,8052.809002,850.770545,...,"[-823.7044677734375, 27.333763122558594, 0.665...","[-861.4439697265625, -14.97285270690918, -102....","[-417.70416259765625, 218.36373901367188, 55.4...",0.002258,0.003638,4.2e-05,1.348699e-06,0.016505,1,1
2,82.720588,0.733591,1.675118,0.209506,0.0,22.206697,7239.265648,4333.681827,9010.389014,0.0,...,"[-838.718994140625, 11.377859115600586, 0.2480...","[-851.2338256835938, -45.1416015625, -44.75405...","[-412.7435302734375, 229.02920532226562, 51.82...",0.002707,0.004298,2e-05,0.0,0.01936,1,1
3,66.176471,0.787081,1.559124,0.331623,0.0,15.080771,7008.958169,3975.417498,8415.361699,1001.254214,...,"[-827.2327880859375, 17.51581573486328, 3.1093...","[-847.8671264648438, -37.103309631347656, -46....","[-388.3479309082031, 216.24066162109375, 63.95...",0.002521,0.004178,2.4e-05,9.536743e-07,0.018674,1,1
4,112.5,0.873043,1.742576,0.413582,0.0,15.459251,6997.31181,4373.360791,7491.276195,0.0,...,"[-854.6892700195312, 40.637351989746094, 2.093...","[-917.4026489257812, -18.632596969604492, -63....","[-454.3965148925781, 230.4322052001953, 62.228...",0.001579,0.002497,4.8e-05,0.0,0.011612,2,1


## **Importing the Dataset**

In [57]:
import pandas as pd

File = '/content/drive/My Drive/Data_Mining_CMPE_255/Audio_Summary_full.csv'

# Save the DataFrame as a CSV file
data = pd.read_csv(File)

In [58]:
#Encode the MFCC Features
mfcc_features = ['mfcc_mean', 'mfcc_std', 'mfcc_median', 'mfcc_min', 'mfcc_max']
for feature in mfcc_features:
    data[feature] = data[feature].apply(lambda x: eval(x))  # Convert string representation to lists
    mfcc_cols = [f'{feature}_{i+1}' for i in range(len(data[feature][0]))]
    data[mfcc_cols] = pd.DataFrame(data[feature].tolist(), index=data.index)  # Expand lists into columns
data.drop(columns=mfcc_features, inplace=True)  # Drop original MFCC columns

  data[mfcc_cols] = pd.DataFrame(data[feature].tolist(), index=data.index)  # Expand lists into columns
  data[mfcc_cols] = pd.DataFrame(data[feature].tolist(), index=data.index)  # Expand lists into columns
  data[mfcc_cols] = pd.DataFrame(data[feature].tolist(), index=data.index)  # Expand lists into columns
  data[mfcc_cols] = pd.DataFrame(data[feature].tolist(), index=data.index)  # Expand lists into columns
  data[mfcc_cols] = pd.DataFrame(data[feature].tolist(), index=data.index)  # Expand lists into columns
  data[mfcc_cols] = pd.DataFrame(data[feature].tolist(), index=data.index)  # Expand lists into columns
  data[mfcc_cols] = pd.DataFrame(data[feature].tolist(), index=data.index)  # Expand lists into columns


In [59]:
data.head()

Unnamed: 0,tempo,onset_env_mean,onset_env_std,onset_env_median,onset_env_min,onset_env_max,spectral_centroid_mean,spectral_centroid_std,spectral_centroid_median,spectral_centroid_min,...,mfcc_max_11,mfcc_max_12,mfcc_max_13,mfcc_max_14,mfcc_max_15,mfcc_max_16,mfcc_max_17,mfcc_max_18,mfcc_max_19,mfcc_max_20
0,144.230769,0.932868,1.777812,0.373425,0.0,12.580906,5005.760329,4152.626605,3494.602511,0.0,...,8.835346,21.117416,13.364576,12.640429,6.615581,11.186466,15.153045,10.03234,7.776146,11.752613
1,165.441176,0.942505,1.932295,0.411716,0.0,22.585564,4821.836276,3961.762127,3232.207927,0.0,...,9.273089,20.921482,20.118553,14.089436,10.1136,15.664388,11.618687,13.624983,7.748759,17.45978
2,82.720588,0.90214,1.631991,0.484324,0.0,16.705038,4229.363715,4040.571673,2051.074495,0.0,...,16.235313,13.897518,9.207255,5.922969,4.848257,12.668896,19.154112,4.841742,5.554036,6.241574
3,122.282609,1.034313,1.941458,0.598841,0.0,20.15659,4461.636371,3384.983754,4149.625976,0.0,...,13.729324,14.880929,18.190212,10.610459,12.279799,15.166858,16.277058,15.276653,13.387456,8.160851
4,156.25,1.101004,2.097226,0.667383,0.0,19.12117,4406.057782,3376.567932,3955.306859,0.0,...,16.444546,18.992033,13.990599,20.981964,8.798305,19.021706,14.557951,14.275255,17.648052,13.84836


In [60]:
data.shape

(1440, 138)

In [61]:
# Map the Emotion Labels
emotion_mapping = {
    1: 'neutral',
    2: 'calm',
    3: 'happy',
    4: 'sad',
    5: 'angry',
    6: 'fearful',
    7: 'disgust',
    8: 'surprised'
}
data['emotion'] = data['emotion'].map(emotion_mapping)

# Map the Emotional Intensity Label
intensity_mapping = {1: 0, 2: 1}
data['emotional_intensity'] = data['emotional_intensity'].map(intensity_mapping)

In [62]:
data.emotion.unique()

array(['neutral', 'calm', 'happy', 'sad', 'angry', 'disgust', 'surprised',
       'fearful'], dtype=object)

In [63]:
# One-Hot Encoding for Emotion Labels
emotion_one_hot = pd.get_dummies(data['emotion'], prefix='emotion')
data = pd.concat([data, emotion_one_hot], axis=1)
data.drop('emotion', axis=1, inplace=True)  # Drop original emotion column

In [64]:
data.shape

(1440, 145)

In [65]:
data.head()

Unnamed: 0,tempo,onset_env_mean,onset_env_std,onset_env_median,onset_env_min,onset_env_max,spectral_centroid_mean,spectral_centroid_std,spectral_centroid_median,spectral_centroid_min,...,mfcc_max_19,mfcc_max_20,emotion_angry,emotion_calm,emotion_disgust,emotion_fearful,emotion_happy,emotion_neutral,emotion_sad,emotion_surprised
0,144.230769,0.932868,1.777812,0.373425,0.0,12.580906,5005.760329,4152.626605,3494.602511,0.0,...,7.776146,11.752613,0,0,0,0,0,1,0,0
1,165.441176,0.942505,1.932295,0.411716,0.0,22.585564,4821.836276,3961.762127,3232.207927,0.0,...,7.748759,17.45978,0,0,0,0,0,1,0,0
2,82.720588,0.90214,1.631991,0.484324,0.0,16.705038,4229.363715,4040.571673,2051.074495,0.0,...,5.554036,6.241574,0,0,0,0,0,1,0,0
3,122.282609,1.034313,1.941458,0.598841,0.0,20.15659,4461.636371,3384.983754,4149.625976,0.0,...,13.387456,8.160851,0,1,0,0,0,0,0,0
4,156.25,1.101004,2.097226,0.667383,0.0,19.12117,4406.057782,3376.567932,3955.306859,0.0,...,17.648052,13.84836,0,1,0,0,0,0,0,0


## **Normalizing**

In [66]:
columns = data.columns.values.tolist()
for column in ['emotion_neutral', 'emotion_calm', 'emotion_happy', 'emotion_sad', 'emotion_angry', 'emotion_disgust', 'emotion_surprised', 'emotion_fearful', 'emotional_intensity']:
  columns.remove(column)
len(columns)

136

In [67]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

data[columns] = scaler.fit_transform(data[columns])

In [68]:
data.head()

Unnamed: 0,tempo,onset_env_mean,onset_env_std,onset_env_median,onset_env_min,onset_env_max,spectral_centroid_mean,spectral_centroid_std,spectral_centroid_median,spectral_centroid_min,...,mfcc_max_19,mfcc_max_20,emotion_angry,emotion_calm,emotion_disgust,emotion_fearful,emotion_happy,emotion_neutral,emotion_sad,emotion_surprised
0,0.374243,-0.33276,0.261798,-0.437075,0.0,-1.140906,-0.684409,1.197786,-1.260809,-1.073469,...,-1.076724,-0.577233,0,0,0,0,0,1,0,0
1,0.863181,-0.253371,0.924194,-0.251321,0.0,1.607811,-0.911456,0.876178,-1.431539,-1.073469,...,-1.079252,-0.087085,0,0,0,0,0,1,0,0
2,-1.043677,-0.585887,-0.363455,0.100917,0.0,-0.007827,-1.64284,1.008973,-2.200055,-1.073469,...,-1.281879,-1.050536,0,0,0,0,0,1,0,0
3,-0.131701,0.502915,0.963484,0.656464,0.0,0.940466,-1.356109,-0.095697,-0.834611,-1.073469,...,-0.558661,-0.885703,0,1,0,0,0,0,0,0
4,0.651308,1.052303,1.631391,0.988971,0.0,0.655991,-1.424718,-0.109878,-0.961047,-1.073469,...,-0.165303,-0.397244,0,1,0,0,0,0,0,0


In [69]:
data.describe()

Unnamed: 0,tempo,onset_env_mean,onset_env_std,onset_env_median,onset_env_min,onset_env_max,spectral_centroid_mean,spectral_centroid_std,spectral_centroid_median,spectral_centroid_min,...,mfcc_max_19,mfcc_max_20,emotion_angry,emotion_calm,emotion_disgust,emotion_fearful,emotion_happy,emotion_neutral,emotion_sad,emotion_surprised
count,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,...,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0
mean,-2.960595e-16,4.736952e-16,8.783098e-16,3.552714e-16,0.0,-4.71228e-16,4.342206e-16,5.329071e-16,-3.4540270000000003e-17,9.868649e-18,...,-3.4540270000000003e-17,3.94746e-17,0.133333,0.133333,0.133333,0.133333,0.133333,0.066667,0.133333,0.133333
std,1.000347,1.000347,1.000347,1.000347,0.0,1.000347,1.000347,1.000347,1.000347,1.000347,...,1.000347,1.000347,0.340053,0.340053,0.340053,0.340053,0.340053,0.24953,0.340053,0.340053
min,-1.869982,-2.39502,-3.028827,-2.248638,0.0,-2.822565,-3.648847,-3.261141,-2.374124,-1.073469,...,-1.635435,-1.239399,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,-0.824857,-0.7129698,-0.6513339,-0.7233546,0.0,-0.705314,-0.7274588,-0.6278745,-0.6903353,-1.073469,...,-0.6231255,-0.6028806,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,-0.06906053,-0.1086645,-0.05137057,-0.06379396,0.0,-0.09446851,0.01869139,0.1378324,-0.008544116,0.3290369,...,-0.2035374,-0.3151892,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.7542177,0.5652229,0.6274472,0.7339646,0.0,0.631217,0.7163379,0.7227428,0.6801746,0.8337458,...,0.2570672,0.141104,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,2.943389,4.631083,3.315785,3.310263,0.0,4.031471,2.586389,2.32195,2.819871,2.921285,...,4.510888,4.129274,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [70]:
data_emotion = data.copy()
data_emotional_intensity = data.copy()

## **Modifying the data for individual modeling tasks and Saving them**

In [71]:
columns_to_drop = ['emotion_neutral', 'emotion_calm', 'emotion_happy', 'emotion_sad', 'emotion_angry', 'emotion_disgust', 'emotion_surprised', 'emotion_fearful']

# Drop the columns
data_emotional_intensity.drop(columns_to_drop, axis=1, inplace=True)

data_emotional_intensity.shape

(1440, 137)

In [72]:
output_csv_file = '/content/drive/My Drive/Data_Mining_CMPE_255/data_emotional_intensity.csv'

# Save the DataFrame as a CSV file
data_emotional_intensity.to_csv(output_csv_file, index=False)

In [73]:
columns_to_drop = ['emotional_intensity']

# Drop the columns
data_emotion.drop(columns_to_drop, axis=1, inplace=True)

data_emotion.shape

(1440, 144)

In [74]:
output_csv_file = '/content/drive/My Drive/Data_Mining_CMPE_255/data_emotion.csv'

# Save the DataFrame as a CSV file
data_emotion.to_csv(output_csv_file, index=False)