# Modify

In [1]:
import json
import os
import shutil
import zipfile

import librosa
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

## Extracting the statistical summary and target features
The input table (`../01_Sample/actors_meta_df.csv`) is produced by the earlier stages of the pipeline, so the previous stages' notebooks must be run before this one.

### Importing the Dataset

In [2]:
File = '../01_Sample/actors_meta_df.csv'

# Read the feature table from the CSV file.
data = pd.read_csv(File)

# The list-valued features are stored in the CSV as JSON strings; json.loads
# turns each cell back into a Python list (a list of lists for the nested ones).
# 'y' is decoded as well, even though the statistics cell below does not use it.
json_encoded_columns = [
    'spectral_centroid',
    'spectral_bandwidth',
    'spectral_rolloff',
    'zero_crossing_rate',
    'chroma_stft',
    'mfcc',
    'rmse',
    'onset_env',
    'y',
]
for json_column in json_encoded_columns:
    data[json_column] = data[json_column].apply(json.loads)

In [3]:
# Collapse every list-valued feature into five scalar columns named
# <feature>_mean, <feature>_std, <feature>_min, <feature>_max, <feature>_median.
# No axis is passed to the NumPy reducers, so each one runs over all elements
# of a cell and yields a single number even for nested lists.
# 'y' is not part of this list, so no statistics are produced for it.
list_features = [
    'spectral_centroid',
    'spectral_bandwidth',
    'spectral_rolloff',
    'zero_crossing_rate',
    'chroma_stft',
    'mfcc',
    'rmse',
    'onset_env',
]
summary_reducers = {
    '_mean': np.mean,
    '_std': np.std,
    '_min': np.min,
    '_max': np.max,
    '_median': np.median,
}
for feature in list_features:
    for suffix, reducer in summary_reducers.items():
        data[feature + suffix] = data[feature].apply(reducer)

In [5]:
# Keep only the label columns, tempo, and the summary-statistic columns
# (8 features x 5 statistics = 40); the raw list-valued columns are dropped.
leading_columns = ['emotion', 'emotional_intensity', 'tempo']
summarised_features = [
    'spectral_centroid',
    'spectral_bandwidth',
    'spectral_rolloff',
    'zero_crossing_rate',
    'chroma_stft',
    'mfcc',
    'rmse',
    'onset_env',
]
stat_suffixes = ['mean', 'std', 'min', 'max', 'median']
summary_columns = [
    feature_name + '_' + stat
    for feature_name in summarised_features
    for stat in stat_suffixes
]
data = data.loc[:, leading_columns + summary_columns]

# Snapshot of the selected columns, written before any recoding or scaling.
data.to_csv('../03_Modify/actors_meta_data_selected.csv', index=False)

In [6]:
# Sanity check on the selected table: 3 leading columns + 8 features x 5 statistics = 43 columns.
data.shape

(1440, 43)

In [7]:
# Disabled: mapping the integer emotion codes to names. The emotion column
# therefore keeps its integer codes.
# # Map the Emotion Labels
# emotion_mapping = {
#     1: 'neutral',
#     2: 'calm',
#     3: 'happy',
#     4: 'sad',
#     5: 'angry',
#     6: 'fearful',
#     7: 'disgust',
#     8: 'surprised'
# }
# data['emotion'] = data['emotion'].map(emotion_mapping)

# Map the Emotional Intensity Label from codes {1, 2} to {0, 1}.
intensity_mapping = {1: 0, 2: 1}

# Series.map silently converts any value that is not a key of the mapping to
# NaN. That is what would happen if this cell were run a second time (the
# column would already hold 0/1), so stop with a clear error instead of
# corrupting the target column.
unexpected_codes = set(data['emotional_intensity'].unique()) - set(intensity_mapping)
if unexpected_codes:
    raise ValueError(
        "emotional_intensity contains values outside the expected codes "
        + repr(sorted(intensity_mapping))
        + ": "
        + repr(unexpected_codes)
        + ". Was this cell already run? Reload the data before re-running it."
    )
data['emotional_intensity'] = data['emotional_intensity'].map(intensity_mapping)

In [8]:
# List the emotion codes present; they remain integers because the name mapping in the previous cell is commented out.
data.emotion.unique()

array([1, 2, 3, 4, 7, 6, 5, 8])

## No one-hot encoding (emotion stays an integer label)

In [9]:
# # One-Hot Encoding for Emotion Labels
# emotion_one_hot = pd.get_dummies(data['emotion'], prefix='emotion')
# data = pd.concat([data, emotion_one_hot], axis=1)
# data.drop('emotion', axis=1, inplace=True)  # Drop original emotion column

In [10]:
# Preview the table after the intensity recoding and before any scaling.
data.head()

Unnamed: 0,emotion,emotional_intensity,tempo,spectral_centroid_mean,spectral_centroid_std,spectral_centroid_min,spectral_centroid_max,spectral_centroid_median,spectral_bandwidth_mean,spectral_bandwidth_std,...,rmse_mean,rmse_std,rmse_min,rmse_max,rmse_median,onset_env_mean,onset_env_std,onset_env_min,onset_env_max,onset_env_median
0,1,0,83.354335,2377.96242,1349.696708,539.810952,6476.505326,1927.128621,2665.995141,600.401532,...,0.002277,0.002771,1.6e-05,0.007836,0.000276,1.35892,2.25539,0.0,18.253141,0.551198
1,2,0,71.777344,2705.783918,1353.17443,597.545069,6158.1882,2728.42385,2681.489089,642.217949,...,0.001943,0.002575,9e-06,0.0104,0.000146,1.365531,2.406745,0.0,20.316032,0.644791
2,2,1,99.384014,2597.143004,1413.81886,622.974845,7729.98017,2453.81755,2679.064326,569.657279,...,0.001035,0.001184,1.4e-05,0.004912,0.000432,1.449689,2.15889,0.0,15.330427,0.797497
3,3,0,161.499023,2510.694239,1168.110694,678.216364,5654.569378,2449.62492,2721.913688,691.472495,...,0.003573,0.00479,2.4e-05,0.017239,0.000115,1.187693,2.341018,0.0,17.443775,0.260358
4,3,1,51.679688,2496.662203,1228.672133,740.978888,7111.164914,2452.070635,2662.152183,671.015419,...,0.013955,0.021002,1.9e-05,0.095639,0.000175,1.121399,2.397961,0.0,20.576365,0.109023


In [11]:
# Inspect the emotion column: still a single integer-coded column (the one-hot encoding cell is commented out).
data['emotion']

0       1
1       2
2       2
3       3
4       3
       ..
1435    1
1436    2
1437    2
1438    3
1439    3
Name: emotion, Length: 1440, dtype: int64

## **Normalizing**

In [12]:
# columns = data.columns.values.tolist()
# for column in ['emotion_neutral', 'emotion_calm', 'emotion_happy', 'emotion_sad', 'emotion_angry', 'emotion_disgust', 'emotion_surprised', 'emotion_fearful', 'emotional_intensity']:
#   columns.remove(column)
# len(columns)

In [13]:
# Scale every feature column with StandardScaler; the two label columns are
# excluded so their values stay untouched.
# StandardScaler is imported in the notebook's first (imports) cell.
label_columns = ['emotion', 'emotional_intensity']
columns = [name for name in data.columns.values.tolist() if name not in label_columns]

# NOTE(review): the scaler is fitted on the complete table. If a train/test
# split is made in a later stage, the held-out rows have already influenced
# the scaling statistics - confirm that this is acceptable.
scaler = StandardScaler()

data[columns] = scaler.fit_transform(data[columns])

In [14]:
# Preview the table again after scaling: feature columns are rescaled, the label columns are unchanged.
data.head()

Unnamed: 0,emotion,emotional_intensity,tempo,spectral_centroid_mean,spectral_centroid_std,spectral_centroid_min,spectral_centroid_max,spectral_centroid_median,spectral_bandwidth_mean,spectral_bandwidth_std,...,rmse_mean,rmse_std,rmse_min,rmse_max,rmse_median,onset_env_mean,onset_env_std,onset_env_min,onset_env_max,onset_env_median
0,1,0,-1.002005,-1.172356,-0.202435,0.097613,-0.223804,-1.621565,1.249141,-0.611033,...,-0.638078,-0.642855,0.210754,-0.664915,-0.389027,-0.131641,-0.550568,0.0,0.02452,0.449809
1,2,0,-1.280322,-0.218384,-0.189966,0.238719,-0.496479,-0.086782,1.320176,-0.392261,...,-0.663971,-0.653625,-0.03458,-0.630356,-0.430968,-0.088909,-0.089261,0.0,0.544724,0.845361
2,2,1,-0.616644,-0.534533,0.027479,0.300872,0.849939,-0.612757,1.309059,-0.771878,...,-0.734289,-0.729827,0.117858,-0.704313,-0.338513,0.455054,-0.844688,0.0,-0.712507,1.490745
3,3,0,0.876629,-0.786102,-0.853524,0.435886,-0.927885,-0.620787,1.505509,-0.134576,...,-0.537629,-0.53225,0.518485,-0.538188,-0.440894,-1.238395,-0.289588,0.0,-0.17958,-0.779366
4,3,1,-1.763479,-0.826936,-0.636377,0.589282,0.319854,-0.616103,1.231523,-0.241601,...,0.26678,0.355998,0.307257,0.518379,-0.421367,-1.666901,-0.116032,0.0,0.610373,-1.418954


In [15]:
# Working copies of the scaled table, one per modelling task.
# Both names are assigned again further down, when each task drops the label
# column it does not need.
data_emotional_intensity = data.copy()
data_emotion = data.copy()

In [16]:
# Confirm that emotional_intensity in the copy holds the recoded 0/1 values.
data_emotional_intensity.emotional_intensity

0       0
1       0
2       1
3       0
4       1
       ..
1435    0
1436    0
1437    1
1438    0
1439    1
Name: emotional_intensity, Length: 1440, dtype: int64

## **Modifying the data for individual modeling tasks and Saving them**

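Two task-specific tables are derived from the scaled data: one that keeps `emotional_intensity` as the label and one that keeps `emotion` as the label.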

In [17]:
# Intensity task: remove the other label ('emotion') so that
# emotional_intensity is the only label column left next to the features.
columns_to_drop = ['emotion']
data_emotional_intensity = data.drop(columns=columns_to_drop)

data_emotional_intensity.shape

(1440, 42)

In [18]:
# Write the intensity-task table to the current working directory; the row
# index is not stored in the file.
output_csv_file = './data_emotional_intensity.csv'
data_emotional_intensity.to_csv(path_or_buf=output_csv_file, index=False)

In [19]:
# Emotion task: remove the other label ('emotional_intensity') so that
# emotion is the only label column left next to the features.
columns_to_drop = ['emotional_intensity']
data_emotion = data.drop(columns=columns_to_drop)

data_emotion.shape

(1440, 42)

In [20]:
# Write the emotion-task table to the current working directory; the row
# index is not stored in the file.
output_csv_file = './data_emotion.csv'
data_emotion.to_csv(path_or_buf=output_csv_file, index=False)