#### Dataset from publication

In [None]:
import pandas as pd
import numpy as np
# Lift the display limits so frames are shown with every column and row.
for option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option, None)

# Read the semicolon-separated publication dataset.
path = '../dataset/02 Dataset.csv'
df = pd.read_csv(path, sep=';')

# Work on a copy so the raw frame `df` stays as it was loaded.
dataset = df.copy()

# Build the audio id as "<ID>_<Trigger_counter>" (intended to identify one
# recording) and place it as the third column under the name `file_name`.
id_part = dataset['ID'].astype(str)
trigger_part = dataset['Trigger_counter'].astype(str)
audio_id = id_part + '_' + trigger_part
dataset.insert(2, 'file_name', audio_id)
dataset.head()

In [2]:
# Reduce the frame to the columns used downstream, grouped by role.
# The concatenation order below is the resulting column order.
id_columns = ['file_name']
iso_columns = ['Soundscape_eventfulness', 'Soundscape_pleasantness']
bg_columns = [
    'BGpleasant', 'BGchaotic', 'BGvibrant', 'BGuneventful',
    'BGcalm', 'BGannoying', 'BGeventful', 'BGmonotonous',
]
sc_columns = [
    'SC_Nature', 'SC_Human', 'SC_Household', 'SC_Installation',
    'SC_Signals', 'SC_Traffic', 'SC_Speech', 'SC_Music',
]
context_columns = ['FGsource', 'Activity', 'Location8']

to_keep = id_columns + iso_columns + bg_columns + sc_columns + context_columns

dataset = dataset[to_keep]
dataset.head()

Unnamed: 0,file_name,Soundscape_eventfulness,Soundscape_pleasantness,BGpleasant,BGchaotic,BGvibrant,BGuneventful,BGcalm,BGannoying,BGeventful,BGmonotonous,SC_Nature,SC_Human,SC_Household,SC_Installation,SC_Signals,SC_Traffic,SC_Speech,SC_Music,FGsource,Activity,Location8
0,1132730_6,-8.242641,-2.0,1,0,1,4,3,3,0,4,0,0,6,0,0,0,0,0,Wanduhr analog mit großen Zeigern,1_Cooking_housework_workout,2_Kitchen
1,1132730_8,-6.828427,-5.828427,1,0,0,4,0,4,0,4,0,0,8,0,0,0,0,0,Wanduhr analog,1_Cooking_housework_workout,2_Kitchen
2,1132730_12,-4.12132,5.12132,3,0,3,3,3,0,1,3,0,0,7,0,0,0,0,0,Herd,1_Cooking_housework_workout,2_Kitchen
3,1132730_13,7.949747,-4.707107,0,4,3,0,0,4,3,0,0,0,9,0,0,0,0,0,Tiefkühlschrank,2_Concentrated_mental_work,2_Kitchen
4,1132730_16,3.12132,-4.12132,1,3,3,2,0,3,3,3,0,0,10,0,0,0,0,0,Wäschetrockner,1_Cooking_housework_workout,3_Bathroom


In [3]:
# Rename the two Soundscape_* score columns to ISO_* and strip the "BG"
# prefix from the eight rating columns.
iso_names = {
    'Soundscape_eventfulness': 'ISO_Eventfulness',
    'Soundscape_pleasantness': 'ISO_Pleasantness',
}
rating_names = {
    'BGpleasant': 'pleasant',
    'BGchaotic': 'chaotic',
    'BGvibrant': 'vibrant',
    'BGuneventful': 'uneventful',
    'BGcalm': 'calm',
    'BGannoying': 'annoying',
    'BGeventful': 'eventful',
    'BGmonotonous': 'monotonous',
}
to_rename = {**iso_names, **rating_names}

# Columns not listed in the mapping keep their names.
dataset.rename(columns=to_rename, inplace=True)
dataset.head()

Unnamed: 0,file_name,ISO_Eventfulness,ISO_Pleasantness,pleasant,chaotic,vibrant,uneventful,calm,annoying,eventful,monotonous,SC_Nature,SC_Human,SC_Household,SC_Installation,SC_Signals,SC_Traffic,SC_Speech,SC_Music,FGsource,Activity,Location8
0,1132730_6,-8.242641,-2.0,1,0,1,4,3,3,0,4,0,0,6,0,0,0,0,0,Wanduhr analog mit großen Zeigern,1_Cooking_housework_workout,2_Kitchen
1,1132730_8,-6.828427,-5.828427,1,0,0,4,0,4,0,4,0,0,8,0,0,0,0,0,Wanduhr analog,1_Cooking_housework_workout,2_Kitchen
2,1132730_12,-4.12132,5.12132,3,0,3,3,3,0,1,3,0,0,7,0,0,0,0,0,Herd,1_Cooking_housework_workout,2_Kitchen
3,1132730_13,7.949747,-4.707107,0,4,3,0,0,4,3,0,0,0,9,0,0,0,0,0,Tiefkühlschrank,2_Concentrated_mental_work,2_Kitchen
4,1132730_16,3.12132,-4.12132,1,3,3,2,0,3,3,3,0,0,10,0,0,0,0,0,Wäschetrockner,1_Cooking_housework_workout,3_Bathroom


In [4]:
# Add two constant columns right after `file_name`: the clip duration in
# seconds (position 1) and the audio file suffix (position 2).
constant_columns = [('duration_s', 15), ('suffix', '.wav')]
for position, (column_name, value) in enumerate(constant_columns, start=1):
    dataset.insert(position, column_name, value)
dataset.head()

Unnamed: 0,file_name,duration_s,suffix,ISO_Eventfulness,ISO_Pleasantness,pleasant,chaotic,vibrant,uneventful,calm,annoying,eventful,monotonous,SC_Nature,SC_Human,SC_Household,SC_Installation,SC_Signals,SC_Traffic,SC_Speech,SC_Music,FGsource,Activity,Location8
0,1132730_6,15,.wav,-8.242641,-2.0,1,0,1,4,3,3,0,4,0,0,6,0,0,0,0,0,Wanduhr analog mit großen Zeigern,1_Cooking_housework_workout,2_Kitchen
1,1132730_8,15,.wav,-6.828427,-5.828427,1,0,0,4,0,4,0,4,0,0,8,0,0,0,0,0,Wanduhr analog,1_Cooking_housework_workout,2_Kitchen
2,1132730_12,15,.wav,-4.12132,5.12132,3,0,3,3,3,0,1,3,0,0,7,0,0,0,0,0,Herd,1_Cooking_housework_workout,2_Kitchen
3,1132730_13,15,.wav,7.949747,-4.707107,0,4,3,0,0,4,3,0,0,0,9,0,0,0,0,0,Tiefkühlschrank,2_Concentrated_mental_work,2_Kitchen
4,1132730_16,15,.wav,3.12132,-4.12132,1,3,3,2,0,3,3,3,0,0,10,0,0,0,0,0,Wäschetrockner,1_Cooking_housework_workout,3_Bathroom


In [5]:
# create new value ranges of soundscape items
def range_zero_to_four(x):
    """Linearly rescale *x* so that 0 maps to 2 and +/-(4 + sqrt(32)) maps to 4 / 0.

    The value is first divided by ``4 + sqrt(32)``, which brings inputs from
    ``[-(4 + sqrt(32)), 4 + sqrt(32)]`` into ``[-1, 1]``; it is then shifted
    by 1 and doubled, giving ``[0, 4]`` for inputs inside that interval.
    Inputs outside the interval are not clipped.
    """
    half_span = 4 + np.sqrt(32)
    normalized = x / half_span
    return (normalized + 1) * 2

def sc_range(x):
    """Scale *x* by 0.4, round to one decimal place, and return it as float.

    *x* must be an object that supports ``.round`` and ``.astype`` after
    multiplication (e.g. a pandas Series, as passed by ``DataFrame.apply``).
    """
    rescaled = (x * 0.4).round(1)
    return rescaled.astype(float)

# Rescale both ISO score columns element-wise with range_zero_to_four.
for iso_column in ('ISO_Eventfulness', 'ISO_Pleasantness'):
    dataset[iso_column] = dataset[iso_column].apply(range_zero_to_four)

# Rescale the eight SC_* columns; DataFrame.apply hands each column to
# sc_range as a whole Series.
col = [
    'SC_Nature', 'SC_Human', 'SC_Household', 'SC_Installation',
    'SC_Signals', 'SC_Traffic', 'SC_Speech', 'SC_Music',
]
dataset[col] = dataset[col].apply(sc_range)

dataset.head()

Unnamed: 0,file_name,duration_s,suffix,ISO_Eventfulness,ISO_Pleasantness,pleasant,chaotic,vibrant,uneventful,calm,annoying,eventful,monotonous,SC_Nature,SC_Human,SC_Household,SC_Installation,SC_Signals,SC_Traffic,SC_Speech,SC_Music,FGsource,Activity,Location8
0,1132730_6,15,.wav,0.292893,1.585786,1,0,1,4,3,3,0,4,0.0,0.0,2.4,0.0,0.0,0.0,0.0,0.0,Wanduhr analog mit großen Zeigern,1_Cooking_housework_workout,2_Kitchen
1,1132730_8,15,.wav,0.585786,0.792893,1,0,0,4,0,4,0,4,0.0,0.0,3.2,0.0,0.0,0.0,0.0,0.0,Wanduhr analog,1_Cooking_housework_workout,2_Kitchen
2,1132730_12,15,.wav,1.146447,3.06066,3,0,3,3,3,0,1,3,0.0,0.0,2.8,0.0,0.0,0.0,0.0,0.0,Herd,1_Cooking_housework_workout,2_Kitchen
3,1132730_13,15,.wav,3.646447,1.025126,0,4,3,0,0,4,3,0,0.0,0.0,3.6,0.0,0.0,0.0,0.0,0.0,Tiefkühlschrank,2_Concentrated_mental_work,2_Kitchen
4,1132730_16,15,.wav,2.646447,1.146447,1,3,3,2,0,3,3,3,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,Wäschetrockner,1_Cooking_housework_workout,3_Bathroom


In [6]:
# Load the semicolon-separated acoustic single-value features.
path = '../dataset/AcousticFeatures_SingleValues.csv'
acoustic_dataset = pd.read_csv(path, sep=';')

# Keep only the merge key, the channel column, and the seven acoustic features.
columns_to_select = [
    'Key',
    'Channel',
    'LAeq_default',
    'N5_default',
    'FavgArith_default',
    'RAavgArith',
    'SavgArith_default',
    'R_default',
    'T_default',
]
acoustic_dataset = acoustic_dataset[columns_to_select]

# Collapse all rows sharing a Key (presumably one row per audio channel) to
# their column-wise maximum, then discard the Channel column.
# NOTE: an earlier variant averaged the channels instead of taking the
# maximum (groupby('Key').mean()); the maximum is what is used here.
per_key_max = acoustic_dataset.groupby('Key').max()
acoustic_dataset_max_values = per_key_max.reset_index().drop(columns=['Channel'])
acoustic_dataset_max_values.head()

Unnamed: 0,Key,LAeq_default,N5_default,FavgArith_default,RAavgArith,SavgArith_default,R_default,T_default
0,1132730_102,54.08,9.61,0.0304,9.94,2.37,0.196,0.0571
1,1132730_105,62.7,19.3,0.0428,14.6,1.65,0.0968,0.272
2,1132730_12,27.54,0.565,0.0232,2.84,0.816,0.132,0.0408
3,1132730_122,54.47,11.1,0.0124,4.32,1.31,0.309,0.267
4,1132730_124,61.0,15.8,0.074,7.42,2.49,0.199,0.412


In [None]:
# Attach the per-key acoustic features to the rating data. A left join keeps
# every row of `dataset`; rows without a matching Key get missing values in
# the acoustic columns.
merged = pd.merge(
    dataset,
    acoustic_dataset_max_values,
    how='left',
    left_on='file_name',
    right_on='Key',
)

# `Key` only served as the join column, so it is removed again.
final_dataset = merged.drop(columns=['Key'])
final_dataset.head()

Unnamed: 0,file_name,duration_s,suffix,ISO_Eventfulness,ISO_Pleasantness,pleasant,chaotic,vibrant,uneventful,calm,annoying,eventful,monotonous,SC_Nature,SC_Human,SC_Household,SC_Installation,SC_Signals,SC_Traffic,SC_Speech,SC_Music,FGsource,Activity,Location8,LAeq_default,N5_default,FavgArith_default,RAavgArith,SavgArith_default,R_default,T_default
0,1132730_6,15,.wav,0.292893,1.585786,1,0,1,4,3,3,0,4,0.0,0.0,2.4,0.0,0.0,0.0,0.0,0.0,Wanduhr analog mit großen Zeigern,1_Cooking_housework_workout,2_Kitchen,16.11,0.147,0.00662,1.21,0.825,0.00522,0.0271
1,1132730_8,15,.wav,0.585786,0.792893,1,0,0,4,0,4,0,4,0.0,0.0,3.2,0.0,0.0,0.0,0.0,0.0,Wanduhr analog,1_Cooking_housework_workout,2_Kitchen,16.11,0.156,0.00779,1.34,0.846,0.00168,0.0279
2,1132730_12,15,.wav,1.146447,3.06066,3,0,3,3,3,0,1,3,0.0,0.0,2.8,0.0,0.0,0.0,0.0,0.0,Herd,1_Cooking_housework_workout,2_Kitchen,27.54,0.565,0.0232,2.84,0.816,0.132,0.0408
3,1132730_13,15,.wav,3.646447,1.025126,0,4,3,0,0,4,3,0,0.0,0.0,3.6,0.0,0.0,0.0,0.0,0.0,Tiefkühlschrank,2_Concentrated_mental_work,2_Kitchen,33.07,1.93,0.00623,0.999,1.12,0.239,0.426
4,1132730_16,15,.wav,2.646447,1.146447,1,3,3,2,0,3,3,3,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,Wäschetrockner,1_Cooking_housework_workout,3_Bathroom,56.0,11.3,0.0188,7.39,1.06,0.196,0.465


In [10]:
# Write the merged frame as a semicolon-separated CSV without the index column.
output_path = '../dataset/FinalDataset.csv'
final_dataset.to_csv(output_path, sep=';', index=False)

#### Create audio time-variant dataset

#### Merge dataset from publication and audio time-variant dataset