<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Libraries" data-toc-modified-id="Libraries-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Libraries</a></span></li><li><span><a href="#Get-files-and-genres" data-toc-modified-id="Get-files-and-genres-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Get files and genres</a></span></li><li><span><a href="#Getting-and-normalizing-features" data-toc-modified-id="Getting-and-normalizing-features-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Getting and normalizing features</a></span><ul class="toc-item"><li><span><a href="#Zero-crossing" data-toc-modified-id="Zero-crossing-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Zero crossing</a></span></li><li><span><a href="#Spectral-Centroid" data-toc-modified-id="Spectral-Centroid-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Spectral Centroid</a></span></li><li><span><a href="#Spectral-Rolloff" data-toc-modified-id="Spectral-Rolloff-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Spectral Rolloff</a></span></li><li><span><a href="#Mel-Frequency-Cepstral-Coefficients-(MFCC)" data-toc-modified-id="Mel-Frequency-Cepstral-Coefficients-(MFCC)-3.4"><span class="toc-item-num">3.4&nbsp;&nbsp;</span>Mel-Frequency Cepstral Coefficients (MFCC)</a></span></li><li><span><a href="#Chroma-stft" data-toc-modified-id="Chroma-stft-3.5"><span class="toc-item-num">3.5&nbsp;&nbsp;</span>Chroma stft</a></span></li></ul></li><li><span><a href="#Export-DataFrame-to-.CSV" data-toc-modified-id="Export-DataFrame-to-.CSV-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Export DataFrame to .CSV</a></span></li></ul></div>

# Libraries

In [1]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# For iterating through files
import os
# For audio playing
import IPython.display as ipd
# For visualizations
import librosa.display

# Get files and genres 

In [2]:
# Get genres

directory = 'data'
genres = []
for entry in os.scandir(directory):
    # echar un ojo para evitar .DS_Store
    if entry.name != ".DS_Store":
        genres.append(entry.name)
print(genres)

['Supernatural', 'Adventure', 'Horror', 'Action', 'Beautiful', 'Drama']


In [3]:
# Get a dictionary -> key=genre , value = list of songs

songs_dict2 = {}
for gen in genres:
    dir = directory + f"/{gen}"
    songs2 = []

    for entry in os.scandir(dir):
        if entry.name.endswith(".wav"):
            songs2.append(entry.name)
            songs_dict2.update( {f"{gen}" : songs2} )

#print (songs_dict2)

In [4]:
#Creating DataFrame
df = pd.DataFrame(columns=['Genre', 'Title', 'Offset'])
offset_15 = [0,15,30,45]
for i in songs_dict2:
    sng = songs_dict2.get(i)
    for s in sng:
        if s != ".DS_Store":
            for o in offset_15:
                my_dict = {"Title":f"{s}","Genre":f"{i}", "Offset":int(f"{o}")}
                df = df.append(my_dict, ignore_index=True)
'''           
            df_prueba.append(my_dict, ignore_index=True)'''
    
'''df = df.append({"Title":f"{s}","genre":f"{i}"}, ignore_index=True)'''
df

Unnamed: 0,Genre,Title,Offset
0,Supernatural,ES_Chasing Self - Polar Nights.wav,0
1,Supernatural,ES_Chasing Self - Polar Nights.wav,15
2,Supernatural,ES_Chasing Self - Polar Nights.wav,30
3,Supernatural,ES_Chasing Self - Polar Nights.wav,45
4,Supernatural,ES_Foundation of Time - Jon Bjork.wav,0
...,...,...,...
1151,Drama,ES_Drive Me Anywhere but Here - Trailer Worx (...,45
1152,Drama,ES_Truth Be Known - Trailer Worx (1).wav,0
1153,Drama,ES_Truth Be Known - Trailer Worx (1).wav,15
1154,Drama,ES_Truth Be Known - Trailer Worx (1).wav,30


# Getting and normalizing features

## Zero crossing

In [5]:
%%time
zero_crossings = []

for index, row in df.iterrows():
    path = f"data/{row['Genre']}/{row['Title']}"
    #print (type(row['Offset']))
    y, sr = librosa.load(path, duration=15, offset=row['Offset'])
    
    no_norm = librosa.zero_crossings(y=y, pad=False)
    zr=sum(no_norm)
    #print(no_norm)
    #zr = np.linalg.norm(no_norm)
    #print(zr)
    zero_crossings.append(zr)   

KeyboardInterrupt: 

In [6]:
df['Zero_crossings'] = zero_crossings

df.head()

ValueError: Length of values (1013) does not match length of index (1156)

## Spectral Centroid

In [None]:
%%time
spectral_centroid = []

for index, row in df.iterrows():
    path = f"data/{row['Genre']}/{row['Title']}"
    #print (type(row['Offset']))
    y, sr = librosa.load(path, duration=15, offset=row['Offset'])
    
    no_norm = librosa.feature.spectral_centroid(y=y)
    #spc= np.mean(no_norm)
    #print(no_norm)
    #zr = np.linalg.norm(no_norm)
    #print(zr)
    spectral_centroid.append(zr)

df['Spectral_centroid'] = spectral_centroid
df.head()

In [None]:
df.tail()

## Spectral Rolloff

In [None]:
%%time
spectral_centroid = []

for index, row in df.iterrows():
    path = f"data/{row['Genre']}/{row['Title']}"
    #print (type(row['Offset']))
    y, sr = librosa.load(path, duration=15, offset=row['Offset'])
    
    no_norm = librosa.feature.spectral_rolloff(y=y, sr=sr)
    spr= np.mean(no_norm)
    #print(no_norm)
    #zr = np.linalg.norm(no_norm)
    #print(zr)
    spectral_centroid.append(spr)

df['Spectral_rolloff'] = spectral_centroid
df.head()

## Mel-Frequency Cepstral Coefficients (MFCC)

In [None]:
%%time
mfcc = []

for index, row in df.iterrows():
    path = f"data/{row['Genre']}/{row['Title']}"
    y, sr = librosa.load(path, duration=15, offset=row['Offset'])
    
    no_norm = librosa.feature.mfcc(y=y, sr=sr)
    
    lista = []
    for m in no_norm:
        lista.append(m)
    
    lista_media = []
    for l in lista:
        lista_media.append(np.mean(l))
    
    mfcc.append(lista_media)


df['MFCC'] = mfcc
df.head()

In [None]:
for index,row in df.iterrows():
    df['MFCC1'] = row['MFCC'][0]
    df['MFCC2'] = row['MFCC'][1]
    df['MFCC3'] = row['MFCC'][2]
    df['MFCC4'] = row['MFCC'][3]
    df['MFCC5'] = row['MFCC'][4]
    df['MFCC6'] = row['MFCC'][5]
    df['MFCC7'] = row['MFCC'][6]
    df['MFCC8'] = row['MFCC'][7]
    df['MFCC9'] = row['MFCC'][8]
    df['MFCC10'] = row['MFCC'][9]
    df['MFCC11'] = row['MFCC'][10]
    df['MFCC12'] = row['MFCC'][11]
    df['MFCC13'] = row['MFCC'][12]
    df['MFCC14'] = row['MFCC'][13]
    df['MFCC15'] = row['MFCC'][14]
    df['MFCC16'] = row['MFCC'][15]
    df['MFCC17'] = row['MFCC'][16]
    df['MFCC18'] = row['MFCC'][17]
    df['MFCC19'] = row['MFCC'][18]
    df['MFCC20'] = row['MFCC'][19]

df = df.drop(['MFCC'], axis=1)
df.head()

## Chroma stft

In [None]:
%%time
chroma = []

for index, row in df.iterrows():
    path = f"data/{row['Genre']}/{row['Title']}"
    #print (type(row['Offset']))
    y, sr = librosa.load(path, duration=15, offset=row['Offset'])
    
    no_norm = librosa.feature.chroma_stft(y=y, sr=sr)
    chrm= np.mean(no_norm)
    #print(no_norm)
    #zr = np.linalg.norm(no_norm)
    #print(zr)
    chroma.append(chrm)

df['Chroma_stft'] = chroma
df.head()

In [None]:
df.tail()

# Export DataFrame to .CSV

In [None]:
df.to_csv(path_or_buf= "output/features.csv",index=False)