In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
import sklearn as skl
import sklearn.utils
import IPython.display as ipd
from sklearn.model_selection import train_test_split
import ast

In [2]:
# The listing has no header row; each line is a path relative to the audio root
# (e.g. '000/000002.mp3').
file_paths = pd.read_csv('data/all_data_paths.txt', header=None, names=['file_path'])

# Hold out 33% of the paths. The split is seeded so that Restart & Run All
# reproduces the same train/test partition on every run.
train_paths, test_paths = train_test_split(file_paths, test_size=0.33, random_state=42)

file_paths.head()

Unnamed: 0,file_path
0,000/000002.mp3
1,000/000005.mp3
2,000/000010.mp3
3,000/000140.mp3
4,000/000141.mp3


In [3]:
# Positions of the columns to keep from tracks.csv, paired one-to-one with the
# flat names assigned to them below.
tracks_csv = 'data/fma_metadata/tracks.csv'
track_column_positions = [0, 6, 8, 11, 26, 39, 41, 44, 47, 52]
track_column_names = [
    'track_id', 'album_id', 'album_listens', 'album_title', 'artist_name',
    'track_favorites', 'track_genres', 'track_interest', 'track_listens', 'track_title',
]

# The first three rows are skipped (presumably a multi-row header -- confirm
# against the CSV) and replaced by the names above.
music_data = pd.read_csv(
    tracks_csv,
    skiprows=[0, 1, 2],
    usecols=track_column_positions,
    names=track_column_names,
)

# Genre ids are stored as text such as "[76, 103]"; parse them into Python lists.
music_data['track_genres'] = music_data['track_genres'].map(ast.literal_eval)

music_data.head()

Unnamed: 0,track_id,album_id,album_listens,album_title,artist_name,track_favorites,track_genres,track_interest,track_listens,track_title
0,2,1,6073,AWOL - A Way Of Life,AWOL,2,[21],4656,1293,Food
1,3,1,6073,AWOL - A Way Of Life,AWOL,1,[21],1470,514,Electric Ave
2,5,1,6073,AWOL - A Way Of Life,AWOL,6,[21],1933,1151,This World
3,10,6,47632,Constant Hitmaker,Kurt Vile,178,[10],54881,50135,Freeway
4,20,4,2710,Niris,Nicky Cook,0,"[76, 103]",978,361,Spiritual Level


In [4]:
# Keep only columns 0 and 3 of genres.csv: the numeric genre id and its title.
genres_csv = 'data/fma_metadata/genres.csv'
genre_df = pd.read_csv(genres_csv, usecols=[0, 3])
genre_df

Unnamed: 0,genre_id,title
0,1,Avant-Garde
1,2,International
2,3,Blues
3,4,Jazz
4,5,Classical
...,...,...
158,1032,Turkish
159,1060,Tango
160,1156,Fado
161,1193,Christmas


In [5]:
# Pick one track from the path listing and decode it.
index = 1999
relative_path = file_paths.loc[index, 'file_path']
filename = 'data/fma_small/' + relative_path

# sr=None keeps the file's own sampling rate; mono=True down-mixes to one channel.
y, sr = librosa.load(filename, sr=None, mono=True)
duration_seconds = y.shape[-1] / sr
print('Duration: {:.2f}s, {} samples'.format(duration_seconds, y.size))

# Excerpt bounds in seconds; only used by the optional excerpt player below.
start, end = 7, 17
# To play just the excerpt: ipd.Audio(data=y[start*sr:end*sr], rate=sr)
ipd.Audio(data=y, rate=sr)

Duration: 29.98s, 1321967 samples




In [6]:
# Relative path of the track selected by `index`.
file_paths.loc[index, 'file_path']

'049/049062.mp3'

In [7]:
# Derive the numeric track id from the file name, e.g. '049/049062.mp3' -> 49062.
# Take the LAST path component (not component 1) so deeper paths also work, and
# let int() drop the zero padding: stripping zeros by hand turns an all-zero id
# into '' and makes int('') raise.
track_stem = file_paths['file_path'][index].rsplit('/', 1)[-1].split('.', 1)[0]
track_id = int(track_stem)
song_id = str(track_id)  # kept as an unpadded string, as before

# .item() insists on exactly one matching row and raises ValueError otherwise.
associated_genres_numeric = music_data.loc[music_data['track_id'] == track_id, 'track_genres'].item()

# Translate every numeric genre id of the track into its title.
associated_genres = [
    genre_df.loc[genre_df['genre_id'] == genre_id, 'title'].item()
    for genre_id in associated_genres_numeric
]

In [8]:
import SpectrogramDataset
import data_generation

In [10]:
# Build the path, track and genre frames through the project helper.
# Note: this rebinds `genre_df`, replacing the frame loaded earlier in the notebook.
file_path_df, track_df, genre_df = SpectrogramDataset.create_dataframes(
    "data/all_data_paths.txt",
    "data/fma_metadata/tracks.csv",
    "data/fma_metadata/genres.csv",
)


In [11]:
# Build the three dataset splits from the frames created above.
dataset_splits = SpectrogramDataset.create_dataset(file_path_df, track_df, genre_df)
train_data, validation_data, test_data = dataset_splits


Failed to load  data/fma_small/133/133297.mp3
Failed to load  data/fma_small/108/108925.mp3
Failed to load  data/fma_small/099/099134.mp3
Training dataset created.
Validation dataset created.
Test dataset created.


In [18]:
# Distinct genre titles in first-seen order; a title's position in this list is
# its class index, so the order must not be changed afterwards.
# Built with a generator so that no loop variable leaks into the notebook
# namespace: a stray global named `genre` would shadow the genre() label
# encoder if this cell were re-run after that function is defined.
genres = list(dict.fromkeys(train_data[i][1] for i in range(len(train_data))))

print(genres)


['Soundtrack', 'Hip-Hop', 'Lo-Fi', 'Pop', 'New Age', 'International', 'Psych-Folk', 'Electroacoustic', 'Folk', 'Reggae - Dub', 'Ambient', 'Europe', 'Avant-Garde', 'Rock', 'Alternative Hip-Hop', 'Kid-Friendly', 'Noise-Rock', 'Noise', 'Psych-Rock', 'Dubstep', 'Unclassifiable', 'Electronic', 'Experimental Pop', 'IDM', 'House', 'Experimental', 'Progressive', 'Punk', 'Middle East', 'Balkan', 'Ambient Electronic', 'Trip-Hop', 'Latin America', 'Romany (Gypsy)', 'Singer-Songwriter', 'Sound Collage', 'Synth Pop', 'Afrobeat', 'Chiptune', 'Chill-out', 'Post-Rock', 'Industrial', 'Metal', 'Glitch', 'Audio Collage', 'Freak-Folk', 'Instrumental', 'Sound Poetry', 'Field Recordings', 'Post-Punk', 'Polka', 'Indie-Rock', 'Techno', 'Dance', 'Hip-Hop Beats', 'Garage', 'Asia-Far East', 'Novelty', 'Breakbeat', 'African', 'Celtic', 'Minimalism', 'Holiday', 'Rap', 'Black-Metal', 'Goth', 'French', 'Krautrock', 'North African', 'New Wave', 'Chip Music', 'Salsa', 'Latin', 'Indian', 'Hardcore', 'Reggae - Dancehall

In [19]:
len(genres)

103

In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms


In [244]:

import torch.nn as nn
import torch.nn.functional as F
#device = torch.device("cuda:0")


#part c
class Net(nn.Module):
    """Single-convolution CNN followed by a three-layer classifier head.

    Args:
        num_classes: Number of output logits. Defaults to 103, the number of
            distinct genres the notebook reports for the training set. The
            previous hard-coded 102 left the highest label index out of range
            for CrossEntropyLoss.
        flat_features: Number of values per sample after conv + pooling.
            The default 29946 equals 3 channels * 31 * 322, which is what a
            1 x 128 x 2583 input produces (presumably the spectrogram size
            emitted by the dataset -- confirm).
    """

    def __init__(self, num_classes=103, flat_features=29946):
        super().__init__()
        self.flat_features = flat_features
        self.conv1 = nn.Conv2d(1, 3, 5)
        self.fc1 = nn.Linear(flat_features, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return raw class logits of shape (N, num_classes); no softmax is applied."""
        # conv -> ReLU -> max-pool with a (4, 8) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (4, 8))
        # Flatten every sample to one row for the fully connected layers.
        x = x.view(-1, self.flat_features)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x

# The default head size matches the 103 genres; pass num_classes=len(genres)
# instead if the genre list changes.
model = Net()
#model.to(device)

In [16]:
import torch.optim as optim
# Optimisation set-up: Adam on all model parameters, with a cross-entropy loss
# that is summed (not averaged) over the batch.
learning_rate = 1e-4
criterion = nn.CrossEntropyLoss(reduction='sum')
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# SGD alternative: optim.SGD(model.parameters(), lr=learning_rate, momentum=0.80)


In [None]:
def genre(strgenre):
    """Encode a genre title as a class-index tensor.

    Args:
        strgenre: Genre title; must be an element of the global ``genres`` list.

    Returns:
        A 1-element ``torch.long`` tensor holding the title's position in
        ``genres``.

    Raises:
        ValueError: If ``strgenre`` is not in ``genres`` (for example a genre
            that never occurs in the data ``genres`` was built from).
    """
    try:
        class_index = genres.index(strgenre)
    except ValueError:
        raise ValueError(
            'Unknown genre {!r}: it is not in the `genres` list'.format(strgenre)
        ) from None
    # Build the tensor directly from the value rather than allocating an
    # uninitialised LongTensor and filling it afterwards.
    return torch.tensor([class_index], dtype=torch.long)

In [None]:
# Accuracy histories; the evaluation cell extends these with np.append.
acc_train1 = ()
acc_val1 = ()

for epoch in range(10):  # loop over the dataset multiple times
    # The evaluation cell leaves the model in eval mode, so switch back to
    # training mode explicitly in case this cell is re-run afterwards.
    model.train()
    running_loss = 0.0
    steps = 0
    for i, data in enumerate(train_data, 0):
        # data is an (input, genre title) pair; the title is encoded as a class index.
        inputs, labels = data[0], genre(data[1])
        # Prepend batch and channel axes for Conv2d (assumes each sample is a
        # 2-D spectrogram tensor -- confirm against the dataset).
        inputs = inputs.unsqueeze(0).unsqueeze(0)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize (one sample per step)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the running mean loss every 500 samples instead of after
        # every sample, which floods the cell output.
        running_loss += loss.item()
        steps = i + 1
        if steps % 500 == 0:
            print(epoch, running_loss / steps)

    # End-of-epoch summary, skipped when the last step was already reported
    # (or when the dataset is empty).
    if steps % 500:
        print(epoch, running_loss / steps)

print('Finished Training')

In [None]:



# Evaluation of the trained model on the validation and test splits.
def _evaluate(dataset):
    """Score the global ``model`` on ``dataset`` one sample at a time.

    Args:
        dataset: Iterable of (input, genre title) pairs.

    Returns:
        (accuracy, mean_loss): fraction of correct top-1 predictions and the
        mean cross-entropy per sample; both are NaN for an empty dataset.
    """
    correct = 0
    total = 0
    loss_sum = 0.0
    with torch.no_grad():
        for data in dataset:
            inputs, labels = data[0], genre(data[1])
            # Same batch/channel axes as in the training loop; without them
            # Conv2d rejects the raw sample.
            inputs = inputs.unsqueeze(0).unsqueeze(0)

            outputs = model(inputs)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # reduction='sum' replaces the deprecated size_average=False.
            loss_sum += F.cross_entropy(outputs, labels, reduction='sum').item()
    if total == 0:
        return float('nan'), float('nan')
    return correct / total, loss_sum / total


model.eval()

val_accuracy, val_loss = _evaluate(validation_data)
acc_val1 = np.append(acc_val1, val_accuracy)
print('Accuracy of the network on the validation set: %.2f %%' % (100 * val_accuracy))

test_accuracy, test_loss = _evaluate(test_data)
# NOTE(review): this history is named acc_train1 but has always been filled from
# test_data; the name is kept so existing uses keep working -- consider renaming.
acc_train1 = np.append(acc_train1, test_accuracy)
print('Accuracy of the network on the test set: %.2f %%' % (100 * test_accuracy))
print('Test loss: '+str(test_loss))

