In [198]:
import librosa
import os
import subprocess
from os import path
from pydub import AudioSegment
from torch.utils.data import TensorDataset , DataLoader
from sklearn.ensemble import RandomForestClassifier
import csv
import numpy as np
import random
import torch
from collections import Counter
from torch import nn
import matplotlib.pyplot as plt

In [None]:
!pip install pydub

In [None]:
!pip install torch

In [230]:
#this function extracts the MFCC features of a 15-second excerpt of a song, beginning "start" seconds into it

def feature_extractor(audio_file_dir,start):
    """Compute the MFCCs of a 15-second excerpt of an audio file.

    Parameters
    ----------
    audio_file_dir : str
        Path of the audio file to load (despite the name, this is a file
        path, not a directory).
    start : int or float
        Offset, in seconds, at which the excerpt begins.

    Returns
    -------
    tuple
        ``(True, mfccs)`` where ``mfccs`` is the value returned by
        ``librosa.feature.mfcc`` with ``n_mfcc=20`` when a complete
        15-second excerpt is available, otherwise ``(False, 0)``.
    """
    sample_rate = 22050
    excerpt_seconds = 15
    excerpt_samples = sample_rate * excerpt_seconds

    # Load the whole file, resampled to `sample_rate`.
    x, freq = librosa.load(audio_file_dir, sr=sample_rate)

    # Keep only the requested 15-second window.
    first_sample = int(sample_rate * start)
    x_15sec = x[first_sample:first_sample + excerpt_samples]

    # A shorter slice means the file ends before the window does.
    if len(x_15sec) != excerpt_samples:
        return False, 0

    # `y` must be passed by keyword: recent librosa releases reject a
    # positional audio buffer.
    mfccs_15sec = librosa.feature.mfcc(y=x_15sec, sr=freq, n_mfcc=20)
    return True, mfccs_15sec

In [231]:



def data_loader(data_dir, songs_per_style=170, batch_size=16):
    """Build the training DataLoader from the audio files in ``data_dir``.

    File names are expected to look like ``<label>_<index>.<ext>``. The
    label prefix selects the style; files whose prefix is not a key of
    ``style_dic`` are ignored. Only the first 15 seconds of each file are
    used (``feature_extractor(..., 0)``).

    Parameters
    ----------
    data_dir : str
        Directory containing the training files.
    songs_per_style : int, optional
        Highest file index kept per style. For styles merged from several
        source labels the limit is divided by the number of merged labels.
        NOTE(review): the original comment mentioned 150 songs per style
        while the code used 170; the code's value is kept as the default.
    batch_size : int, optional
        Batch size of the returned DataLoader.

    Returns
    -------
    torch.utils.data.DataLoader
        Loader over a ``TensorDataset`` of (features, class id) pairs,
        shuffled once at construction time.

    Raises
    ------
    ValueError
        If no usable file is found in ``data_dir``.
    """
    # Class ids: 1 Classic, 2 Rap, 3 Jazz/blues, 4 Rock, 5 Pop,
    # 6 Electronic, 7 Ambient. Keys are the filename label prefixes.
    style_dic={'1':1,'2':2,'5':3,'6':4,'7':4,
             '8':5,'10':6,'11':6,'12':7}

    # Number of source labels merged into the same style; used to split
    # the per-style quota between them.
    style_occurence={'1':1,'2':1,'5':1,'6':2,'7':2,
             '8':1,'10':2,'11':2,'12':1}

    x_data=[]
    y_data=[]
    for file in os.listdir(data_dir):
        label=file.split("_")[0]
        # Skip excluded styles and any file that does not follow the
        # naming scheme, instead of failing on an unknown key.
        if label not in style_dic:
            continue

        max_song=songs_per_style/style_occurence[label]
        if int((file.split("_")[1]).split(".")[0])>max_song:
            continue

        print(file)
        extracted,file_feature = feature_extractor(os.path.join(data_dir,file),0)
        # The label is appended together with its feature so that a file
        # that is too short cannot shift the labels of the following files.
        if extracted:
            x_data.append(file_feature)
            y_data.append(style_dic[label])

    if not x_data:
        raise ValueError("no usable audio file found in " + repr(data_dir))

    # Shuffle features and labels together.
    temp_list = list(zip(x_data , y_data))
    random.shuffle(temp_list)
    x_data , y_data = zip(*temp_list)

    # Convert to tensors (labels are stored as floats and cast by the
    # training loop).
    tensor_x_data = torch.Tensor(np.array(x_data))
    tensor_y_data = torch.Tensor(np.array(y_data))
    dataset = TensorDataset(tensor_x_data ,tensor_y_data)

    dataloader = DataLoader(dataset , batch_size=batch_size)
    return dataloader

In [232]:
def train(dataloader , model , loss_fn , optimizer):
    """Run one training epoch of ``model`` over ``dataloader``.

    Relies on the module-level ``device`` to place each batch.

    Parameters
    ----------
    dataloader : torch.utils.data.DataLoader
        Yields ``(X, y)`` batches; ``y`` holds the class ids.
    model : torch.nn.Module
        Network to optimise (switched to training mode).
    loss_fn : callable
        Loss taking ``(predictions, integer targets)``.
    optimizer : torch.optim.Optimizer
        Optimizer bound to ``model``'s parameters.
    """
    size = len(dataloader.dataset)
    model.train()
    for batch , (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error. `.long()` keeps the targets on their
        # current device, whereas `.type(torch.LongTensor)` always returns
        # a CPU tensor and breaks the loss when training on CUDA.
        pred = model(X)
        loss = loss_fn(pred , y.long())

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Progress report every 100 batches.
        if batch % 100 == 0:
            loss_value , current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f} [{current:>5d}/{size:>5d}]")
            
def test(dataloader , model , loss_fn,list_style):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss , correct = 0, 0
    style_pred=[]
    with torch.no_grad ():
        
        #movement in the sound to test each part of the sound
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            
            test_loss += loss_fn(pred , y.type(torch.LongTensor)).item()
            
            
            
            #Check if the first predicted sound is the right one
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            
            
            style_pred.append(list_style[pred.argmax(1).item()-1])
            
    test_loss /= num_batches
    correct /= size
    #print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    
    return style_pred

In [233]:
def take_a_song(path,name):
    """Turn one audio file into a DataLoader of consecutive 15-second excerpts.

    The file is converted with ffmpeg to a temporary mono 22050 Hz wav
    named ``0_0.wav`` inside ``path``; the temporary file is always removed
    before returning, even when an error occurs.
    NOTE(review): an existing ``0_0.wav`` in ``path`` is overwritten and
    deleted.

    Parameters
    ----------
    path : str
        Directory containing the song (with or without a trailing
        separator).
    name : str
        File name of the song inside ``path``.

    Returns
    -------
    torch.utils.data.DataLoader
        Loader with ``batch_size=1`` over the MFCCs of each complete
        15-second window (starting at 0 s, 15 s, 30 s, ...), each paired
        with a dummy target of 0. The loader is empty when the song has no
        complete window.

    Raises
    ------
    RuntimeError
        If ffmpeg did not produce the converted file.
    """
    file_path = os.path.join(path, name)
    out_path = os.path.join(path, str(0) + "_" + str(0) + ".wav")

    # Convert the file to wav to analyse it. "-y" overwrites a stale
    # temporary file instead of waiting for a confirmation.
    parameters = ["ffmpeg", "-y", "-i", file_path, '-ac', "1", '-ar', "22050", out_path]

    x_data=[]
    y_data=[]
    try:
        subprocess.call(parameters,stdout=subprocess.DEVNULL,stderr=subprocess.STDOUT)
        if not os.path.exists(out_path):
            raise RuntimeError("ffmpeg could not convert " + repr(file_path))

        x,freq = librosa.load(out_path ,sr=22050)
        duration = int(len(x)/freq)

        # One excerpt every 15 seconds; the "+ 1" keeps the last window
        # when it ends exactly at the end of the song.
        for i in range(0,duration-15+1,15):
            # Use the joined path: `path + name` is wrong when `path` has
            # no trailing separator.
            boolean,song=feature_extractor(out_path,i)

            # Keep only the windows that were extracted successfully.
            if boolean:
                x_data.append(song)
                y_data.append(0)
    finally:
        # Remove the temporary wav, whatever happened above.
        if os.path.exists(out_path):
            os.remove(out_path)

    # Stack into a single array first: building a tensor from a list of
    # arrays is slow.
    tensor_x_data = torch.Tensor(np.array(x_data))
    tensor_y_data = torch.Tensor(y_data)

    dataset = TensorDataset(tensor_x_data ,tensor_y_data)
    song_loader = DataLoader(dataset , batch_size=1)
    return song_loader


    

In [234]:
#this function prints and returns the proportion of each predicted style
def calcul_prop(dict):
    """Print and return the share of each style in a count mapping.

    Parameters
    ----------
    dict : mapping
        Maps a style name to the number of times it was predicted.
        (The parameter name shadows the builtin; it is kept for
        compatibility with existing callers.)

    Returns
    -------
    tuple
        ``(proportions, list_style)``: a float NumPy array of fractions
        summing to 1 and the style names in the mapping's iteration order.
    """
    # Style names in iteration order, and their counts as floats.
    list_style = [style for style in dict]
    counts = np.array([dict[style] for style in list_style], dtype=float)

    # Normalise the counts into fractions of the total.
    proportions = counts / sum(counts)

    # One "<style> : <percentage>%" line per style.
    for style, prop in zip(list_style, proportions):
        print(style + " : " + str(round(prop * 100, 2)) + "%")

    return proportions, list_style

In [235]:
#this function tests all the songs in a directory and returns the accuracy of our AI
def test_all_song(data_dir_test):
    #extract data from the directory that we want to test
    file_list=[]
    label_list=[]
    style_dic={'1':1,'2':2,'5':3,'6':4,'7':4,
             '8':5,'10':6,'11':6,'12':7}
    
    #browse the folder and get all the songs to test, and put their name into file_list, and their label into list_label
    for file in os.listdir(data_dir_test):
        
        label=file.split("_")[0]
        if label!='3' and label!='4' and label !='9':
            file_list.append(file)
            label_list.append(style_dic[label])
    
    

    #correct is to see if the song is correctly predicted
    correct=0
    
    #n is to calculate the total accuracy, n will be the number total of songs analized
    n=0

    

    

    #predict the song
    for i in range(0,len(file_list)):
        
        #extract the style expected from the file name
        try:
            style_id=int(file_list[i][0:2])-1

        except:
            style_id=int(file_list[i][0])-1

        #Display which song we will test
        print(file_list[i])



        # dataloader_test_2 contains some parts of the song
        dataloader_test_2=take_a_song(data_dir_test,file_list[i])
        
        #styles_pred predicts the style of each part of the song
        styles_pred=test(dataloader_test_2 , model , loss_fn,list_style)
        
        

        
        #print the predictions in function of the time of the song
        for j in range(0,len(styles_pred)):
            minutes = int(j*15/60)
            minutes2= int((j+1)*15/60)
            seconds = j*15-60*minutes
            seconds2= (j+1)*15-60*minutes2
            print(str(minutes) + ":"+str(seconds) + " - " + str(minutes2) + ":"+str(seconds2) 
                  + " : " + styles_pred[j])
            
        
        #calculate the number of times a style has been predicted
        counter=Counter(styles_pred)
        
        #print and take the results
        prop,styles=calcul_prop(counter)

        
        
        
                
        #take the predicted style
        x = max(counter, key=counter.get)
        
        
            
            
        #print predicted and expected style
        print("Best : " + x)
        print("Expected : "+list_style[int(label_list[i])-1]+"\n \n")


        n+=1
        

        
        #add if the style was correctly predicted
        if(x==list_style[int(label_list[i])-1]):
            correct+=1
            

    # accuracy total
    print(" Accuracy : " +str(correct/n*100))
    
    return correct/n*100

In [249]:
def test_a_song(data_dir,file):
    #extract data from the directory that we want to test
    # dataloader_test_2 contains some parts of the song
    dataloader_test_2=take_a_song(data_dir,file)
    #styles_pred predicts the style of each part of the song
    styles_pred=test(dataloader_test_2 , model , loss_fn,list_style)
    print(file+"\n")
    #print the predictions in function of the time of the song
    for j in range(0,len(styles_pred)):
        minutes = int(j*15/60)
        minutes2= int((j+1)*15/60)
        seconds = j*15-60*minutes
        seconds2= (j+1)*15-60*minutes2
        print(str(minutes) + ":"+str(seconds) + " - " + str(minutes2) + ":"+str(seconds2) + " : " + styles_pred[j])
    print("")
    #calculate the number of times a style has been predicted
    counter=Counter(styles_pred)
        
    #print and take the results
    prop,styles=calcul_prop(counter)
    
    #take the predicted style
    x = max(counter, key=counter.get)
    
    #print predicted and expected style
    print("\nBest : " + x)


In [237]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier applied to flattened MFCC excerpts.

    Parameters
    ----------
    input_size : int, optional
        Number of values per sample once flattened. The default is the
        number of features (20 MFCCs) times the sequence length (646).
    num_classes : int, optional
        Number of output logits. The default (13) is the value the saved
        checkpoint was trained with, so it must be kept to reload it.

    The attribute names and the layer order are part of the checkpoint
    format (state-dict keys) and must not change.
    """

    def __init__(self, input_size=20*646, num_classes=13):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_size , 512),
            nn.ReLU(),
            nn.Linear(512 , 512),
            nn.ReLU(),
            nn.Linear(512 , 256),
            nn.ReLU(),
            nn.Linear(256 , num_classes)
        )

    def forward(self , x):
        """Return the class logits for a batch ``x`` (flattened per sample)."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [226]:
# As a user, don't execute this cell: it retrains the model from scratch
# and overwrites the saved weights.

#Step 1 : collect data to train our AI

# Style names; list_style[k - 1] is the name of class id k.
list_style=['Classic','Rap',"Jazz",'Rock','Pop','Electronic','Ambient']
dataloader=data_loader("../Data/train/")



#Step 2 : initialize our neural network

# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available () else "cpu"
print("Using {} device".format(device))

# Define model
model = NeuralNetwork().to(device)

# to train a model , we need a loss function and an optimizer .
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters (), lr=1e-3)



#Step 3 : train our neural network

number_of_epochs = 24
for t in range(number_of_epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(dataloader , model , loss_fn , optimizer)

#Save our model
# Create the target directory first so that a missing folder cannot make
# the save fail after the whole training has run.
os.makedirs('../Model', exist_ok=True)
torch.save(model.state_dict(), '../Model/model.pth')
print("Done!")


In [251]:
# Style names; list_style[k - 1] is the name of class id k.
list_style=['Classic','Rap',"Jazz",'Rock','Pop','Electronic','Ambient']

# Get cpu or gpu device for inference.
device = "cuda" if torch.cuda.is_available () else "cpu"
print("Using {} device".format(device))

#Retrieve our model
model= NeuralNetwork().to(device)
# map_location lets weights saved on a GPU machine be loaded on a CPU-only
# one (and the other way round).
model.load_state_dict(torch.load('../Model/model.pth', map_location=device))
# The model is only used for prediction from here on.
model.eval()
loss_fn = nn.CrossEntropyLoss()
# Not needed for prediction; kept because it is a notebook-level name.
optimizer = torch.optim.SGD(model.parameters (), lr=1e-3)

Using cpu device


In [243]:
"""As a user, don't execute this"""
#Step 4 : test our AI
# Classifies every song of the test directory with the loaded model. The call
# is the last expression of the cell, so the returned accuracy (in percent)
# is displayed after the printed log.

test_all_song("../Data/test/")

10_Gabry PonteLUMXDaddy DJ - We Could Be Together.webm
0:0 - 0:15 : Ambient
0:15 - 0:30 : Electronic
0:30 - 0:45 : Pop
0:45 - 1:0 : Electronic
1:0 - 1:15 : Electronic
1:15 - 1:30 : Pop
1:30 - 1:45 : Pop
1:45 - 2:0 : Electronic
2:0 - 2:15 : Rap
Ambient : 11.11%
Electronic : 44.44%
Pop : 33.33%
Rap : 11.11%
Best : Electronic
Expected : Electronic
 

10_John SummitE.webm
0:0 - 0:15 : Pop
0:15 - 0:30 : Rock
0:30 - 0:45 : Pop
0:45 - 1:0 : Pop
1:0 - 1:15 : Pop
1:15 - 1:30 : Rock
1:30 - 1:45 : Pop
1:45 - 2:0 : Pop
2:0 - 2:15 : Pop
2:15 - 2:30 : Ambient
2:30 - 2:45 : Rock
2:45 - 3:0 : Pop
3:0 - 3:15 : Electronic
3:15 - 3:30 : Pop
Pop : 64.29%
Rock : 21.43%
Ambient : 7.14%
Electronic : 7.14%
Best : Pop
Expected : Electronic
 

10_Obskur - Bayside - Radio Edit.webm
0:0 - 0:15 : Electronic
0:15 - 0:30 : Rock
0:30 - 0:45 : Electronic
0:45 - 1:0 : Electronic
1:0 - 1:15 : Electronic
1:15 - 1:30 : Electronic
1:30 - 1:45 : Rock
1:45 - 2:0 : Electronic
2:0 - 2:15 : Rock
2:15 - 2:30 : Electronic
2:30 - 

0:0 - 0:15 : Classic
0:15 - 0:30 : Classic
0:30 - 0:45 : Classic
0:45 - 1:0 : Classic
1:0 - 1:15 : Classic
1:15 - 1:30 : Classic
1:30 - 1:45 : Classic
1:45 - 2:0 : Classic
2:0 - 2:15 : Classic
2:15 - 2:30 : Classic
2:30 - 2:45 : Classic
2:45 - 3:0 : Classic
3:0 - 3:15 : Classic
3:15 - 3:30 : Classic
3:30 - 3:45 : Classic
3:45 - 4:0 : Classic
Classic : 100.0%
Best : Classic
Expected : Classic
 

1_25.webm
0:0 - 0:15 : Classic
0:15 - 0:30 : Classic
0:30 - 0:45 : Classic
0:45 - 1:0 : Jazz
1:0 - 1:15 : Jazz
1:15 - 1:30 : Jazz
1:30 - 1:45 : Jazz
1:45 - 2:0 : Jazz
2:0 - 2:15 : Rock
2:15 - 2:30 : Jazz
Classic : 30.0%
Jazz : 60.0%
Rock : 10.0%
Best : Jazz
Expected : Classic
 

1_26.webm
0:0 - 0:15 : Classic
0:15 - 0:30 : Classic
0:30 - 0:45 : Classic
0:45 - 1:0 : Classic
1:0 - 1:15 : Classic
1:15 - 1:30 : Classic
1:30 - 1:45 : Classic
1:45 - 2:0 : Classic
2:0 - 2:15 : Classic
2:15 - 2:30 : Classic
Classic : 100.0%
Best : Classic
Expected : Classic
 

1_27.webm
0:0 - 0:15 : Jazz
0:15 - 0:30 : J

0:0 - 0:15 : Pop
0:15 - 0:30 : Rock
0:30 - 0:45 : Electronic
0:45 - 1:0 : Electronic
1:0 - 1:15 : Electronic
1:15 - 1:30 : Electronic
1:30 - 1:45 : Rock
1:45 - 2:0 : Rock
2:0 - 2:15 : Rock
2:15 - 2:30 : Rock
2:30 - 2:45 : Rock
2:45 - 3:0 : Rock
3:0 - 3:15 : Pop
3:15 - 3:30 : Rock
3:30 - 3:45 : Rock
3:45 - 4:0 : Rock
Pop : 12.5%
Rock : 62.5%
Electronic : 25.0%
Best : Rock
Expected : Rock
 

6_GAYLEblackbear - fmk (with blackbear) - messier.webm
0:0 - 0:15 : Classic
0:15 - 0:30 : Classic
0:30 - 0:45 : Electronic
0:45 - 1:0 : Rock
1:0 - 1:15 : Pop
1:15 - 1:30 : Pop
1:30 - 1:45 : Pop
1:45 - 2:0 : Pop
2:0 - 2:15 : Rock
2:15 - 2:30 : Jazz
2:30 - 2:45 : Electronic
2:45 - 3:0 : Pop
Classic : 16.67%
Electronic : 16.67%
Rock : 16.67%
Pop : 41.67%
Jazz : 8.33%
Best : Pop
Expected : Rock
 

6_Luna Bay - Chasing Highs.webm
0:0 - 0:15 : Jazz
0:15 - 0:30 : Rock
0:30 - 0:45 : Electronic
0:45 - 1:0 : Rock
1:0 - 1:15 : Rock
1:15 - 1:30 : Electronic
1:30 - 1:45 : Rock
1:45 - 2:0 : Pop
2:0 - 2:15 : Classi

63.33333333333333

In [253]:
"""This is where you will be able to test your music"""

#Step 5 : test a specific song, this is what the user will run to test a song

#if you want to test your own song, put it in the directory "../Data/Example for user/"
#and replace "RapSong.mp3" below with the file name of your song
test_a_song("../Data/Example for user/","RapSong.mp3")



RapSong.mp3

0:0 - 0:15 : Classic
0:15 - 0:30 : Ambient
0:30 - 0:45 : Ambient
0:45 - 1:0 : Rap
1:0 - 1:15 : Rap
1:15 - 1:30 : Rap
1:30 - 1:45 : Rap
1:45 - 2:0 : Pop
2:0 - 2:15 : Rap
2:15 - 2:30 : Rap
2:30 - 2:45 : Ambient
2:45 - 3:0 : Electronic
3:0 - 3:15 : Rap
3:15 - 3:30 : Ambient
3:30 - 3:45 : Jazz

Classic : 6.67%
Ambient : 26.67%
Rap : 46.67%
Pop : 6.67%
Electronic : 6.67%
Jazz : 6.67%

Best : Rap


In [204]:
import subprocess
from multiprocessing import cpu_count

def download_spotify_playlist(link, output_directory):
    """Run the ``spotify_dl`` command-line tool on a playlist link.

    The tool's output is discarded and its exit status is ignored.

    Parameters
    ----------
    link : str
        Value passed to the tool's ``-l`` option.
    output_directory : str
        Value passed to the tool's ``-o`` option.
    """
    # The machine's CPU count is handed to the "-mc" option
    # (presumably the number of parallel workers — confirm in the tool's help).
    worker_count = str(cpu_count())
    command = [
        "spotify_dl",
        "-l", link,
        "-o", output_directory,
        "-m",
        "-mc", worker_count,
    ]
    subprocess.call(command, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
    
    
# Fetch the playlist into the directory used as the test set ("../Data/test").
# The tool's output is suppressed, so this cell prints nothing even on failure.
download_spotify_playlist("https://open.spotify.com/playlist/55Y51y537MBe0IesH847gQ","../Data/test")