<a href="https://colab.research.google.com/github/Golem8/Music-Genre-Classifier/blob/main/classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook takes in a mp3 upload and converts it to a spectrogram and displays the model's resultant classification

In [92]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [107]:
pathToEvaluate = r'/content/000140.mp3'
modelPath = r'/content/78.7valacc'

In [108]:
from torchvision import datasets, transforms
import torch
import torch.nn.functional as nnf
import torch.nn as nn
import os
import torch.optim as optim 

!pip install pydub
import pydub
import librosa, librosa.display
from pydub import AudioSegment 
from pydub.utils import make_chunks

from pydub import AudioSegment
import numpy as np
import shutil
import matplotlib.pyplot as plt
from PIL import Image




In [109]:
# get a pretrained network for transfer learning
import torchvision.models as models
densenet = models.densenet161(pretrained=True)

class transfer_music_classifer(nn.Module):
  def __init__(self):
    super(transfer_music_classifer, self).__init__()
    self.featureExtract = densenet.features
    
    self.classifier = nn.Sequential(
      nn.Linear(2208, 780),
      nn.ReLU(),
      nn.Dropout(0.2),
      nn.Linear(780, 240),
      nn.ReLU(),
      nn.Dropout(0.2),
      nn.Linear(240, 5),
      nn.Sigmoid(),
    )

  def forward(self, spectrogram):
    features = self.featureExtract(spectrogram)

    ######
    # This code is normally built into densenet, but since we are splitting
    # the features and classifier it must be put here
    # Taken from here: 
    # https://pytorch.org/vision/stable/_modules/torchvision/models/densenet.html

    # inplace = True means it modifies the input instead of allocating memory
    # for an ouput. Saves on memory
    out = nnf.relu(features, inplace=True)

    # adaptive pool decides the stride and kernel size automatically to ensure
    # the output has shape (x,x,1,1) regardless of input
    out = nnf.adaptive_avg_pool2d(out, (1, 1))

    # reshape output to be (1, x)
    out = torch.flatten(out, 1)
    ######    

    out = self.classifier(out)
    return out

In [110]:
model = transfer_music_classifer()
state = torch.load(modelPath, map_location='cpu')
model.load_state_dict(state)

<All keys matched successfully>

In [111]:
# Create 2 second .wav files for the selected audio
try:
  shutil.rmtree('/content/chunks')
except:
  pass
os.mkdir('/content/chunks')
audio_object = AudioSegment.from_mp3(pathToEvaluate) 
chunk_length_ms = 2000   #2000 ms clips each 
chunks = make_chunks(audio_object,chunk_length_ms)
for iteration, chunk in enumerate(chunks): 
  # if the chunks are not 2000 ms long (the last one may be shorter)
  # discard them as they would
  # mess up the creation of the spectrograms
  if len(chunk) == 2000:
    chunk.export(os.path.join('/content', 'chunks', str(iteration) + '.wav'), format="wav")

In [112]:
# makes the spectrograms
try:
  shutil.rmtree('/content/spectrograms')
except:
  pass
os.mkdir('/content/spectrograms')

for path in os.listdir('/content/chunks'):
    fullPath = os.path.join('/content', 'chunks', path)
    audio, sr = librosa.load(fullPath, sr= 44100) 
    plt.figure(figsize=(8,8.2), dpi= 36)
    value = librosa.stft(audio)
    data_in_db = librosa.amplitude_to_db(np.abs(value)) #applies Fourier Transform to show power throughout the time
    librosa.display.specshow(data_in_db, sr=sr)   #show spectrogram, in terms of log Function because that is how audio is usually represented
    
    specPath = os.path.join('/content', 'spectrograms', path[:-4] + '.png')
    plt.savefig(specPath, format="PNG", bbox_inches= "tight", pad_inches= 0, transparent= "True", )
    plt.clf()
    plt.close()
    print('Creating spectrogram:', specPath)

Creating spectrogram: /content/spectrograms/10.png
Creating spectrogram: /content/spectrograms/13.png
Creating spectrogram: /content/spectrograms/8.png
Creating spectrogram: /content/spectrograms/1.png
Creating spectrogram: /content/spectrograms/12.png
Creating spectrogram: /content/spectrograms/0.png
Creating spectrogram: /content/spectrograms/9.png
Creating spectrogram: /content/spectrograms/7.png
Creating spectrogram: /content/spectrograms/3.png
Creating spectrogram: /content/spectrograms/11.png
Creating spectrogram: /content/spectrograms/2.png
Creating spectrogram: /content/spectrograms/6.png
Creating spectrogram: /content/spectrograms/4.png
Creating spectrogram: /content/spectrograms/5.png


In [113]:
genres = [0,0,0,0,0]

preprocess = transforms.Compose([
  transforms.Resize([224,224]),
  transforms.ToTensor(),
  # densenet expects this transformations
  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

for spec in os.listdir('/content/spectrograms'):
  fullPath = os.path.join('/content', 'spectrograms', spec)
  spec = preprocess(Image.open(fullPath).convert('RGB'))
  out = model(spec.unsqueeze(0))
  pred = out.max(1, keepdim=True)[1][0][0].item()
  genres[pred] += 1
  
print(genres)
final_classification = genres.index(max(genres))
print(final_classification)

[4, 3, 0, 0, 7]
4
