<a href="https://colab.research.google.com/github/MikeGordeichik/AImusic/blob/main/Tracks_analysis_with_discogs_effnet_and_mtt_vgg.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Install essentia libraries with tensorflow support

!pip install -q essentia-tensorflow

#Download model files

!wget -q https://essentia.upf.edu/models/autotagging/msd/msd-musicnn-1.pb
!wget -q https://essentia.upf.edu/models/autotagging/msd/msd-musicnn-1.json

!wget -q https://essentia.upf.edu/models/autotagging/mtt/mtt-vgg-1.pb
!wget -q https://essentia.upf.edu/models/autotagging/mtt/mtt-vgg-1.json

!wget -q https://essentia.upf.edu/models/music-style-classification/discogs-effnet/discogs-effnet-bs64-1.json
!wget -q https://essentia.upf.edu/models/music-style-classification/discogs-effnet/discogs-effnet-bs64-1.pb




In [2]:
#Import libraries
import json

from essentia.standard import MonoLoader, TensorflowPredictMusiCNN, TensorflowPredictVGGish, TensorflowPredictEffnetDiscogs
import numpy as np
import matplotlib.pyplot as plt




In [3]:
#Load styles name from model metadata

with open('discogs-effnet-bs64-1.json', 'r') as json_file:
    metadata__discogs = json.load(json_file)
styles_discogs = metadata__discogs['classes']

with open('msd-musicnn-1.json', 'r') as json_file:
    metadata_msd = json.load(json_file)
styles_msd = metadata_msd['classes']

with open('mtt-vgg-1.json', 'r') as json_file:
    metadata_mtt = json.load(json_file)
styles_mtt = metadata_mtt['classes']



In [4]:
#Define inference function for Discogs model

def inference_discogs(audio_file, model, styles):
  #Do inference
  activations = TensorflowPredictEffnetDiscogs(graphFilename=model)(audio)
  #Calculate mean activations
  meanActivations = sum(activations)/len(activations)
  #Get indexes of top activations
  genreIndex = np.argpartition(meanActivations, -7)[-7:]
  #Print results
  for x in range (0,7):
      print("\t\t"+styles[genreIndex[x]]+":", "%.2f" % meanActivations[genreIndex[x]])

#Define inference function for MusiCNN models

def inference_musicnn(audio_file, model, styles):
  #Do inference
  activations = TensorflowPredictMusiCNN(graphFilename=model)(audio)
  #Calculate mean activations
  meanActivations = sum(activations)/len(activations)
  #Get indexes of top activations
  genreIndex = np.argpartition(meanActivations, -7)[-7:]
  #Print results
  for x in range (0,7):
      v = meanActivations[genreIndex[x]]
      print("\t\t"+styles[genreIndex[x]]+":", "%.2f" % meanActivations[genreIndex[x]])



In [6]:
#Tracklist
tracks = ["st1","st2","st3","st4","ai1", "ai2", "ai3", "ai4", "mb1", "mb2", "mb3", "mb4"]

for i in range (0,len(tracks)):
  #Load audio file
  audio_file = tracks[i]+'.mp3'
  audio = MonoLoader(sampleRate=16000, filename=audio_file)()
  print("song: "+tracks[i])
  print("{\n\tmodel: discogs")
  print("\t{")
  #Get discogs styles
  inference_discogs(audio_file, 'discogs-effnet-bs64-1.pb', styles_discogs)
  print("\tmodel: mtt")
  #Get musicnn mtt music properties
  inference_musicnn(audio_file, 'mtt-vgg-1.pb', styles_mtt)
  print("\t}\n}")

song: st1
{
	model: discogs
	{
		Hip Hop---Jazzy Hip-Hop: 0.11
		Electronic---Ambient: 0.13
		Hip Hop---Trip Hop: 0.31
		Electronic---Trip Hop: 0.33
		Hip Hop---Instrumental: 0.20
		Electronic---Downtempo: 0.42
		Electronic---Hip Hop: 0.13
	model: mtt
		pop: 0.11
		electronic: 0.12
		ambient: 0.12
		synth: 0.15
		beat: 0.20
		slow: 0.23
		drums: 0.28
	}
}
song: st2
{
	model: discogs
	{
		Jazz---Bossa Nova: 0.07
		Jazz---Smooth Jazz: 0.07
		Latin---Bossanova: 0.09
		Electronic---Future Jazz: 0.11
		Jazz---Contemporary Jazz: 0.08
		Jazz---Fusion: 0.11
		Electronic---Downtempo: 0.20
	model: mtt
		female: 0.12
		ambient: 0.13
		vocal: 0.15
		drums: 0.44
		slow: 0.23
		beat: 0.17
		guitar: 0.16
	}
}
song: st3
{
	model: discogs
	{
		Electronic---Downtempo: 0.07
		Electronic---Experimental: 0.08
		Rock---Indie Rock: 0.08
		Rock---Dream Pop: 0.08
		Rock---Ethereal: 0.16
		Rock---Shoegaze: 0.11
		Electronic---Ambient: 0.38
	model: mtt
		new age: 0.15
		violin: 0.21
		ambient: 0.33
		classical: 