In [1]:
# Silence every warning so library deprecation notices do not clutter the cell outputs.
import warnings
warnings.filterwarnings("ignore")
import shutil
import os
import pandas as pd
import matplotlib
# Select the 'nbAgg' backend right after importing matplotlib and before
# pyplot is imported on the next line.
matplotlib.use(u'nbAgg')
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pickle
from sklearn.manifold import TSNE
from sklearn import preprocessing
# NOTE(review): pandas is already imported above; this second import is redundant.
import pandas as pd
from multiprocessing import Process
import multiprocessing
import librosa
import IPython.display as ipd
import librosa.display
import sklearn

In [None]:
# Feature array whose rows are ordered by audio file name (e.g. 2, 10, 15, ...).
data_sort = np.load('data_sort.npy')

In [7]:
# Labels, ordered by audio file name. allow_pickle=True lets numpy unpickle
# object data on load, so only use this with a trusted label.npy.
label = np.load('label.npy', allow_pickle=True)

In [2]:
# File names used later to read the chromagram / mel-spectrogram images;
# presumably already arranged in train-test split order -- verify against
# the notebook that wrote this file.
filename_split_order = np.load('filename_split_order.npy')

### Data Split: (MFCC)

In [8]:
# Split exactly as in Other_Features.ipynb: random_state must be the same in
# both notebooks so the resulting row order matches.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    data_sort,
    label,
    test_size=0.20,
    random_state=42,
    stratify=label,
)

In [9]:
 y_train[:10]  # compare with Other_features.ipynb: y_train and y_test must be the same in both notebooks

array(['Pop', 'Instrumental', 'Experimental', 'Electronic', 'Electronic',
       'Instrumental', 'Rock', 'Hip-Hop', 'Experimental', 'Experimental'],
      dtype=object)

In [10]:
# First ten test labels (these should also match Other_features.ipynb).
y_test[:10]

array(['Rock', 'International', 'Folk', 'Folk', 'Electronic', 'Hip-Hop',
       'Hip-Hop', 'Pop', 'Instrumental', 'International'], dtype=object)

### Pad with zeros up to the minimum input size (32) required by the VGG model:

In [8]:
# Zero-pad the second axis up to 32 rows (e.g. 6397*20*1293 becomes 6397*32*1293).
# The sample count and the last axis are read from X_train instead of being
# hard-coded, and the copy is a single slice assignment rather than a Python
# double loop; rows beyond X_train.shape[1] stay zero.
X_train_32 = np.zeros((X_train.shape[0], 32, X_train.shape[2]))
X_train_32[:, :X_train.shape[1], :] = X_train

In [9]:
# Zero-pad the second axis up to 32 rows (e.g. 1600*20*1293 becomes 1600*32*1293).
# The sample count and the last axis are read from X_test instead of being
# hard-coded, and the copy is a single slice assignment rather than a Python
# double loop; rows beyond X_test.shape[1] stay zero.
X_test_32 = np.zeros((X_test.shape[0], 32, X_test.shape[2]))
X_test_32[:, :X_test.shape[1], :] = X_test

### VGG16 model:

In [1]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Input, Concatenate

# Single-channel input of size 32 x 1293.
img_input = Input(shape=(32, 1293, 1))
# Repeat the gray-scale input three times along the last axis so it has the
# 3 channels VGG16 expects.
img_conc = Concatenate()([img_input] * 3)
# Convolutional base only (include_top=False), initialised with ImageNet weights.
vgg_conv = VGG16(
    weights='imagenet',
    include_top=False,
    input_tensor=img_conc,
)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [2]:
# Reference: https://www.learnopencv.com/keras-tutorial-fine-tuning-using-pre-trained-models/
from tensorflow.keras import models, layers, optimizers
from tensorflow.keras.layers import Flatten

# Feature extractor: the VGG16 convolutional base followed by a Flatten layer,
# so every input is mapped to one 1-D feature vector.
model = models.Sequential([vgg_conv, Flatten()])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 1, 40, 512)        14714688  
_________________________________________________________________
flatten (Flatten)            (None, 20480)             0         
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Extract VGG features for the padded training data. A trailing channel axis
# is appended to match Input(shape=(32,1293,1)); np.newaxis replaces the
# hard-coded reshape(6397,32,1293,1) so the cell still works if the number of
# samples changes.
Train_Latent = model.predict(X_train_32[..., np.newaxis])
Train_Latent.shape

(6397, 20480)

In [15]:
# Extract VGG features for the padded test data. A trailing channel axis is
# appended to match Input(shape=(32,1293,1)); np.newaxis replaces the
# hard-coded reshape(1600,32,1293,1) so the cell still works if the number of
# samples changes.
Test_Latent = model.predict(X_test_32[..., np.newaxis])
Test_Latent.shape

(1600, 20480)

In [17]:
# Stack train rows on top of test rows so the features follow the split order.
vgg_features_split_order = np.concatenate((Train_Latent, Test_Latent), axis=0)

In [18]:
# Persist the MFCC-based VGG features (np.save appends the '.npy' extension).
np.save('vgg_features_split_order', vgg_features_split_order)

### VGG_Spectrogram:

In [11]:
# The input shape differs from the MFCC model: 480 x 640 images with 3 channels.
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Input

img_input = Input(shape=(480, 640, 3))
# Convolutional base only (include_top=False), initialised with ImageNet weights.
vgg_conv = VGG16(
    weights='imagenet',
    include_top=False,
    input_tensor=img_input,
)

In [12]:
# Reference: https://www.learnopencv.com/keras-tutorial-fine-tuning-using-pre-trained-models/
from tensorflow.keras import models, layers, optimizers
from tensorflow.keras.layers import Flatten, Conv2D, MaxPooling2D

# Feature extractor: VGG16 convolutional base, a (3, 2) max-pool to shrink the
# feature map, then Flatten to get one 1-D vector per image.
model = models.Sequential([
    vgg_conv,
    MaxPooling2D(pool_size=(3, 2)),
    Flatten(),
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 15, 20, 512)       14714688  
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 10, 512)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 25600)             0         
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Load each spectrogram image and extract its VGG features.
# Images were saved in order of the audio name (e.g. '02.mp3' followed by
# '10.mp3' and so on) and the image file name is its position in that order
# (original note: '1.png' is the spectrogram of audio '02.mp3').
# NOTE(review): the loop reads '0.png' onwards -- confirm the numbering base
# against the files on disk.
# NOTE(review): pixels are passed to the ImageNet-pretrained VGG16 as read by
# cv2, without keras' vgg16.preprocess_input -- confirm this is intentional.
import cv2
from tqdm import tqdm

# One image per label: the later train_test_split requires equal lengths.
n_images = len(label)
# Feature width is taken from the model's Flatten output instead of being hard-coded.
vgg_spectrogram = np.zeros((n_images, model.output_shape[-1]))
for i in tqdm(range(n_images)):
    img_path = 'MFCC_SPECTROGRAM/' + str(i) + '.png'
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None (it does not raise) when a file is missing
        # or unreadable; fail with the offending path instead of an
        # AttributeError on .reshape.
        raise FileNotFoundError('could not read image: ' + img_path)
    vgg_spectrogram[i] = model.predict(img.reshape(1, 480, 640, 3))
    

100%|██████████| 7997/7997 [06:01<00:00, 22.14it/s]


### Data Split:

In [24]:
# Same split as before (random_state must be identical in all the notebooks).
# The split is only used here to rearrange the rows into split order.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    vgg_spectrogram,
    label,
    test_size=0.20,
    random_state=42,
    stratify=label,
)

In [25]:
 y_train[:10]  # first ten training labels after the re-split

array(['Pop', 'Instrumental', 'Experimental', 'Electronic', 'Electronic',
       'Instrumental', 'Rock', 'Hip-Hop', 'Experimental', 'Experimental'],
      dtype=object)

In [26]:
# First ten test labels after the re-split.
y_test[:10]

array(['Rock', 'International', 'Folk', 'Folk', 'Electronic', 'Hip-Hop',
       'Hip-Hop', 'Pop', 'Instrumental', 'International'], dtype=object)

In [27]:
# Train rows first, then test rows: the features now follow the split order.
vgg_spectrogram_features_split_order = np.concatenate((X_train, X_test), axis=0)

In [38]:
# Display the (rows, features) shape of the stacked array.
np.shape(vgg_spectrogram_features_split_order)

(7997, 25600)

In [39]:
# Persist the spectrogram-based VGG features (np.save appends the '.npy' extension).
np.save('vgg_spectrogram_features_split_order', vgg_spectrogram_features_split_order)

### VGG_Chromagram:

In [4]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Input

# 480 x 640 images with 3 channels.
img_input = Input(shape=(480, 640, 3))
# Convolutional base only (include_top=False), initialised with ImageNet weights.
vgg_conv = VGG16(
    weights='imagenet',
    include_top=False,
    input_tensor=img_input,
)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [9]:
# Reference: https://www.learnopencv.com/keras-tutorial-fine-tuning-using-pre-trained-models/
from tensorflow.keras import models, layers, optimizers
from tensorflow.keras.layers import Flatten, Conv2D, MaxPooling2D

# Feature extractor: VGG16 convolutional base, a (3, 2) max-pool to shrink the
# feature map, then Flatten to get one 1-D vector per image.
model = models.Sequential([
    vgg_conv,
    MaxPooling2D(pool_size=(3, 2)),
    Flatten(),
])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 15, 20, 512)       14714688  
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 5, 10, 512)        0         
_________________________________________________________________
flatten (Flatten)            (None, 25600)             0         
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Load each chromagram image and extract its VGG features.
# Rows follow filename_split_order, i.e. the same order as the train-test split.
# NOTE(review): pixels are passed to the ImageNet-pretrained VGG16 as read by
# cv2, without keras' vgg16.preprocess_input -- confirm this is intentional.
import cv2
from tqdm import tqdm

# Sizes come from the file list and the model's Flatten output instead of the
# hard-coded (7997, 25600).
vgg_chromagram = np.zeros((len(filename_split_order), model.output_shape[-1]))
for row, stem in enumerate(tqdm(filename_split_order)):
    img_path = 'Chroma_Spectrogram/' + stem + '.png'
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None (it does not raise) when a file is missing
        # or unreadable; fail with the offending path instead of an
        # AttributeError on .reshape.
        raise FileNotFoundError('could not read image: ' + img_path)
    vgg_chromagram[row] = model.predict(img.reshape(1, 480, 640, 3))
    

100%|██████████| 7997/7997 [05:57<00:00, 22.35it/s]


In [12]:
# Persist the chromagram-based VGG features (np.save appends the '.npy' extension).
np.save('vgg_chromagram_split_order', vgg_chromagram)

### VGG_Mel_Spectrogram:

In [2]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Input

# 480 x 640 images with 3 channels.
img_input = Input(shape=(480, 640, 3))
# Convolutional base only (include_top=False), initialised with ImageNet weights.
vgg_conv = VGG16(
    weights='imagenet',
    include_top=False,
    input_tensor=img_input,
)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [3]:
# Reference: https://www.learnopencv.com/keras-tutorial-fine-tuning-using-pre-trained-models/
from tensorflow.keras import models, layers, optimizers
from tensorflow.keras.layers import Flatten, Conv2D, MaxPooling2D

# Feature extractor: VGG16 convolutional base, a (3, 2) max-pool to shrink the
# feature map, then Flatten to get one 1-D vector per image.
model = models.Sequential([
    vgg_conv,
    MaxPooling2D(pool_size=(3, 2)),
    Flatten(),
])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 15, 20, 512)       14714688  
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 5, 10, 512)        0         
_________________________________________________________________
flatten (Flatten)            (None, 25600)             0         
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________


In [6]:
# Load each mel-spectrogram image and extract its VGG features.
# Rows follow filename_split_order, i.e. the same order as the train-test split.
# NOTE(review): pixels are passed to the ImageNet-pretrained VGG16 as read by
# cv2, without keras' vgg16.preprocess_input -- confirm this is intentional.
import cv2
from tqdm import tqdm

# Sizes come from the file list and the model's Flatten output instead of the
# hard-coded (7997, 25600).
vgg_Mel_Spectrogram = np.zeros((len(filename_split_order), model.output_shape[-1]))
for row, stem in enumerate(tqdm(filename_split_order)):
    img_path = 'Mel_Spectrogram/' + stem + '.png'
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None (it does not raise) when a file is missing
        # or unreadable; fail with the offending path instead of an
        # AttributeError on .reshape.
        raise FileNotFoundError('could not read image: ' + img_path)
    vgg_Mel_Spectrogram[row] = model.predict(img.reshape(1, 480, 640, 3))

100%|██████████| 7997/7997 [07:46<00:00, 17.13it/s]


In [7]:
# Persist the mel-spectrogram VGG features (np.save appends the '.npy' extension).
np.save('vgg_mel_spectrogram_split_order', vgg_Mel_Spectrogram)

### VGG_Mel_Spectrogram (unflattened):

In [3]:
# Load VGG16 without its classifier head (include_top=False) and with
# ImageNet weights, on 480 x 640 images with 3 channels.
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Input

img_input = Input(shape=(480, 640, 3))
vgg_conv = VGG16(
    weights='imagenet',
    include_top=False,
    input_tensor=img_input,
)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [4]:
# Reference: https://www.learnopencv.com/keras-tutorial-fine-tuning-using-pre-trained-models/
from tensorflow.keras import models, layers, optimizers
from tensorflow.keras.layers import Flatten, Conv2D, MaxPooling2D

# Unflattened feature extractor: VGG16 convolutional base followed by a
# (2, 2) max-pool. This differs from the flattened model, which pools with
# (3, 2) and then flattens.
model = models.Sequential([
    vgg_conv,
    MaxPooling2D(pool_size=(2, 2)),
])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 15, 20, 512)       14714688  
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 7, 10, 512)        0         
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________


In [5]:
# Load each mel-spectrogram image and extract its unflattened VGG feature map.
# Rows follow filename_split_order, i.e. the same order as the train-test split.
# NOTE(review): pixels are passed to the ImageNet-pretrained VGG16 as read by
# cv2, without keras' vgg16.preprocess_input -- confirm this is intentional.
import cv2
from tqdm import tqdm

# Per-image feature-map shape is read from the model (batch axis dropped)
# instead of the hard-coded (7997, 7, 10, 512).
feature_map_shape = tuple(model.output_shape[1:])
vgg_Mel_Spectrogram_unflat = np.zeros((len(filename_split_order),) + feature_map_shape)
for row, stem in enumerate(tqdm(filename_split_order)):
    img_path = 'Mel_Spectrogram/' + stem + '.png'
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None (it does not raise) when a file is missing
        # or unreadable; fail with the offending path instead of an
        # AttributeError on .reshape.
        raise FileNotFoundError('could not read image: ' + img_path)
    # predict returns a batch of one; [0] selects that single feature map.
    vgg_Mel_Spectrogram_unflat[row] = model.predict(img.reshape(1, 480, 640, 3))[0]

100%|██████████| 7997/7997 [08:09<00:00, 16.34it/s]


In [6]:
# Persist the unflattened mel-spectrogram VGG features (np.save appends the '.npy' extension).
np.save('vgg_Mel_Spectrogram_unflat_split_order', vgg_Mel_Spectrogram_unflat)