In [1]:
!pip install opendatasets



You should consider upgrading via the 'C:\Users\M\anaconda3\python.exe -m pip install --upgrade pip' command.


In [2]:
import opendatasets as od

In [3]:
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical #converts a class of integers to binary class matrix
import matplotlib.pyplot as plt 
from scipy.fft import fft #convert to FFT(Fast Fourier Transform)signals
from librosa import display 
import pandas as pd
import numpy as np
import pickle 
import librosa #read audio files  and convert them to spectrogram
import os
import warnings #to alert the user of some condition in a program


warnings.filterwarnings('ignore')

In [4]:
# overlap 50% to reduce the chances of overfitting
def splitsongs(X,y,window =0.05,overlap=0.5):
  #empty lists to hold our results
  temp_X=[]
  temp_y=[]
  #get input song array size
  xshape=X.shape[0]
  chunk=int(xshape*window)
  offset=int(chunk*(1-overlap))
  #split the song and creat new ones on window 
  spsong=[X[i:i+chunk]for i in range(0,xshape,offset)]
  for s in spsong:
    if s.shape[0] != chunk:
      continue
    temp_X.append(s)
    temp_y.append(y)
  return np.array(temp_X),np.array(temp_y)

In [5]:
samp_rate=0
def read_data(path):
  path="GTZAN/Data/genres_original/"
  genres = os.listdir(path)
  genres_labels = {}
  for index,genre in enumerate(genres):
    genres_labels[genre] = index
  features=[]
  labels=[]
  for genre in genres:
    print("Satrted {} Genre audio file reading!".format(genre))
    audio_files = os.listdir(path+genre)
    for audio_file in audio_files:
      audio_file_path = path + genre + "/" +audio_file
      try:
        audio_time_series,sampling_rate = librosa.load(audio_file_path)
        samp_rate = sampling_rate
        X,y = splitsongs(audio_time_series,genres_labels[genre],window=0.10) #window size = 0.10 to split into 3 sec 
        features.extend(X)
        labels.extend(y)
      except Exception:
        print('{} is an empty file!'.format(audio_file_path))
    
    print("{} Genre audio file reading completed!".format(genre))
  
  return np.array(features),np.array(labels)

In [6]:
#convert each 3 sec clip tp stft 
def convert_audio_to_stft(features):

  converted_files=[]
  for feature in features:
    fft_result1 = np.abs(fft(feature,32700)) #scipy library to convert audio into fft form of length 32700
    stft_trans2 = np.abs(librosa.stft(fft_result1,1024))#librosa library stft function to convert fft to stft by length 1024
    converted_files.append(stft_trans2.reshape(128, 513))#return array os shape (128,5113)
  
  return np.array(converted_files)


In [7]:
path="./GTZAN/genres_original/"
features,labels = read_data(path)

Satrted blues Genre audio file reading!
blues Genre audio file reading completed!
Satrted classical Genre audio file reading!
classical Genre audio file reading completed!
Satrted country Genre audio file reading!
country Genre audio file reading completed!
Satrted disco Genre audio file reading!
disco Genre audio file reading completed!
Satrted hiphop Genre audio file reading!
hiphop Genre audio file reading completed!
Satrted jazz Genre audio file reading!
GTZAN/Data/genres_original/jazz/jazz.00054.wav is an empty file!
jazz Genre audio file reading completed!
Satrted metal Genre audio file reading!
metal Genre audio file reading completed!
Satrted pop Genre audio file reading!
pop Genre audio file reading completed!
Satrted reggae Genre audio file reading!
reggae Genre audio file reading completed!
Satrted rock Genre audio file reading!
rock Genre audio file reading completed!


In [9]:
plt.figure()
display.waveplot(y = features[0], sr =22050)
plt.xlabel("Time in seconds")
plt.ylabel("Amplitiude")
plt.show()

AttributeError: module 'librosa.display' has no attribute 'waveplot'

<Figure size 432x288 with 0 Axes>

In [None]:
#80% training , 10% testing , 10% validation sets
X_train , X_test , y_train , y_test = train_test_split(features,labels,test_size=0.1,random_state=42)
X_train , X_val , y_train , y_val =train_test_split(X_train,y_train,test_size=0.1,random_state=42)

In [None]:
#convert them into stft form
X_train = convert_audio_to_stft(X_train)
X_val = convert_audio_to_stft(X_val)
X_test = convert_audio_to_stft(X_test)

In [None]:
fig, ax = plt.subplots()
img = librosa.display.specshow(librosa.amplitude_to_db(X_train[0], ref=np.max), y_axis='log', x_axis="time", ax=ax)
ax.set_title("power spectrogram")
fig.colorbar(img, ax=ax, format="%+2.0f dB")

In [None]:
X_train = X_train.reshape(-1, 128, 513, 1)
X_val = X_val.reshape(-1, 128, 513, 1)
X_test = X_test.reshape(-1, 128, 513, 1)

In [None]:
y_train = to_categorical(y_train)
y_val = to_categorical(y_val)
y_test = to_categorical(y_test)

In [None]:
with open('processed_data/X_train.pickle', 'wb') as f:
  pickle.dump(X_train, f)

with open('processed_data/X_valid.pickle', 'wb') as f:
  pickle.dump(X_val, f)

with open('processed_data/X_test.pickle', 'wb') as f:
  pickle.dump(X_test, f)


In [None]:
with open('processed_data/y_train.pickle', 'wb') as f:
  pickle.dump(y_train, f)

with open('processed_data/y_valid.pickle', 'wb') as f:
  pickle.dump(y_val, f)

with open('processed_data/y_test.pickle', 'wb') as f:
  pickle.dump(y_test, f)