In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import librosa
import IPython.display as ipd
import skimage
import numpy as np
from skimage import io
import matplotlib.pyplot as plt
import librosa.display
import sklearn
import pandas as pd
from librosa.core import istft


#*************settings for png output (only for CNN)******************
# hop length = number of samples per time-step in spectrogram
#hop_length = 512

# number of time-steps. (Width of spectogram/png)
#time_steps = 384

#length_samples = time_steps*hop_length

# n_fft = 2048
#*********************************************************************

In [3]:
#create base dataframe with column headings
basedata = {"Genre":[],
            "Chroma Feature":[],
            "Zero-Crossing Rate":[],
            "Spectral Bandwidth":[],
            "Root Mean Square Energy (RSME)":[],
            "Spectral Rolloff":[],
            "Spectral Centroid":[]
            }


#create slot for each mfcc value in dictionary
for i in range(0,30):
  mfcc_title = f"mfcc({i})"
  mfcc_value = []
  basedata[mfcc_title] = mfcc_value

basedata

{'Genre': [],
 'mfcc(0)': [],
 'mfcc(1)': [],
 'mfcc(10)': [],
 'mfcc(11)': [],
 'mfcc(12)': [],
 'mfcc(13)': [],
 'mfcc(14)': [],
 'mfcc(15)': [],
 'mfcc(16)': [],
 'mfcc(17)': [],
 'mfcc(18)': [],
 'mfcc(19)': [],
 'mfcc(2)': [],
 'mfcc(20)': [],
 'mfcc(21)': [],
 'mfcc(22)': [],
 'mfcc(23)': [],
 'mfcc(24)': [],
 'mfcc(25)': [],
 'mfcc(26)': [],
 'mfcc(27)': [],
 'mfcc(28)': [],
 'mfcc(29)': [],
 'mfcc(3)': [],
 'mfcc(4)': [],
 'mfcc(5)': [],
 'mfcc(6)': [],
 'mfcc(7)': [],
 'mfcc(8)': [],
 'mfcc(9)': []}

In [4]:
#create function that defines scale of spectogram
def scale_minmax(X, min=0.0, max=1.0):
    X_std = (X - X.min()) / (X.max() - X.min())
    X_scaled = X_std * (max - min) + min
    return X_scaled

In [5]:
#create function for spectogram plot without visualization (for performance purposes)
def create_spec(file):

  x , sr = librosa.load(file, mono=True)
  #samples per track is sample rate (sr)*song length in seconds

  #samples per segment is samples per track sliced into equal parts
  #in this case song length is 30sec and we are slicing into 30 parts
  #So in this case, our samples per segment is equal to our sample rate



  sps = sr
  spt = sr*30
  global basedata

  basedata["Genre"].append("rock")

  
#Break song into 30 equal segments, each 1 second long, and loop through
  for i in range(0,30):
    start2=sr*i
    X2 = x[(start2):(start2+sr)]

    #convert scale form hz to decibels
    X = librosa.stft(X2)
    Xdb = librosa.amplitude_to_db(abs(X))

    


    
    # #find spectral centroid value
    #"Center of mass of sound" I am assuming this is mostly where the sound lies
    spectral_centroids = librosa.feature.spectral_centroid(X2,  sr=sr)
    sc_title = f"Spectral Centroid({i})"
    sc_value = np.mean([spectral_centroids])
    basedata[sc_title].append(sc_value)
    


    # #Compute root mean square energy
    rmse = librosa.feature.rmse(X2)
    rsme_title = f"RSME({i})"
    rsme_value = np.mean([rmse])
    basedata[rsme_title].append(rsme_value)


    #find spectral rolloff value
    #measures how often high frequencies(peaks) reduce to zero
    spectral_rolloff = librosa.feature.spectral_rolloff(X2, sr=sr)
    sr_title = f"Spectral Rolloff({i})"
    sr_value = np.mean([spectral_rolloff])
    basedata[sr_title].append(sr_value)


    # #find spectral bandwith value
    #spectral bandwidth is defined as the width of the band of light at one-half the peak maximum 
    spectral_bandwidth = librosa.feature.spectral_bandwidth(X2, sr=sr)
    sb_title = f"Spectral Bandwidth({i})"
    sb_value = np.mean([spectral_bandwidth])
    basedata[sb_title].append(sb_value)
    

    # #find zero crossing rate value
    #calculates the smoothness of a signal, typically higher in rock/metal music
    zcr = librosa.feature.zero_crossing_rate(X2)
    zcr_title = f"Zero-Crossing Rate({i})"
    zcr_value = np.mean([zcr])
    basedata[zcr_title].append(zcr_value)
  

    # #find Chroma feature value
    #how much energy of each pitch class is present
    chromagram = librosa.feature.chroma_stft(X2, sr=sr)
    # # print(chromagram)
    cf_title = f"Chroma Feature({i})"
    cf_value = np.mean([chromagram])
    basedata[cf_title].append(cf_value)
  

#Break song into 30 equal segments, each 1 second long, and loop through (with different settings)
  for i in range(0,30):
      start_sample=sr*i
      window = x[(start_sample):(start_sample+sr)]
      X = librosa.stft(window)
      Xdb = librosa.amplitude_to_db(abs(X))
      Xdb = istft(Xdb)

      
      #find Mel frequency cepstral coefficients
      #which are the measure in peaks of sound in a given segment
      mfcc = librosa.feature.mfcc(window, sr)
      mfcc = mfcc.T
      mfcc_title = f"mfcc({i})"
      # print(mfcc[i])
      mfcc_value = [mfcc[i]]
      basedata[mfcc_title].append(np.mean(mfcc_value))


  ##*****In case we use CNN later******************

  # plt.figure(figsize=(14, 5))
  # librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
  # plt.colorbar()

  ##scale png image to 8 bit
  #img = scale_minmax(Xdb, 0, 255).astype(numpy.uint8)
  ##put x axis at "0"
  #img = numpy.flip(img, axis=0) 
  ##invert image to black== machine learning is colorblind apparently
  #img = 255-img
  ##save as PNG 
  #skimage.io.imsave("out.png", img)



In [6]:
import glob
gyo= ["blues","classical","country","disco","h_metal","hiphop","jazz","pop","reggae","rock"]
for file in glob.iglob(r'drive/My Drive/FinalDataProject/rock_mp3/*.mp3'): #<path to directory with mp3s in it>
    create_spec(file)



KeyboardInterrupt: ignored

In [None]:

# print(basedata)
base_df = pd.DataFrame(basedata)
base_df


In [None]:
base_df.to_csv('drive/My Drive/FinalDataProject/Machine/rock_machine2.csv', index=False)


In [None]:
blues = pd.read_csv("drive/My Drive/FinalDataProject/Machine/blues_machine2.csv")
classical = pd.read_csv("drive/My Drive/FinalDataProject/Machine/classical_machine2.csv")
country = pd.read_csv("drive/My Drive/FinalDataProject/Machine/country_machine2.csv")
disco = pd.read_csv("drive/My Drive/FinalDataProject/Machine/disco_machine2.csv")
h_metal = pd.read_csv("drive/My Drive/FinalDataProject/Machine/h_metal_machine2.csv")
hiphop = pd.read_csv("drive/My Drive/FinalDataProject/Machine/hiphop_machine2.csv")
jazz = pd.read_csv("drive/My Drive/FinalDataProject/Machine/jazz_machine2.csv")
pop = pd.read_csv("drive/My Drive/FinalDataProject/Machine/pop_machine2.csv")
reggae = pd.read_csv("drive/My Drive/FinalDataProject/Machine/reggae_machine2.csv")
rock = pd.read_csv("drive/My Drive/FinalDataProject/Machine/rock_machine2.csv")

In [None]:
#combine all genre dfs into one
ml_df = pd.concat([blues, classical, country, disco, h_metal, hiphop, jazz, pop, reggae, rock], ignore_index=True)
ml_df

In [None]:
ml_df.to_csv("drive/My Drive/FinalDataProject/Machine/numerical_machine2.csv", index=False)