In [1]:
from scipy.io import wavfile as wv
import scipy.signal as ss
from scipy.stats import entropy
import numpy as np
import os
import librosa
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import FastICA
from sklearn.cluster import KMeans
from sklearn.svm import SVC
import sklearn.naive_bayes as sknb
import matplotlib.pyplot as plt

In [2]:
# Training labels: `y` is the 'Genre' column of train.csv as a NumPy array
train_labels_path = 'train.csv'
td = pd.read_csv(train_labels_path)
y = td['Genre'].values

In [3]:
# Load every .wav clip found in the test folder.
directory_test = "test"

audio_data_list_test = []
sampling_rate_list_test = []

for filename in os.listdir(directory_test):
    # Anything that is not a .wav file is ignored
    if not filename.endswith(".wav"):
        continue

    file_path = os.path.join(directory_test, filename)
    # sr=None asks librosa to keep the file's own sampling rate
    audio_data, sampling_rate = librosa.load(file_path, sr=None)

    audio_data_list_test.append(audio_data)
    sampling_rate_list_test.append(sampling_rate)

# audio_data_list_test[i] and sampling_rate_list_test[i] describe the same clip,
# in the order os.listdir returned the files.

In [4]:
directory = "train"

audio_data_list_unsorted = []
sampling_rate_list_unsorted = []
filename_list = []

# Load every .wav clip found in the training folder, remembering its file name
for filename in os.listdir(directory):
    if not filename.endswith(".wav"):
        continue

    file_path = os.path.join(directory, filename)
    audio_data, sampling_rate = librosa.load(file_path, sr=None)

    filename_list.append(filename)
    audio_data_list_unsorted.append(audio_data)
    sampling_rate_list_unsorted.append(sampling_rate)

# Reorder the clips by characters 5-7 of their file name so the audio lines up with y.
# assumes those characters hold the clip's zero-padded ID -- TODO confirm
zipped_sorted = sorted(
    zip(filename_list, audio_data_list_unsorted, sampling_rate_list_unsorted),
    key=lambda entry: entry[0][5:8],
)
audio_data_list = [audio for _, audio, _ in zipped_sorted]
sampling_rate_list = [rate for _, _, rate in zipped_sorted]

# audio_data_list / sampling_rate_list are now in sorted order and ready to use

In [5]:
# Frequency information of each song: the FFT of its raw samples
fft_list = [np.fft.fft(song) for song in audio_data_list]

fft_list_test = [np.fft.fft(song) for song in audio_data_list_test]

In [6]:
# Energy of each song: the sum of its squared samples
energy_list = [np.sum(np.square(song)) for song in audio_data_list]

energy_list_test = [np.sum(np.square(song)) for song in audio_data_list_test]

In [7]:
# Variance and mean of the samples of each song
variance_list = [np.var(song) for song in audio_data_list]
expectation_list = [np.mean(song) for song in audio_data_list]

variance_list_test = [np.var(song) for song in audio_data_list_test]
expectation_list_test = [np.mean(song) for song in audio_data_list_test]

In [8]:
# Tempo of each song, estimated by librosa's beat tracker
# (beat_track returns a (tempo, beats) pair; only the tempo is kept)
tempo_list = []
for song, rate in zip(audio_data_list, sampling_rate_list):
    tempo_list.append(librosa.beat.beat_track(y=song, sr=rate)[0])

tempo_list_test = []
for song, rate in zip(audio_data_list_test, sampling_rate_list_test):
    tempo_list_test.append(librosa.beat.beat_track(y=song, sr=rate)[0])

In [21]:
# Zero-crossing rate of each song, summarized as the median over librosa's per-frame rates
zero_cross_rate_list = [
    np.median(librosa.feature.zero_crossing_rate(song)) for song in audio_data_list
]

zero_cross_rate_list_test = [
    np.median(librosa.feature.zero_crossing_rate(song)) for song in audio_data_list_test
]

In [22]:
# Entropy of the FFT magnitudes of each song
fft_entrp_list = [entropy(np.absolute(spectrum)) for spectrum in fft_list]

fft_entrp_list_test = [entropy(np.absolute(spectrum)) for spectrum in fft_list_test]

In [23]:
# Gathers the MFCCs of a song
# This cell previously called np.fft.fft, which made the MFCC-based feature an exact
# copy of the FFT-based one. librosa.feature.mfcc returns an (n_mfcc, n_frames) matrix;
# it is flattened to 1-D so the entropy computed from it downstream is a single number
# per song, like every other feature.
mfcc_list = []
for song, rate in zip(audio_data_list, sampling_rate_list):
    mfcc_list.append(librosa.feature.mfcc(y=song, sr=rate).ravel())

mfcc_list_test = []
for song, rate in zip(audio_data_list_test, sampling_rate_list_test):
    mfcc_list_test.append(librosa.feature.mfcc(y=song, sr=rate).ravel())

In [24]:
# Entropy of the absolute values stored in each mfcc_list / mfcc_list_test entry
mfcc_entrp_list = [entropy(np.absolute(coeffs)) for coeffs in mfcc_list]

mfcc_entrp_list_test = [entropy(np.absolute(coeffs)) for coeffs in mfcc_list_test]

In [25]:
def conv_compare(song, data):
    """
    Score how strongly `song` overlaps with every entry of `data`.

    Each score is the sum of element-wise products between one entry of `data`
    and the reversed `song`; a higher number means more overlap. The inputs can
    be time-domain samples or frequency-domain (FFT) arrays.

    Parameters
    ----------
    song : 1-D array-like
        The song every entry of `data` is compared against.
    data : iterable of 1-D array-like
        The songs to compare `song` to.

    Returns
    -------
    list
        One score per entry of `data`, in the same order.
    """
    flipped_song = np.flip(np.asarray(song))
    convolution_sim_list = []
    for audio in data:
        audio = np.asarray(audio)
        # Only the overlapping prefix is compared, so an entry longer than
        # `song` no longer raises IndexError.
        overlap = min(len(audio), len(flipped_song))
        convolution_sim_list.append(np.dot(audio[:overlap], flipped_song[:overlap]))
    # The scores used to be built and then dropped; hand them back to the caller.
    return convolution_sim_list

In [26]:
# Step 1: Prepare the Feature Matrix
# One row per song, one column per feature; train and test use the same column order.
feature_matrix = np.column_stack((
    energy_list,
    variance_list,
    expectation_list,
    tempo_list,
    zero_cross_rate_list,
    fft_entrp_list,
    mfcc_entrp_list,
))
feature_matrix_test = np.column_stack((
    energy_list_test,
    variance_list_test,
    expectation_list_test,
    tempo_list_test,
    zero_cross_rate_list_test,
    fft_entrp_list_test,
    mfcc_entrp_list_test,
))

# Step 2: Normalize the Feature Matrix
# The scaler is fitted on the training data only. The test data is transformed with the
# training mean/std so both sets share one scale and no test statistics leak into it.
scaler = StandardScaler()
normalized_features = scaler.fit_transform(feature_matrix)
normalized_features_test = scaler.transform(feature_matrix_test)

In [None]:
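# # SVD test (disabled experiment; note that the code below refers to `Y`, which this
# # notebook never defines, and to a "Types of Cancer" title unrelated to music genres)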
# # Center the data
# nf_c = normalized_features - normalized_features.mean()


# # Take SVD
# (nf_u, nf_s, nf_vh) = np.linalg.svd(nf_c)
# # 2-D Approximation

# # the pca transform matrix for reducing dimensionality to 2
# nf_v_approx = np.transpose(nf_vh[:2])

# # the dimensionality reduced transformed data
# nf_pca = np.dot(nf_c, nf_v_approx)

# # Plot the approximation
# plt.scatter(nf_pca[:, 0], nf_pca[:, 1])

# for i, tag in enumerate(Y):
#     plt.annotate(tag[0][0], (nf_pca[i, 0], nf_pca[i, 1]))
# plt.title("Types of Cancer")
# plt.show()


In [28]:
# Step 3: Apply ICA
# FastICA cannot extract more components than there are features (or samples),
# so the requested 10 is capped instead of relying on sklearn's warning and fallback.
n_ica_components = min(10, *normalized_features.shape)
ica = FastICA(n_components=n_ica_components, random_state=42)
independent_components = ica.fit_transform(normalized_features)
# Project the test data with the unmixing learned on the training data. Refitting ICA
# on the test set would give components that do not match what the classifier was trained on.
independent_components_test = ica.transform(normalized_features_test)

# Step 4: Classifying
clf = SVC(kernel='linear')
clf.fit(independent_components, y)

# Predict a genre for every test song
# NOTE: a later cell reassigns predicted_genres with the Naive Bayes predictions.
predicted_genres = clf.predict(independent_components_test)



In [29]:
# Gaussian Naive Bayes trained on the normalized features; its predictions
# for the test songs are stored in predicted_genres
nb_model = sknb.GaussianNB().fit(normalized_features, y)
predicted_genres = nb_model.predict(X=normalized_features_test)

In [30]:
# Pair every test file name with its predicted genre.
# Only .wav files were loaded (and therefore predicted), so the directory listing must be
# filtered the same way; otherwise any other file in the folder shifts every pairing.
# This relies on os.listdir returning the files in the same order as when they were loaded.
test_filenames = [name for name in os.listdir(directory_test) if name.endswith(".wav")]
if len(test_filenames) != len(predicted_genres):
    raise ValueError(
        f"{len(test_filenames)} .wav files in '{directory_test}' but "
        f"{len(predicted_genres)} predictions; reload the test data before predicting"
    )
predictions = list(zip(test_filenames, predicted_genres))

In [31]:
# Order the (file name, genre) pairs by characters 4-6 of the file name,
# because os.listdir returns the files in arbitrary order
def _file_id(entry):
    return entry[0][4:7]

sorted_data = list(predictions)
sorted_data.sort(key=_file_id)

In [32]:
# Write the sorted (file name, genre) pairs to test.csv without the index column
output_columns = ['ID', 'genre']
df = pd.DataFrame(data=sorted_data, columns=output_columns)
df.to_csv(path_or_buf='test.csv', index=False)