In [1]:
from google.colab import drive, files
from tensorflow import keras
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
import pandas as pd
import librosa
from librosa import display
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

In [2]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [3]:
drive.mount('/content/drive/')

Mounted at /content/drive/


In [4]:
model = keras.models.load_model("/content/drive/MyDrive/GSN-2/classifier.hdf5")
nlp_model = keras.models.load_model("/content/drive/MyDrive/GSN-2/nlp.hdf5")

In [5]:
hop_length = 512
n_fft = 2048
n_mels = 96

def generate_spectrogram(file):
  y, sr = librosa.load(file)

  ms = librosa.feature.melspectrogram(y, sr=sr/2, fmin=1, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
  ms_db = librosa.power_to_db(ms, ref=np.max)
  sp = librosa.display.specshow(ms_db)

  plt.savefig("temp.jpg")
  plt.clf()
  plt.close()

  img = Image.open("temp.jpg")
  img = np.array(img) / 255

  return img

In [6]:
df = pd.read_csv("drive/MyDrive/GSN-2/GSN2-kaggle.csv")
df = df.drop(df.loc[df['song'] == "jazz.00054.wav"].index)
del df['song']

In [7]:
tokens = open("/content/drive/MyDrive/GSN-2/tokens.txt").read()
characters = sorted(list(set(tokens)))
char_to_num = dict((c, i) for i, c in enumerate(characters))
num_to_char = dict((i, c) for i, c in enumerate(characters))
seq_len = 100
input_len = len(tokens)
vocab_len = len(characters)

In [8]:
x_data = []
y_data = []
for i in range(0, (input_len) - seq_len, 1):
    in_seq = tokens[i:i + seq_len]

    out_seq = tokens[i + seq_len]

    x_data.append([char_to_num[char] for char in in_seq])
    y_data.append(char_to_num[out_seq])

In [9]:
def predict_labels(spec):
  song = np.expand_dims(spec, axis=0)

  out = np.where(model.predict(song)[0] > 0.6)
  out = out[0]

  i=0
  tags = []
  for col in df.columns:
    if(np.any(out == i)):
      tags.append(col)
    i+=1
  return tags

def generate_text(tag):
  out = []
  input_len = len(tag)
  pattern = tag
  pattern = [char_to_num[value] for value in pattern]

  out = pattern

  for i in range(100):
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(vocab_len)
    prediction = nlp_model.predict(x, verbose=0)
    index = np.argmax(prediction)
    result = num_to_char[index]

    out.append(index)

    pattern.append(index)
    pattern = pattern[1:len(pattern)]

  return out

def tokenize_words(input):
    input = input.lower()

    tokenizer = RegexpTokenizer(r'\w+')
    tokens = tokenizer.tokenize(input)

    filtered = filter(lambda token: token not in stopwords.words('english'), tokens)
    return " ".join(filtered)

In [10]:
song = files.upload()
spect = generate_spectrogram(list(song)[0])
tags = predict_labels(spect)
tags = [tag.lower() for tag in tags]

Saving rock.00010.wav to rock.00010.wav


In [11]:
song_review = ""
for tag in tags:
  tag_review = generate_text(tag)
  song_review = song_review.join([num_to_char[value] for value in tag_review])
print(song_review)

gorockll soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra solrockll soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra sodrockll soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra soirockll soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra soerockll soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra sosrockll soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra so rockll soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra so rockll soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra so9rockll soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra soani soacly sra so rockll soacly sra soani soacly sra soani soa