<a href="https://colab.research.google.com/github/rbqpark/tinger/blob/main/UseSNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Code to use Siamese Neural Network** 

**This notebook houses all the functions needed to use our Siamese neural network within the larger Tinger model stack. For further instructions, refer to the Tinger Integration notebook.**

In [None]:
# Mount Google Drive to access directory
# `drive` is used for the mount call in this cell; `auth`, `GoogleCredentials`
# and `gspread` are used by the authentication cell further down.
from google.colab import drive, auth
from oauth2client.client import GoogleCredentials
import gspread

# Make the Drive contents available under /content/drive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Get authentication to read CSV files from Google Drive
# Authenticate the current Colab user.
auth.authenticate_user()
# Build a gspread client from the application-default Google credentials.
# NOTE(review): `gc` is not referenced by any other code visible in this notebook -- confirm it is still needed.
gc = gspread.authorize(GoogleCredentials.get_application_default())

In [None]:
# Install libraries not local to Colab
# These are IPython shell magics (`!`), so this cell only runs inside a notebook.
# NOTE(review): `tensorflow-gpu` is presumably redundant next to `tensorflow`
# on current releases -- confirm the second install is still needed.
!pip install tensorflow
!pip install tensorflow-gpu
!pip install chinese-converter



In [None]:
# Import all other dependencies
from tensorflow import keras
import pandas as pd
import numpy as np
import os
import glob
import chinese_converter
from PIL import Image
import matplotlib.pyplot as plt
from timeit import default_timer

In [None]:
# Fetch standardized title and spectrogram data of a target song
def fetch_target(target_title, mel_path, direction):
  """
  Fetch the standardized titles and spectrogram data of a target song.

  Scans mel_path for file names that contain target_title (case-insensitive)
  and keeps at most the first five matches. For Mandarin targets both the
  title and the file names are converted to simplified characters before
  matching.

  Args:
    target_title: Title of the target song, as a string.
    mel_path: Directory containing the spectrogram image files.
    direction: 0 when the target song is Mandarin, 1 when it is English.

  Returns:
    A tuple (target_titles, target_data). target_titles[i] is the (simplified,
    for direction 0) target title followed by the last 10 characters of the
    i-th matched file name. target_data is an array holding one
    (200, 200, 1) grayscale image per matched file, scaled to [0, 1]; it is
    an empty array when nothing matches.

  Raises:
    ValueError: If direction is neither 0 nor 1.
  """
  if direction not in (0, 1):
    raise ValueError(
        "direction must be 0 (Mandarin target) or 1 (English target), got {!r}".format(direction))

  if direction == 0:
    target_title = chinese_converter.to_simplified(target_title)
  needle = target_title.lower()

  target_mel = []
  for r in os.listdir(mel_path):
    # Mandarin file names are converted to simplified characters so that they
    # can be compared against the simplified target title.
    candidate = chinese_converter.to_simplified(r) if direction == 0 else r
    if needle in candidate.lower():
      target_mel.append(r)
      if len(target_mel) == 5:
        break

  # Sort the file names once and derive the titles from the sorted list, so
  # that target_titles[i] always describes target_data[i]. Sorting the two
  # lists independently could order them differently.
  target_mel.sort()
  target_titles = [target_title + m[-10:] for m in target_mel]

  # Convert raw spectrograms into input data
  target_data = []
  for m in target_mel:
    with Image.open(os.path.join(mel_path, m)) as img:
      pixels = np.asarray(img.convert('L').resize((200, 200)))
    target_data.append(pixels.reshape((200, 200, 1)))

  target_data = np.asarray(target_data) / 255.0
  return (target_titles, target_data)


In [None]:
# Fetch standardized titles and spectrogram data for all comparison songs in the opposite language of the target song
def fetch_comparison(mel_path, csv_path, direction):
  """
  Fetch standardized titles and spectrogram data for all comparison songs.

  Every file in mel_path (visited in sorted order) is matched against the
  'Song Title' column of the CSV at csv_path. The first standard title that
  occurs as a case-insensitive substring of the file name wins; files that
  match no title are skipped.

  Args:
    mel_path: Directory containing the comparison spectrogram image files.
    csv_path: CSV file with a 'Song Title' column of standardized titles.
    direction: 0 when the comparison songs are English, 1 when they are
      Mandarin (titles and file names are then converted to simplified
      characters before matching).

  Returns:
    A tuple (comparison_titles, comparison_data). comparison_titles[i] is the
    matched standard title (simplified and lowercased for direction 1)
    followed by the last 10 characters of the file name. comparison_data is
    an array holding one (200, 200, 1) grayscale image per matched file,
    scaled to [0, 1]; it is an empty array when nothing matches.

  Raises:
    ValueError: If direction is neither 0 nor 1.
  """
  if direction not in (0, 1):
    raise ValueError(
        "direction must be 0 (English comparisons) or 1 (Mandarin comparisons), got {!r}".format(direction))

  # Missing titles are dropped so that an empty CSV cell cannot break matching.
  standard_titles = pd.read_csv(csv_path)['Song Title'].dropna().astype(str)

  # Normalize every standard title once instead of once per file.
  # Each entry is (title stored in the result, key used for matching).
  if direction == 0:
    candidates = [(s, s.lower()) for s in standard_titles]
  else:
    candidates = []
    for s in standard_titles:
      simplified = chinese_converter.to_simplified(s).lower()
      candidates.append((simplified, simplified))

  # Filter for missing files and standardize names
  # Simultaneously collect spectrogram data for files in same index
  comparison_titles = []
  comparison_data = []

  for r in sorted(os.listdir(mel_path)):
    if direction == 0:
      haystack = r.lower()
    else:
      haystack = chinese_converter.to_simplified(r).lower()

    for title, key in candidates:
      if key in haystack:
        comparison_titles.append(title + r[-10:])
        with Image.open(os.path.join(mel_path, r)) as img:
          spec_data = np.asarray(img.convert('L').resize((200, 200)))
        comparison_data.append(spec_data.reshape((200, 200, 1)))
        break

  comparison_data = np.asarray(comparison_data) / 255.0

  return (comparison_titles, comparison_data)
    

In [None]:
def similarities_matrix(snn_path, mandarin_mel_path, english_mel_path, csv_path, target_title, direction):

  """
  Function that takes in a trained siamese neural network, a target song, and the direction in which we are calculating similarities: i.e. Mandarin >> English

  Every spectrogram found for the target song is paired with every comparison spectrogram and scored by the network.

  Target_title should be in string form

  Direction is a 0/1 boolean value (0 = Mandarin >> English, 1 = English >> Mandarin)

  CSV file path is that of the comparison titles

  The function outputs a list: [results_matrix, target_title, target_data, comparison_title, comparison_data]

  results_matrix is a numpy matrix with one row per target spectrogram and one column per comparison spectrogram;
  the other four items are the titles and data returned by fetch_target() and fetch_comparison()

  Raises a ValueError when no spectrogram matches the target title
  """

  snn = keras.models.load_model(snn_path)

  # The target song lives in the source-language folder and the comparison
  # songs in the other one, so the two paths swap roles with the direction.
  if direction == 0:
    target_mel_path, comparison_mel_path = mandarin_mel_path, english_mel_path
  else:
    target_mel_path, comparison_mel_path = english_mel_path, mandarin_mel_path

  requested_title = target_title
  target_title, target_data = fetch_target(target_title, target_mel_path, direction=direction)
  comparison_title, comparison_data = fetch_comparison(comparison_mel_path, csv_path, direction=direction)

  if len(target_data) == 0:
    raise ValueError("No spectrograms found for target song {!r} in {!r}".format(requested_title, target_mel_path))

  # Pre-load SNN input with zero arrays
  # to_predict[0] repeats one target spectrogram once per comparison spectrogram
  # to_predict[1] holds the spectrogram data for all songs in the other language
  n_comparisons = len(comparison_data)
  target_batch = np.zeros((n_comparisons, 200, 200, 1))
  comparison_batch = np.zeros((n_comparisons, 200, 200, 1))
  # The comparison side never changes, so it is filled once up front.
  for i, spec in enumerate(comparison_data):
    comparison_batch[i] = spec
  to_predict = [target_batch, comparison_batch]

  results = []

  # Calculate model predictions for each spectrogram that was found for the target song
  for target_spec in target_data:
    target_batch[:] = target_spec  # broadcast the same target into every row

    result = snn.predict(to_predict)
    results.append(result.flatten().tolist())

  results_matrix = np.matrix(results)

  return [results_matrix, target_title, target_data, comparison_title, comparison_data]


In [None]:
def generate_heatmap(mat, n_row=5, n_col=40, save_path=None):

  """
  Creates a heat map visualization given a Tinger similarities matrix

  Can select the number of rows and columns to create a slice of the matrix

  Default values are N_ROW=5 and N_COL=40

  If SAVE_PATH is given, the figure is also written to that path
  """

  extract = mat[0:n_row, 0:n_col]

  fig = plt.figure(figsize=(40, 20))
  plt.imshow(extract, cmap='hot', interpolation='nearest')
  plt.title('Mel Spectrogram Similarities Matrix')
  plt.ylabel('Target Song Slice #')

  # Save before showing: plt.show() may release the current figure, in which
  # case a later savefig() call would write out an empty image.
  if save_path is not None:
    fig.savefig(save_path)

  plt.show()


In [None]:
def similarities_dict(sim_mat, comparison_titles, n_spectrograms=5):

  """
  Takes in a similarities matrix (see similarities_matrix() above) along with the list of all song titles used to compare to target input song.

  Calculates the average of all similarity measures per song to arrive at the final similarity score per song.
  The columns are read in consecutive groups of N_SPECTROGRAMS (default 5), one group per comparison song,
  and at most the first N_SPECTROGRAMS rows are used. Trailing columns that do not fill a whole group are ignored.

  The key for a song is the title of the first column in its group with the last 10 characters removed.

  Output a dictionary with comparison song titles as keys and their similarity scores as values.

  Raises a ValueError when N_SPECTROGRAMS is smaller than 1.
  """

  if n_spectrograms < 1:
    raise ValueError("n_spectrograms must be at least 1, got {!r}".format(n_spectrograms))

  n_songs = sim_mat.shape[1] // n_spectrograms

  result = {}
  for song in range(n_songs):
    start = song * n_spectrograms
    # The slice end is exclusive, so it must be start + n_spectrograms to
    # include every column that belongs to this song.
    block = sim_mat[0:n_spectrograms, start:start + n_spectrograms]
    result[comparison_titles[start][:-10]] = block.mean()

  return result