<a href="https://colab.research.google.com/github/11doris/jazz-maestro/blob/colab_word_embeddings/recommender_score.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sections as Input

In [1]:
!pip uninstall gensim -y

Found existing installation: gensim 3.6.0
Uninstalling gensim-3.6.0:
  Successfully uninstalled gensim-3.6.0


In [2]:
!pip install gensim

Collecting gensim
  Downloading gensim-4.1.2-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (24.1 MB)
[K     |████████████████████████████████| 24.1 MB 2.8 kB/s 
Installing collected packages: gensim
Successfully installed gensim-4.1.2


In [1]:
import gensim
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import pprint
import pandas as pd
import numpy as np
from collections import Counter
import plotly.express as px
from tqdm import tqdm 
from gensim.models.doc2vec import Doc2Vec
import pickle
import os

In [2]:
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

In [3]:
print(gensim.__version__)

4.1.2


# Initialization

## Download the Data

### Basic Plus Chords
M7 and 6 reduced to major triad, m7 reduced to m, dominant 7, m7b5, diminished, and all (b5) left as they are.

In [4]:
!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=17djlZRWFSUCviOxRTOF-nwbbZqU5gUP9' -O data.csv

--2021-11-19 16:40:25--  https://docs.google.com/uc?export=download&id=17djlZRWFSUCviOxRTOF-nwbbZqU5gUP9
Resolving docs.google.com (docs.google.com)... 74.125.70.101, 74.125.70.138, 74.125.70.139, ...
Connecting to docs.google.com (docs.google.com)|74.125.70.101|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://doc-0s-4c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/j5pe1gtod62ia97p1dglmm9b16hbijv7/1637340000000/14329102864480165501/*/17djlZRWFSUCviOxRTOF-nwbbZqU5gUP9?e=download [following]
--2021-11-19 16:40:26--  https://doc-0s-4c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/j5pe1gtod62ia97p1dglmm9b16hbijv7/1637340000000/14329102864480165501/*/17djlZRWFSUCviOxRTOF-nwbbZqU5gUP9?e=download
Resolving doc-0s-4c-docs.googleusercontent.com (doc-0s-4c-docs.googleusercontent.com)... 108.177.111.132, 2607:f8b0:4001:c07::84
Connecting to doc-0s-4c-docs.googleusercontent.com (doc-0s-4c-docs

### Read Chords Input Data

In [5]:
path_to_file = '/content/data.csv'
df = pd.read_csv(path_to_file, sep='\t', index_col="id")
df = df.reset_index()
df.head(5)

Unnamed: 0,id,file_name,title,tune_mode,tune_id,section_name,section_id,chords
0,0,dataset/jazz1350/26-2.xml,26-2,major,0,A,1,C Eb7 G# B7 E G7 Gm C7 F G#7 C# E7 Am D7 Dm G7
1,1,dataset/jazz1350/26-2.xml,26-2,major,0,A,2,C Eb7 G# B7 E G7 Gm C7 F Eb7 G# B7 E G7 C
2,2,dataset/jazz1350/26-2.xml,26-2,major,0,B,3,Gm C7 Bm E7 A C7 F Bbm Eb7 G# Dm G7
3,3,dataset/jazz1350/26-2.xml,26-2,major,0,A,4,C Eb7 G# B7 E G7 Gm C7 F Eb7 G# B7 E G7 C
4,4,dataset/jazz1350/500 Miles High.xml,500 Miles High,minor,1,,0,Am Am Cm Cm Eb Eb Em7b5 A7 Dm Dm Bm7b5 Bm7b5 B...


### Meta Data

In [6]:
titles = df.loc[:, ['id', 'tune_id', 'section_id', 'section_name', 'title', 'tune_mode']]
titles[:10]

Unnamed: 0,id,tune_id,section_id,section_name,title,tune_mode
0,0,0,1,A,26-2,major
1,1,0,2,A,26-2,major
2,2,0,3,B,26-2,major
3,3,0,4,A,26-2,major
4,4,1,0,,500 Miles High,minor
5,5,2,0,,502 Blues,minor
6,6,3,1,A,52nd Street Theme,major
7,7,3,2,A,52nd Street Theme,major
8,8,3,3,B,52nd Street Theme,major
9,9,3,4,A,52nd Street Theme,major


In [7]:
titles_dict = titles.to_dict()

sectionid_to_title = titles_dict['title']
sectionid_to_title_id = titles_dict['tune_id']


In [8]:
titles_rows = titles.to_dict(orient='records')
sectionid_to_section = []
for i, row in enumerate(titles_rows):
  name = f"{row['title']}, section{row['section_id']} ({row['section_name']})"
  sectionid_to_section.append(name)
  

In [9]:
title_to_sectionid = {}

for row in titles.iterrows():
  title = row[1]['title']
  if title not in title_to_sectionid:
    title_to_sectionid[title] = [row[1]['id']]
  else:
    title_to_sectionid[title].append(row[1]['id'])

## Helpers for N-Grams

In [10]:
def ngrams(tokens, n=2, sep='-'):
    return [sep.join(ngram) for ngram in zip(*[tokens[i:] for i in range(n)])]

# TF-IDF

In [11]:
from gensim import corpora
from gensim import similarities


In [12]:
ngrams_for_input = [1, 3]

In [13]:
lines = df.loc[:, 'chords'].tolist()
data = [line.split(' ') for line in lines]

processed_corpus = []
for line in data:
  tune_n = []
  for n in ngrams_for_input:
    tune_n.extend(ngrams(line, n=n))
  processed_corpus.append(tune_n)

for line in processed_corpus[:5]:
  print(line)

['C', 'Eb7', 'G#', 'B7', 'E', 'G7', 'Gm', 'C7', 'F', 'G#7', 'C#', 'E7', 'Am', 'D7', 'Dm', 'G7', 'C-Eb7-G#', 'Eb7-G#-B7', 'G#-B7-E', 'B7-E-G7', 'E-G7-Gm', 'G7-Gm-C7', 'Gm-C7-F', 'C7-F-G#7', 'F-G#7-C#', 'G#7-C#-E7', 'C#-E7-Am', 'E7-Am-D7', 'Am-D7-Dm', 'D7-Dm-G7']
['C', 'Eb7', 'G#', 'B7', 'E', 'G7', 'Gm', 'C7', 'F', 'Eb7', 'G#', 'B7', 'E', 'G7', 'C', 'C-Eb7-G#', 'Eb7-G#-B7', 'G#-B7-E', 'B7-E-G7', 'E-G7-Gm', 'G7-Gm-C7', 'Gm-C7-F', 'C7-F-Eb7', 'F-Eb7-G#', 'Eb7-G#-B7', 'G#-B7-E', 'B7-E-G7', 'E-G7-C']
['Gm', 'C7', 'Bm', 'E7', 'A', 'C7', 'F', 'Bbm', 'Eb7', 'G#', 'Dm', 'G7', 'Gm-C7-Bm', 'C7-Bm-E7', 'Bm-E7-A', 'E7-A-C7', 'A-C7-F', 'C7-F-Bbm', 'F-Bbm-Eb7', 'Bbm-Eb7-G#', 'Eb7-G#-Dm', 'G#-Dm-G7']
['C', 'Eb7', 'G#', 'B7', 'E', 'G7', 'Gm', 'C7', 'F', 'Eb7', 'G#', 'B7', 'E', 'G7', 'C', 'C-Eb7-G#', 'Eb7-G#-B7', 'G#-B7-E', 'B7-E-G7', 'E-G7-Gm', 'G7-Gm-C7', 'Gm-C7-F', 'C7-F-Eb7', 'F-Eb7-G#', 'Eb7-G#-B7', 'G#-B7-E', 'B7-E-G7', 'E-G7-C']
['Am', 'Am', 'Cm', 'Cm', 'Eb', 'Eb', 'Em7b5', 'A7', 'Dm', 'Dm', 'Bm7b

In [14]:
dictionary = corpora.Dictionary(processed_corpus)

2021-11-19 16:40:31,976 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2021-11-19 16:40:32,241 : INFO : built Dictionary(8375 unique tokens: ['Am', 'Am-D7-Dm', 'B7', 'B7-E-G7', 'C']...) from 5403 documents (total 154774 corpus positions)
2021-11-19 16:40:32,243 : INFO : Dictionary lifecycle event {'msg': "built Dictionary(8375 unique tokens: ['Am', 'Am-D7-Dm', 'B7', 'B7-E-G7', 'C']...) from 5403 documents (total 154774 corpus positions)", 'datetime': '2021-11-19T16:40:32.243777', 'gensim': '4.1.2', 'python': '3.7.12 (default, Sep 10 2021, 00:21:48) \n[GCC 7.5.0]', 'platform': 'Linux-5.4.104+-x86_64-with-Ubuntu-18.04-bionic', 'event': 'created'}


In [15]:
# dictionary.token2id

In [16]:
bow_corpus = [dictionary.doc2bow(text) for text in processed_corpus]

In [17]:
!rm -R index_tfidf
!mkdir index_tfidf

In [25]:
# Note: SparseMatrixSimilarity crashes due to lacking memory

index_tfidf = similarities.Similarity('/content/index_tfidf', bow_corpus, num_features=len(dictionary))

2021-11-19 16:41:51,195 : INFO : starting similarity index under /content/index_tfidf


## Test for single tunes

In [26]:
def get_sim_scores(tunes, index):

    df_sim = pd.DataFrame(columns=['reference_title',
                                  'similar_title',
                                  'ref_section', 
                                  'similar_section', 
                                  'score', 
                                  ])

    for tune in tunes:
      print()
      print("-"*50)
      for s1 in title_to_sectionid[tune]:
      
          query = processed_corpus[s1]
          query_bow = dictionary.doc2bow(query)

          # perform a similarity query against the corpus
          similarities = index[query_bow]
          sims = sorted(enumerate(similarities), key=lambda item: -item[1])
          
          print(s1, sectionid_to_section[s1])
          n = 0
          for s2, s2_score in sims:
            
            # store the top N best results
            if n > 40:
                break
            # don't count self-similarity between sections of the same tune
            if s2 not in title_to_sectionid[tune]:
                n += 1
            
                # print(f"\t{s2_score:.3f} {sectionid_to_section[s2]}")

                df_sim.loc[len(df_sim)] = [tune,
                                sectionid_to_title[s2],
                                sectionid_to_section[s1], 
                                sectionid_to_section[s2], 
                                s2_score, 
                                ]
    return df_sim

In [27]:
tunes_eval_list = [
  'Sweet Sue, Just You',
  'These Foolish Things', 
  'Blue Moon',
  'All Of Me',
  "All God's Chillun Got Rhythm",
  'I Got Rhythm',
  'Bye Bye Blackbird',
  'Old Fashioned Love',
  'Dinah',
  'Honeysuckle Rose',
  'Misty'
]

In [28]:
df_sim = get_sim_scores(tunes_eval_list, index_tfidf)

2021-11-19 16:42:13,644 : INFO : creating sparse index
2021-11-19 16:42:13,646 : INFO : creating sparse matrix from corpus
2021-11-19 16:42:13,648 : INFO : PROGRESS: at document #0/5403



--------------------------------------------------


2021-11-19 16:42:13,969 : INFO : created <5403x8375 sparse matrix of type '<class 'numpy.float32'>'
	with 101771 stored elements in Compressed Sparse Row format>
2021-11-19 16:42:13,971 : INFO : creating sparse shard #0
2021-11-19 16:42:13,973 : INFO : saving index shard to /content/index_tfidf.0
2021-11-19 16:42:13,975 : INFO : SparseMatrixSimilarity lifecycle event {'fname_or_handle': '/content/index_tfidf.0', 'separately': 'None', 'sep_limit': 10485760, 'ignore': frozenset(), 'datetime': '2021-11-19T16:42:13.975042', 'gensim': '4.1.2', 'python': '3.7.12 (default, Sep 10 2021, 00:21:48) \n[GCC 7.5.0]', 'platform': 'Linux-5.4.104+-x86_64-with-Ubuntu-18.04-bionic', 'event': 'saving'}
2021-11-19 16:42:13,983 : INFO : saved /content/index_tfidf.0
2021-11-19 16:42:13,985 : INFO : loading SparseMatrixSimilarity object from /content/index_tfidf.0
2021-11-19 16:42:13,988 : INFO : SparseMatrixSimilarity lifecycle event {'fname': '/content/index_tfidf.0', 'datetime': '2021-11-19T16:42:13.98860

3752 Sweet Sue, Just You, section1 (A)
3753 Sweet Sue, Just You, section2 (A)
3754 Sweet Sue, Just You, section3 (B)
3755 Sweet Sue, Just You, section4 (A)

--------------------------------------------------
3866 These Foolish Things, section1 (A)
3867 These Foolish Things, section2 (A)
3868 These Foolish Things, section3 (B)
3869 These Foolish Things, section4 (A)

--------------------------------------------------
496 Blue Moon, section1 (A)
497 Blue Moon, section2 (A)
498 Blue Moon, section3 (B)
499 Blue Moon, section4 (A)

--------------------------------------------------
198 All Of Me, section1 (A)
199 All Of Me, section2 (B)
200 All Of Me, section3 (A)
201 All Of Me, section4 (C)

--------------------------------------------------
188 All God's Chillun Got Rhythm, section1 (A)
189 All God's Chillun Got Rhythm, section2 (B)
190 All God's Chillun Got Rhythm, section3 (A)
191 All God's Chillun Got Rhythm, section4 (C)

--------------------------------------------------
1647 I Got R

In [29]:
import plotly.express as px
fig = px.histogram(df_sim, x="score", nbins=100)
fig.show()

In [30]:
def recommend_tune(df, tune_name):

  df_tune = df.query(f"reference_title == '{tune_name}'")
  ff = df_tune.iloc[:]

  # get the maximum similarity score for each section and store in new column
  ff['max'] = ff.groupby('ref_section')['score'].transform('max')

  # scale the score with the maxmum value of each section
  ff['score_div_max'] = ff['score'] / ff['max']

  # consider only the top N tunes for each group
  ff = ff.sort_values(['ref_section', 'score_div_max'], ascending=[True, False])
  result = ff.groupby('ref_section').head(30)
  
  # if multiple rows from the same similar tune, keep only the similar_title with the highest score_div_max
  result = result.groupby('similar_title').max('score_div_max').sort_values('score_div_max', ascending=False)

  return result, ff

In [31]:
result, details = recommend_tune(df_sim, 'Blue Moon')
result.head(30)

Unnamed: 0_level_0,score,max,score_div_max
similar_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Touch Of Your Lips, The",0.915782,0.925445,1.0
They All Laughed,0.801193,0.836416,1.0
Mountain Greenery,0.925445,0.925445,1.0
Jeepers Creepers,0.895107,0.925445,0.99726
It Could Happen To You,0.832354,0.836416,0.995144
Fools Rush In,0.887262,0.925445,0.992652
All Through The Day,0.778904,0.786083,0.990867
It's A Lovely Day Today,0.776206,0.786083,0.987435
Long Ago And Far Away,0.908974,0.925445,0.985274
At Last,0.911456,0.925445,0.984884


In [32]:
details.query('ref_section == "These Foolish Things, section3 (B)"').head(10)

Unnamed: 0,reference_title,similar_title,ref_section,similar_section,score,max,score_div_max


# SVD based LSI

In [33]:
from gensim import corpora, models, similarities
from collections import defaultdict

In [34]:
ngrams_for_input = [1, 2]

data_ngrams = []
for line in data:
  tune_n = []
  for n in ngrams_for_input:
    tune_n.extend(ngrams(line, n=n))
  data_ngrams.append(tune_n)

for line in data_ngrams[:5]:
  print(line)

['C', 'Eb7', 'G#', 'B7', 'E', 'G7', 'Gm', 'C7', 'F', 'G#7', 'C#', 'E7', 'Am', 'D7', 'Dm', 'G7', 'C-Eb7', 'Eb7-G#', 'G#-B7', 'B7-E', 'E-G7', 'G7-Gm', 'Gm-C7', 'C7-F', 'F-G#7', 'G#7-C#', 'C#-E7', 'E7-Am', 'Am-D7', 'D7-Dm', 'Dm-G7']
['C', 'Eb7', 'G#', 'B7', 'E', 'G7', 'Gm', 'C7', 'F', 'Eb7', 'G#', 'B7', 'E', 'G7', 'C', 'C-Eb7', 'Eb7-G#', 'G#-B7', 'B7-E', 'E-G7', 'G7-Gm', 'Gm-C7', 'C7-F', 'F-Eb7', 'Eb7-G#', 'G#-B7', 'B7-E', 'E-G7', 'G7-C']
['Gm', 'C7', 'Bm', 'E7', 'A', 'C7', 'F', 'Bbm', 'Eb7', 'G#', 'Dm', 'G7', 'Gm-C7', 'C7-Bm', 'Bm-E7', 'E7-A', 'A-C7', 'C7-F', 'F-Bbm', 'Bbm-Eb7', 'Eb7-G#', 'G#-Dm', 'Dm-G7']
['C', 'Eb7', 'G#', 'B7', 'E', 'G7', 'Gm', 'C7', 'F', 'Eb7', 'G#', 'B7', 'E', 'G7', 'C', 'C-Eb7', 'Eb7-G#', 'G#-B7', 'B7-E', 'E-G7', 'G7-Gm', 'Gm-C7', 'C7-F', 'F-Eb7', 'Eb7-G#', 'G#-B7', 'B7-E', 'E-G7', 'G7-C']
['Am', 'Am', 'Cm', 'Cm', 'Eb', 'Eb', 'Em7b5', 'A7', 'Dm', 'Dm', 'Bm7b5', 'Bm7b5', 'Bbm', 'Bbm', 'Fm', 'Fm', 'E7', 'E7', 'Fm', 'Fm', 'C#', 'C#', 'Fm', 'Fm', 'C#', 'C#', 'Am-Am', '

In [38]:
frequency = defaultdict(int)
for text in data_ngrams:
    for token in text:
        frequency[token] += 1

data_ngrams = [[token for token in text if frequency[token] > 1] for text in data_ngrams]
dictionary = corpora.Dictionary(data_ngrams)

# doc2bow counts the number of occurences of each distinct word,
# converts the word to its integer word id and returns the result
# as a sparse vector

bow_corpus = [dictionary.doc2bow(text) for text in data_ngrams]
lsi = models.LsiModel(bow_corpus, id2word=dictionary, num_topics=200)  # num_topics can be maximum the size of the number of unique tokens

2021-11-19 16:43:43,971 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2021-11-19 16:43:44,203 : INFO : built Dictionary(1602 unique tokens: ['Am', 'Am-D7', 'B7', 'B7-E', 'C']...) from 5403 documents (total 159684 corpus positions)
2021-11-19 16:43:44,204 : INFO : Dictionary lifecycle event {'msg': "built Dictionary(1602 unique tokens: ['Am', 'Am-D7', 'B7', 'B7-E', 'C']...) from 5403 documents (total 159684 corpus positions)", 'datetime': '2021-11-19T16:43:44.204611', 'gensim': '4.1.2', 'python': '3.7.12 (default, Sep 10 2021, 00:21:48) \n[GCC 7.5.0]', 'platform': 'Linux-5.4.104+-x86_64-with-Ubuntu-18.04-bionic', 'event': 'created'}
2021-11-19 16:43:44,322 : INFO : using serial LSI version on this node
2021-11-19 16:43:44,323 : INFO : updating model with new documents
2021-11-19 16:43:44,325 : INFO : preparing a new chunk of documents
2021-11-19 16:43:44,387 : INFO : using 100 extra samples and 2 power iterations
2021-11-19 16:43:44,394 : INFO : 1st phase: constructing 

In [39]:
!rm -R index_lsi
!mkdir index_lsi

In [40]:
index_lsi = similarities.Similarity('/content/index_lsi', bow_corpus, num_features=len(dictionary))

2021-11-19 16:43:51,527 : INFO : starting similarity index under /content/index_lsi


In [41]:
df_sim = get_sim_scores(tunes_eval_list, index_lsi)

2021-11-19 16:43:56,150 : INFO : creating sparse index
2021-11-19 16:43:56,152 : INFO : creating sparse matrix from corpus
2021-11-19 16:43:56,161 : INFO : PROGRESS: at document #0/5403



--------------------------------------------------


2021-11-19 16:43:56,443 : INFO : created <5403x1602 sparse matrix of type '<class 'numpy.float32'>'
	with 97814 stored elements in Compressed Sparse Row format>
2021-11-19 16:43:56,445 : INFO : creating sparse shard #0
2021-11-19 16:43:56,447 : INFO : saving index shard to /content/index_lsi.0
2021-11-19 16:43:56,450 : INFO : SparseMatrixSimilarity lifecycle event {'fname_or_handle': '/content/index_lsi.0', 'separately': 'None', 'sep_limit': 10485760, 'ignore': frozenset(), 'datetime': '2021-11-19T16:43:56.450235', 'gensim': '4.1.2', 'python': '3.7.12 (default, Sep 10 2021, 00:21:48) \n[GCC 7.5.0]', 'platform': 'Linux-5.4.104+-x86_64-with-Ubuntu-18.04-bionic', 'event': 'saving'}
2021-11-19 16:43:56,456 : INFO : saved /content/index_lsi.0
2021-11-19 16:43:56,458 : INFO : loading SparseMatrixSimilarity object from /content/index_lsi.0
2021-11-19 16:43:56,461 : INFO : SparseMatrixSimilarity lifecycle event {'fname': '/content/index_lsi.0', 'datetime': '2021-11-19T16:43:56.461370', 'gensim

3752 Sweet Sue, Just You, section1 (A)
3753 Sweet Sue, Just You, section2 (A)
3754 Sweet Sue, Just You, section3 (B)
3755 Sweet Sue, Just You, section4 (A)

--------------------------------------------------
3866 These Foolish Things, section1 (A)
3867 These Foolish Things, section2 (A)
3868 These Foolish Things, section3 (B)
3869 These Foolish Things, section4 (A)

--------------------------------------------------
496 Blue Moon, section1 (A)
497 Blue Moon, section2 (A)
498 Blue Moon, section3 (B)
499 Blue Moon, section4 (A)

--------------------------------------------------
198 All Of Me, section1 (A)
199 All Of Me, section2 (B)
200 All Of Me, section3 (A)
201 All Of Me, section4 (C)

--------------------------------------------------
188 All God's Chillun Got Rhythm, section1 (A)
189 All God's Chillun Got Rhythm, section2 (B)
190 All God's Chillun Got Rhythm, section3 (A)
191 All God's Chillun Got Rhythm, section4 (C)

--------------------------------------------------
1647 I Got R

In [42]:
df_sim

Unnamed: 0,reference_title,similar_title,ref_section,similar_section,score
0,"Sweet Sue, Just You",Roll Along Prarie Moon,"Sweet Sue, Just You, section1 (A)","Roll Along Prarie Moon, section1 (A)",0.852803
1,"Sweet Sue, Just You",Roll Along Prarie Moon,"Sweet Sue, Just You, section1 (A)","Roll Along Prarie Moon, section2 (A)",0.852803
2,"Sweet Sue, Just You",You Broke Your Promise,"Sweet Sue, Just You, section1 (A)","You Broke Your Promise, section1 (A)",0.852803
3,"Sweet Sue, Just You","Oh, What A Beautiful Mornin'","Sweet Sue, Just You, section1 (A)","Oh, What A Beautiful Mornin', section4 (B)",0.838041
4,"Sweet Sue, Just You","Breeze And I, The","Sweet Sue, Just You, section1 (A)","Breeze And I, The, section4 (C)",0.836334
...,...,...,...,...,...
2045,Misty,All Too Soon,"Misty, section4 (A)","All Too Soon, section2 (A)",0.745697
2046,Misty,Young At Heart,"Misty, section4 (A)","Young At Heart, section2 (A)",0.745166
2047,Misty,While We're Young,"Misty, section4 (A)","While We're Young, section4 (C)",0.744694
2048,Misty,"Glory Of Love, The","Misty, section4 (A)","Glory Of Love, The, section2 (A)",0.744683


In [43]:
import plotly.express as px
fig = px.histogram(df_sim, x="score", nbins=100)
fig.show()

In [44]:
result, details = recommend_tune(df_sim, 'These Foolish Things')
result.head(30)

Unnamed: 0_level_0,score,max,score_div_max
similar_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
More Than You Know,0.731126,0.731126,1.0
All The Way,0.794894,0.794894,1.0
Some Enchanted Evening,0.772427,0.794894,1.0
Dreaming The Hours Away,0.78561,0.794894,0.996915
"Folks Who Live On The Hill, The",0.782072,0.794894,0.99591
"Second Time Around, The",0.774371,0.794894,0.987967
I Don't Want To Miss Mississippi,0.785069,0.794894,0.98764
I Loves You Porgy,0.775315,0.794894,0.98515
"For You, For Me, For Evermore",0.780493,0.794894,0.981883
Fair And Square In Love,0.764861,0.779539,0.981171
