In [1]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np



In [120]:
def sklearn_cosine(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Compute the cosine similarity between two vectors with scikit-learn.

    Each vector is wrapped in a list so that ``cosine_similarity`` receives
    two single-row inputs. The result of that call is returned unchanged, so
    the value is a similarity *matrix*, not a bare number: read the score
    with ``result[0][0]``.

    :param x: first vector
    :type x: np.ndarray
    :param y: second vector
    :type y: np.ndarray
    :return: similarity matrix produced by ``cosine_similarity`` for the two
        single-row inputs; the score of ``x`` vs. ``y`` sits at ``[0][0]``
    :rtype: np.ndarray
    """
    return cosine_similarity([x], [y])


def get_similar(song_to_compare: dict, songs_to_compare_to: dict, top_n_simlar: int=3) -> dict:
    """Return the ``top_n_simlar`` songs whose lyric vectors are most similar to a reference song.

    The cosine similarity between ``song_to_compare["Vectorized_lyric"]`` and
    the ``"Vectorized_lyric"`` of every entry in ``songs_to_compare_to`` is
    computed with ``sklearn_cosine``. Entries whose score is (numerically) 1
    are treated as the reference song itself and are left out of the result
    entirely. The remaining entries are ranked by descending score; ties keep
    the insertion order of ``songs_to_compare_to``.

    :param song_to_compare: reference song; must contain ``"Vectorized_lyric"``
    :type song_to_compare: dict
    :param songs_to_compare_to: candidate songs keyed by an arbitrary hashable
        key; every value must contain ``"Song"``, ``"Artist"`` and
        ``"Vectorized_lyric"``
    :type songs_to_compare_to: dict
    :param top_n_simlar: maximum number of similar songs to return; values
        below 1 yield an empty result
    :type top_n_simlar: int
    :return: mapping from the original candidate key to
        ``{"Song": ..., "Artist": ...}``, ordered from most to least similar;
        it holds fewer than ``top_n_simlar`` entries when there are not
        enough non-duplicate candidates
    :rtype: dict
    :raises KeyError: if a required key is missing from one of the song dicts
    """
    # A vector compared with itself rarely yields exactly 1.0 in floating
    # point (e.g. 0.9999999999999998), so the duplicate check uses a small
    # absolute tolerance instead of ``== 1``.
    duplicate_atol = 1e-6

    reference_vector = song_to_compare["Vectorized_lyric"]

    candidate_keys = []
    candidate_scores = []
    for key, song in songs_to_compare_to.items():
        score = sklearn_cosine(reference_vector, song["Vectorized_lyric"])[0][0]
        # Drop the reference song itself rather than zeroing its score: a
        # zeroed entry could still be returned when there are few candidates
        # or when the other scores are negative.
        if np.isclose(score, 1.0, rtol=0.0, atol=duplicate_atol):
            continue
        candidate_keys.append(key)
        candidate_scores.append(score)

    # Negating the scores and sorting stably gives a descending ranking with
    # deterministic, insertion-ordered tie-breaking.
    ranking = np.argsort(-np.asarray(candidate_scores, dtype=float), kind="stable")
    best_positions = ranking[:max(top_n_simlar, 0)]

    # Index the plain Python key list (not a numpy array of keys) so keys of
    # any hashable type come back unchanged.
    top_n_songs_no_lyrics = {}
    for position in best_positions:
        key = candidate_keys[position]
        song = songs_to_compare_to[key]
        # Strip the lyric vector; only the identifying fields are returned.
        top_n_songs_no_lyrics[key] = {"Song": song["Song"], "Artist": song["Artist"]}

    return top_n_songs_no_lyrics


In [113]:
# Fixed seed so the random demo vectors are identical on every
# Restart Kernel -> Run All.
SEED = 42
rng = np.random.default_rng(SEED)

# Random 100-dimensional vectors used as stand-ins for vectorized lyrics.
a = rng.random(100)
b = rng.random(100)
c = rng.random(100)
d = rng.random(100)
e = rng.random(100)

In [121]:
# Reference song for which similar songs are looked up.
song_to_compare = {
    "Song": "Chandelier",
    "Artist": "Sia",
    "Vectorized_lyric": a
}

# Candidate songs, keyed as "<song>_<artist>".
all_songs_to_compare_to = {
    "Like Toy Soldiers_Eminem":
    {
        "Song": "Like Toy Soldiers",
        "Artist": "Eminem",
        "Vectorized_lyric": b
    },
    "Ass Like That_Eminem": 
    {
        "Song": "Ass Like That",
        "Artist": "Eminem",
        "Vectorized_lyric": c
    },
    "More Ass Like That_Eminem":
    {
        "Song": "More Ass Like That",
        "Artist": "Eminem",
        "Vectorized_lyric": d
    },
    # Same vector (`a`) as the reference song: this entry has similarity 1
    # with it and exercises the same-song handling in get_similar.
    "Chandelier_Sia":
    {
        "Song": "Chandelier",
        "Artist": "Sia",
        "Vectorized_lyric": a
    },
}

# Uses the default top_n_simlar of get_similar.
top_n_songs_no_lyrics = get_similar(song_to_compare, all_songs_to_compare_to)

In [122]:
# One line per similar song: the dict key followed by its song/artist info.
for entry in top_n_songs_no_lyrics.items():
    print(*entry)

Like Toy Soldiers_Eminem {'Song': 'Like Toy Soldiers', 'Artist': 'Eminem'}
Ass Like That_Eminem {'Song': 'Ass Like That', 'Artist': 'Eminem'}
More Ass Like That_Eminem {'Song': 'More Ass Like That', 'Artist': 'Eminem'}
