# Task 1 Simple Text-based Similarity and Retrieval

### Team E
Version 1.0.1
Date: 14/11/2023

## Import Libraries  

In [1]:
import pandas as pd
import numpy as np

In [2]:
from ret import read_data # utility func to load data
from ret import get_id_from_info # utility func to return id by entering song's info
from ret import display_res # utility func to display results 
from ret import text_based # modularized text based retrieval system
from ret import random_baseline # base line retrieval system that returns random results 
from ret import cos_sim # wrapper function to calculate cosine similarity
from ret import euc_sim # wrapper function to calculate euclidean similarity 

## Load Data

In [3]:
"""
Please put the data files in "./data/" before use.
"""

# Load the song information and the three lyrics representations (TF-IDF,
# word2vec, BERT) through the `read_data` helper, in that order.
info, tfidf, word2vec, bert = (
    read_data(dataset)
    for dataset in (
        "information",
        "lyrics_tf-idf",
        "lyrics_word2vec",
        "lyrics_bert",
    )
)

## Dictionary to Store Results 

In [4]:
"""
Dict to store results:
query format: res["trackTRACKNUMBER"]["REPRESENTATION"]["SIMILARITYFUNCTION"]
sample query: res["track1"]["tfidf"]["cos_sim"]
baseline query: res["trackTRACKNUMBER"]["base_line"]

returns - list[str] 
list containing the ids of the retrieved tracks 

After running, use display_res(res["trackTRACKNUMBER"]["REPRESENTATION"]["SIMILARITYFUNCTION"],info) to display results
e.g. display_res(res["track1"]["tfidf"]["cos_sim"],info)

"""
# Result store, one entry per query track ("track1" .. "track3").
# Each track holds a "base_line" slot plus one sub-dict per lyrics
# representation, and each representation holds one slot per similarity
# function. Every slot starts as None and is filled in by later cells.
# The comprehensions create a fresh inner dict for every track/representation
# pair, so assigning to one slot never affects another.
res = {
    f"track{track_no}": {
        "base_line": None,
        **{
            representation: {"cos_sim": None, "euc_sim": None}
            for representation in ("tfidf", "word2vec", "bert")
        },
    }
    for track_no in (1, 2, 3)
}

## Track 1 
Query: "Love Me" by The 1975

In [5]:
# Query track 1: resolve the id of "Love Me" by "The 1975" from the song information.
name, artist = "Love Me", "The 1975"
id_track1 = get_id_from_info(name, artist, info)

In [6]:
# Random baseline for track 1 (N=10), stored for comparison with the text-based runs.
# NOTE(review): no random seed is set anywhere in the visible notebook; if
# `random_baseline` is unseeded this list changes on every run; confirm in `ret`.
res["track1"]["base_line"] = random_baseline(
    id=id_track1,
    info=info,
    N=10,
)

In [7]:
# Retrieve N=10 tracks for track 1 from the TF-IDF lyrics representation using cosine similarity.
res["track1"]["tfidf"]["cos_sim"] = text_based(
    id=id_track1,
    repr=tfidf,
    N=10,
    sim_func=cos_sim,
)

In [8]:
# Retrieve N=10 tracks for track 1 from the word2vec lyrics representation using cosine similarity.
res["track1"]["word2vec"]["cos_sim"] = text_based(
    id=id_track1,
    repr=word2vec,
    N=10,
    sim_func=cos_sim,
)

In [9]:
# Retrieve N=10 tracks for track 1 from the BERT lyrics representation using cosine similarity.
res["track1"]["bert"]["cos_sim"] = text_based(
    id=id_track1,
    repr=bert,
    N=10,
    sim_func=cos_sim,
)

In [10]:
# Show the tracks retrieved for track 1 with TF-IDF + cosine similarity.
display_res(
    res["track1"]["tfidf"]["cos_sim"],
    info,
)

Name: Oh Yeah                                  Singer: Big Time Rush
Name: The Gospel                               Singer: Alicia Keys
Name: Fire Starter                             Singer: Demi Lovato
Name: Rat Fink                                 Singer: Misfits
Name: How Bad Do You Want It (Oh Yeah)         Singer: Sevyn Streeter
Name: Yeah! (feat. Lil Jon & Ludacris)         Singer: Usher
Name: Regarde-moi                              Singer: Céline Dion
Name: Miss Independent                         Singer: Ne-Yo
Name: Euphoria                                 Singer: BTS
Name: Let There Be Love                        Singer: Simple Minds


In [11]:
# Show the tracks retrieved for track 1 with word2vec + cosine similarity.
display_res(
    res["track1"]["word2vec"]["cos_sim"],
    info,
)

Name: Miss Independent                         Singer: Ne-Yo
Name: If Our Love Is Wrong                     Singer: Calum Scott
Name: Looking For Clues                        Singer: Robert Palmer
Name: Out on the Tiles                         Singer: Led Zeppelin
Name: So Much Love                             Singer: The Rocket Summer
Name: Let There Be Love                        Singer: Simple Minds
Name: In the Evening                           Singer: Led Zeppelin
Name: All You Got                              Singer: Tegan and Sara
Name: Rosalyn                                  Singer: David Bowie
Name: How Bad Do You Want It (Oh Yeah)         Singer: Sevyn Streeter


In [12]:
# Show the tracks retrieved for track 1 with BERT + cosine similarity.
display_res(
    res["track1"]["bert"]["cos_sim"],
    info,
)

Name: Thug City                                Singer: Dance Gavin Dance
Name: Shine                                    Singer: Take That
Name: One, Two, Three, GO!                     Singer: Belanova
Name: Right There                              Singer: Ariana Grande
Name: Bing Bing                                Singer: Crayon Pop
Name: Come Get It Bae                          Singer: Pharrell Williams
Name: We Made You                              Singer: Eminem
Name: Here I Am                                Singer: Monica
Name: Wannabe                                  Singer: why mona
Name: Edge of the World                        Singer: Faith No More


In [13]:
# Show the random-baseline tracks stored for track 1.
display_res(
    res["track1"]["base_line"],
    info,
)

Name: Low                                      Singer: Foo Fighters
Name: Take It on the Run                       Singer: REO Speedwagon
Name: Envision                                 Singer: After Forever
Name: The Man Who Built the Moon               Singer: Noel Gallagher's High Flying Birds
Name: Coming Home                              Singer: Avenged Sevenfold
Name: Don't Forget About Me                    Singer: Dusty Springfield
Name: Touch                                    Singer: Bright Eyes
Name: Mi-myo Mi-myo                            Singer: Lovelyz
Name: Life of a Nine                           Singer: Issues
Name: House of Glass                           Singer: Cage the Elephant


## Track 2 

Query: "One" by U2

In [14]:
# Query track 2: resolve the id of "One" by "U2" from the song information.
name, artist = "One", "U2"
id_track2 = get_id_from_info(name, artist, info)

In [15]:
# Retrieve N=10 tracks for track 2 from the TF-IDF lyrics representation using cosine similarity.
res["track2"]["tfidf"]["cos_sim"] = text_based(
    id=id_track2,
    repr=tfidf,
    N=10,
    sim_func=cos_sim,
)

In [16]:
# Retrieve N=10 tracks for track 2 from the word2vec lyrics representation using cosine similarity.
res["track2"]["word2vec"]["cos_sim"] = text_based(
    id=id_track2,
    repr=word2vec,
    N=10,
    sim_func=cos_sim,
)

In [17]:
# Retrieve N=10 tracks for track 2 from the BERT lyrics representation using cosine similarity.
res["track2"]["bert"]["cos_sim"] = text_based(
    id=id_track2,
    repr=bert,
    N=10,
    sim_func=cos_sim,
)

In [18]:
# Random baseline for track 2 (N=10), stored for comparison with the text-based runs.
# NOTE(review): no random seed is set anywhere in the visible notebook; if
# `random_baseline` is unseeded this list changes on every run; confirm in `ret`.
res["track2"]["base_line"] = random_baseline(
    id=id_track2,
    info=info,
    N=10,
)

In [19]:
# Show the tracks retrieved for track 2 with TF-IDF + cosine similarity.
display_res(
    res["track2"]["tfidf"]["cos_sim"],
    info,
)

Name: One                                      Singer: Mary J. Blige
Name: One Love (feat. Estelle)                 Singer: David Guetta
Name: Love the One You're With                 Singer: Stephen Stills
Name: One                                      Singer: Alanis Morissette
Name: No One - Acoustic                        Singer: Alicia Keys
Name: One Tribe (Defqon.1 2019 Anthem)         Singer: Phuture Noize
Name: You Can Be the One                       Singer: Late Night Alumni
Name: Rape Me                                  Singer: Nirvana
Name: Palavras No Corpo                        Singer: Gal Costa
Name: No One in the World                      Singer: Anita Baker


In [20]:
# Show the tracks retrieved for track 2 with word2vec + cosine similarity.
display_res(
    res["track2"]["word2vec"]["cos_sim"],
    info,
)

Name: One                                      Singer: Mary J. Blige
Name: One Love (feat. Estelle)                 Singer: David Guetta
Name: Quien Eres Tu (Feat. Trey Songz)         Singer: María José
Name: Dance With The One That Brought You      Singer: Shania Twain
Name: Apocalyptic                              Singer: Halestorm
Name: I Will Survive - Extended Version        Singer: Gloria Gaynor
Name: King For A Day                           Singer: Thompson Twins
Name: Never Let Me Down                        Singer: Kanye West
Name: Fica Mais um Pouco Amor                  Singer: Emicida
Name: I Will Survive                           Singer: Gloria Gaynor


In [21]:
# Show the tracks retrieved for track 2 with BERT + cosine similarity.
display_res(
    res["track2"]["bert"]["cos_sim"],
    info,
)

Name: One                                      Singer: Mary J. Blige
Name: What About Love                          Singer: Austin Mahone
Name: All of Your Glory                        Singer: Broods
Name: La Tortura (feat. Alejandro Sanz)        Singer: Shakira
Name: Love One Another                         Singer: Cher
Name: Black Lake                               Singer: Björk
Name: El Triste                                Singer: José José
Name: Love Makes the World Go Round            Singer: Ashlee Simpson
Name: Keep It Together                         Singer: Madonna
Name: U Want Me 2                              Singer: Sarah McLachlan


In [22]:
# Show the random-baseline tracks stored for track 2.
display_res(
    res["track2"]["base_line"],
    info,
)

Name: Forest Serenade                          Singer: The Joy Formidable
Name: Sweet Disposition                        Singer: The Temper Trap
Name: Won't Let Go                             Singer: Black Stone Cherry
Name: Alive                                    Singer: Da Buzz
Name: Drive                                    Singer: Dornik
Name: Ride 'Em on Down                         Singer: The Rolling Stones
Name: Someone Better                           Singer: Juveniles
Name: Steady Rollin' Man                       Singer: Eric Clapton
Name: Peach Boy                                Singer: Jay Som
Name: Breathe                                  Singer: Wolfheart


## Track 3

Query: "Every Christmas" by Kelly Clarkson

In [32]:
# Query track 3: resolve the id of "Every Christmas" by "Kelly Clarkson" from the song information.
name, artist = "Every Christmas", "Kelly Clarkson"
id_track3 = get_id_from_info(name, artist, info)

In [24]:
# Retrieve N=10 tracks for track 3 from the TF-IDF lyrics representation using cosine similarity.
res["track3"]["tfidf"]["cos_sim"] = text_based(
    id=id_track3,
    repr=tfidf,
    N=10,
    sim_func=cos_sim,
)

In [25]:
# Retrieve N=10 tracks for track 3 from the word2vec lyrics representation using cosine similarity.
res["track3"]["word2vec"]["cos_sim"] = text_based(
    id=id_track3,
    repr=word2vec,
    N=10,
    sim_func=cos_sim,
)

In [26]:
# Retrieve N=10 tracks for track 3 from the BERT lyrics representation using cosine similarity.
res["track3"]["bert"]["cos_sim"] = text_based(
    id=id_track3,
    repr=bert,
    N=10,
    sim_func=cos_sim,
)

In [27]:
# Random baseline for track 3 (N=10), stored for comparison with the text-based runs.
# NOTE(review): no random seed is set anywhere in the visible notebook; if
# `random_baseline` is unseeded this list changes on every run; confirm in `ret`.
res["track3"]["base_line"] = random_baseline(
    id=id_track3,
    info=info,
    N=10,
)

In [28]:
# Show the tracks retrieved for track 3 with TF-IDF + cosine similarity.
display_res(
    res["track3"]["tfidf"]["cos_sim"],
    info,
)

Name: Christmas Conga                          Singer: Cyndi Lauper
Name: Three Ships                              Singer: Cyndi Lauper
Name: Hellhound On My Trail                    Singer: Robert Johnson
Name: St. Patrick's Day                        Singer: John Mayer
Name: Last Christmas                           Singer: Carly Rae Jepsen
Name: My Only Wish (This Year)                 Singer: Britney Spears
Name: Christmas Vacation                       Singer: Descendents
Name: Last Christmas - Studio Version          Singer: Jimmy Eat World
Name: The Christmas Song (Merry Christmas To You) Singer: Nat King Cole
Name: I Shut Doors and Windows                 Singer: September Malevolence


In [29]:
# Show the tracks retrieved for track 3 with word2vec + cosine similarity.
display_res(
    res["track3"]["word2vec"]["cos_sim"],
    info,
)

Name: St. Patrick's Day                        Singer: John Mayer
Name: Junesong Provision                       Singer: Coheed and Cambria
Name: My Only Wish (This Year)                 Singer: Britney Spears
Name: PERFECT!                                 Singer: WJSN
Name: Si Tu Novio Te Deja Sola                 Singer: J Balvin
Name: So Doggone Lonesome                      Singer: Johnny Cash
Name: The Best Day                             Singer: Taylor Swift
Name: Wait For You                             Singer: Elliott Yamin
Name: Jesus Christ                             Singer: Brand New
Name: Alone (feat. Big Sean & Stefflon Don)    Singer: Halsey


In [30]:
# Show the tracks retrieved for track 3 with BERT + cosine similarity.
display_res(
    res["track3"]["bert"]["cos_sim"],
    info,
)

Name: My Only Wish (This Year)                 Singer: Britney Spears
Name: Christmas Conga                          Singer: Cyndi Lauper
Name: Merry Christmas, Kiss My Ass             Singer: All Time Low
Name: St. Patrick's Day                        Singer: John Mayer
Name: The Christmas Song (Merry Christmas To You) Singer: Nat King Cole
Name: Last Christmas                           Singer: Carly Rae Jepsen
Name: Next Year                                Singer: Foo Fighters
Name: December's Boudoir                       Singer: Laura Nyro
Name: Last Xmas                                Singer: Allie X
Name: Santa Claus Is Coming To Town            Singer: The Jackson 5


In [31]:
# Show the random-baseline tracks stored for track 3.
display_res(
    res["track3"]["base_line"],
    info,
)

Name: Esta Noche Sólo Cantan Para Mí           Singer: La casa azul
Name: Just For Now                             Singer: Imogen Heap
Name: Snaggletooth                             Singer: Vance Joy
Name: Don't Complain                           Singer: Everlast
Name: Major System Error                       Singer: Marmozets
Name: Visions                                  Singer: Maroon 5
Name: Young Blindness                          Singer: The Murlocs
Name: Judge Jury and Executioner               Singer: Atoms for Peace
Name: G4L                                      Singer: Rihanna
Name: My Tangerine Dream                       Singer: Wolfmother
