## Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

## Helper Functions

In [2]:
def title_from_index(index):
    """Return the 'Title' of the row in the global ``df`` whose index label equals ``index``.

    Raises IndexError when no row carries that index label.
    """
    label_matches = df.index == index
    titles = df.loc[label_matches, "Title"]
    return titles.values[0]

def index_from_artist(artist):
    """Return the index label of the first row in the global ``df`` by ``artist``.

    The comparison against the 'Artist' column is an exact, case-sensitive
    equality test. When several songs share the artist, only the first one
    (in row order) is returned.

    Args:
        artist: Artist name to look up.

    Returns:
        The matching row's index label, converted to an integer.

    Raises:
        IndexError: If no row has ``artist`` in its 'Artist' column.
    """
    matching_labels = df.index[df['Artist'] == artist].values.astype(int)
    if matching_labels.size == 0:
        # Same exception type as the bare ``[0]`` lookup would raise, but with
        # a message that names the artist instead of an array-bounds error.
        raise IndexError("no song by artist {!r} in the dataset".format(artist))
    return matching_labels[0]



## Mock Data

In [3]:
# Small hand-written catalogue: one [Title, Artist, Year, Genre] record per song.
# Every field, including Year, is stored as text.
data = [
    ["You should be sad", "Halsey", "2020", "Pop"],
    ["Old Town Road", "Lil Nas X", "2019", "Country"],
    ["Godzilla", "Eminem", "2020", "Rap"],
    ["Piano Sonata No. 14", "Beethoven", "1801", "Classical"],
    ["Moonshine", "Caravan Palace", "2020", "Electroswing"],
    ["Sucker", "Jonas Brothers", "2019", "Pop"],
    ["ADHD", "Joyner Lucas", "2019", "Rap"],
    ["The Real Slim Shady", "Eminem", "2000", "Rap"],
]
df = pd.DataFrame(data, columns=['Title', 'Artist', 'Year', 'Genre'])

In [4]:
# Artist name used as the query; a later cell passes it to index_from_artist to pick the seed song.
user_test = "Eminem"

In [5]:
# Names of the four DataFrame columns that describe a song.
# NOTE(review): no later cell shown here reads this list; row_concat names the columns itself.
features = ['Title', 'Artist', 'Year', 'Genre']

In [6]:
# Echo the list so the cell output shows its contents.
features

['Title', 'Artist', 'Year', 'Genre']

In [7]:
# Build the text for one song by joining its descriptive fields with spaces.
def row_concat(row, columns=('Title', 'Artist', 'Year', 'Genre')):
    """Join the selected fields of ``row`` into one space-separated string.

    Args:
        row: Mapping-like object indexable by column name, such as the
            pandas Series that ``DataFrame.apply(..., axis=1)`` passes in.
        columns: Field names to include, in output order. Defaults to the
            four song attributes.

    Returns:
        The field values separated by single spaces.

    Raises:
        KeyError: If ``row`` lacks one of ``columns``.
    """
    # str() lets non-text values (e.g. an integer Year) take part in the
    # join; plain ``+`` concatenation would raise TypeError on them.
    return " ".join(str(row[column]) for column in columns)

In [8]:
# Add a 'combined row' column holding row_concat's result for each song
# (axis=1 makes apply call the function once per row).
df['combined row'] = df.apply(row_concat, axis = 1)

In [9]:
# Turn each combined string into a vector of token counts (bag of words):
# count_matrix has one row per song and one column per distinct token.
cv = CountVectorizer()
count_matrix = cv.fit_transform(df['combined row'])

In [10]:
# Pairwise cosine similarity between all song vectors:
# cosine_simil[i][j] scores how alike songs i and j are.
cosine_simil = cosine_similarity(count_matrix)

In [11]:
# Look up the row index of the first song by the requested artist;
# that song's row of the similarity matrix is used in the cells below.
artist_index = index_from_artist(user_test)

In [12]:
# Pair every song's row position with its similarity score against the seed song.
similar_songs = [
    (position, score)
    for position, score in enumerate(cosine_simil[artist_index])
]

In [13]:
# Rank the (position, score) pairs from highest to lowest similarity score.
sorted_similar_songs = sorted(
    similar_songs,
    key=lambda pair: pair[1],
    reverse=True,
)

In [14]:
# Print the titles of the 50 highest-ranked songs, best match first.
# Slicing caps the output at exactly 50 titles (the previous counter check let
# a 51st through) and simply yields fewer when the catalogue is smaller.
# The seed song itself is part of the ranking.
for song in sorted_similar_songs[:50]:
    print(title_from_index(song[0]))

Godzilla
The Real Slim Shady
Moonshine
ADHD
You should be sad
Old Town Road
Piano Sonata No. 14
Sucker
