## Recommendation System

In [1]:
import firebase_admin
from firebase_admin import db,credentials
import main as recommender
import pandas as pd
from random import sample
# Load the service-account credentials from the local key file.
cred = credentials.Certificate("serviceAccountKey.json")
# firebase_admin.initialize_app() raises ValueError when the default app already
# exists, so re-running this cell in a live kernel used to fail. Reuse the
# existing default app when there is one and only initialise it on first run.
try:
    firebase_admin.get_app()
except ValueError:
    firebase_admin.initialize_app(cred, {'databaseURL': 'https://fir-demo-29d5b-default-rtdb.firebaseio.com'})
# Display the default app as the cell output.
firebase_admin.get_app()

<firebase_admin.App at 0x17bb9dce130>

### Get data from Firebase Realtime DB

In [2]:
# Read everything stored under /songs (queried with order_by_child('id')) and
# collect the per-key records into a plain list.
ref = db.reference('/')
data = ref.child('songs').order_by_child('id').get()
songs = [data[key] for key in data]

In [3]:
# Song table: rows with any missing value removed, one row per song id.
songs_df = pd.DataFrame(songs).dropna().drop_duplicates(['id'])

# Work on a 5-row sample with a fresh 0..4 index and keep only the columns
# used to build tags.
# NOTE(review): no random_state is passed, so the sampled rows differ per run.
unused_columns = ['320kbps','album_id' ,'duration','has_lyrics' , 'image','perma_url', 'url', 'release_date', 'title' ,'album','language' , 'subtitle','year']
song_stat_df = songs_df.sample(5, ignore_index=True).drop(columns=unused_columns)
def spliting(text):
    """Return the whitespace-separated tokens of ``text`` as a list of strings.

    ``text`` is converted with ``str()`` first, so non-string values are
    tokenised by their string form.
    """
    return str(text).split()
# Tokenise the artist and genre strings, then concatenate the two token lists
# (artist tokens first) into a single tag list per song.
artist_tokens = song_stat_df['artist'].apply(spliting)
genre_tokens = song_stat_df['genre'].apply(spliting)
song_stat_df['all_tags'] = artist_tokens + genre_tokens
# The source columns are no longer needed once the tags are merged.
song_stat_df = song_stat_df.drop(columns=['genre','artist'])
song_stat_df

Unnamed: 0,id,all_tags
0,jgHYgDFD0xo,"[Lana, Del, Rey, YouTube]"
1,tSEgZC7zASQ,"[Hamlet, Trương, YouTube]"
2,HOLSM3HkKPs,"[YOUNG, STONER, LIFE, RECORDS, YouTube]"
3,gW9Cgg4qg-U,"[KhangProFilm, YouTube]"
4,-GQg25oP0S4,"[HYBE, LABELS, YouTube]"


In [4]:
def convert_lower(text):
    """Return a new list with the lower-cased form of every item in ``text``."""
    return [item.lower() for item in text]
# Lower-case every tag of every song, then show the frame.
lowered_tags = song_stat_df['all_tags'].apply(convert_lower)
song_stat_df['all_tags'] = lowered_tags
song_stat_df

Unnamed: 0,id,all_tags
0,jgHYgDFD0xo,"[lana, del, rey, youtube]"
1,tSEgZC7zASQ,"[hamlet, trương, youtube]"
2,HOLSM3HkKPs,"[young, stoner, life, records, youtube]"
3,gW9Cgg4qg-U,"[khangprofilm, youtube]"
4,-GQg25oP0S4,"[hybe, labels, youtube]"


In [5]:
from nltk.stem.porter import PorterStemmer
# Single shared stemmer instance used by steming().
ps = PorterStemmer()


def steming(text):
    """Return a list holding ``ps.stem(token)`` for each token in ``text``."""
    return [ps.stem(token) for token in text]
# Stem each tag, then collapse every tag list into one space-separated string.
stemmed_tags = song_stat_df['all_tags'].apply(steming)
song_stat_df['all_tags'] = stemmed_tags.apply(" ".join)
song_stat_df['all_tags']

0                lana del rey youtub
1               hamlet trương youtub
2    young stoner life record youtub
3                khangprofilm youtub
4                  hybe label youtub
Name: all_tags, dtype: object

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Fit a TF-IDF model on the per-song tag strings; `data` holds one row per song
# and one column per vocabulary term.
all_tags = song_stat_df['all_tags']
vectorizer = TfidfVectorizer()
data = vectorizer.fit_transform(all_tags)
print("Unique vocabulary: ", vectorizer.vocabulary_)
# Show the matrix in dense form as the cell output.
data.toarray()

Unique vocabulary:  {'lana': 5, 'del': 0, 'rey': 8, 'youtub': 12, 'hamlet': 1, 'trương': 10, 'young': 11, 'stoner': 9, 'life': 6, 'record': 7, 'khangprofilm': 3, 'hybe': 2, 'label': 4}


array([[0.55666851, 0.        , 0.        , 0.        , 0.        ,
        0.55666851, 0.        , 0.        , 0.55666851, 0.        ,
        0.        , 0.        , 0.26525553],
       [0.        , 0.67009179, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.67009179, 0.        , 0.31930233],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.48638585, 0.48638585, 0.        , 0.48638585,
        0.        , 0.48638585, 0.23176546],
       [0.        , 0.        , 0.        , 0.90275015, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.43016528],
       [0.        , 0.        , 0.67009179, 0.        , 0.67009179,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.31930233]])

In [7]:
from sklearn.metrics.pairwise import cosine_similarity
# Pairwise cosine similarity between the rows of the TF-IDF matrix `data`;
# entry [i, j] compares row i with row j.
similarity = cosine_similarity(data)
similarity

array([[1.        , 0.08469671, 0.06147707, 0.11410372, 0.08469671],
       [0.08469671, 1.        , 0.07400325, 0.13735278, 0.10195398],
       [0.06147707, 0.07400325, 1.        , 0.09969746, 0.07400325],
       [0.11410372, 0.13735278, 0.09969746, 1.        , 0.13735278],
       [0.08469671, 0.10195398, 0.07400325, 0.13735278, 1.        ]])

In [8]:
def recommend(song, max_results=21):
    """Return the songs most similar to ``song``, best match first.

    Reads three notebook globals: ``song_stat_df`` (one row per song, expected
    to be aligned row-for-row with ``similarity``), ``similarity`` (pairwise
    similarity matrix) and ``songs_df`` (full song metadata).

    Parameters
    ----------
    song : str
        Value of the ``id`` column identifying the query song.
    max_results : int, default 21
        Maximum number of rows returned. The default keeps the limit this
        function always had. The query song is part of the ranking, so it is
        included in the count.

    Returns
    -------
    pandas.DataFrame
        Columns ``artist``, ``id`` and ``title``, ordered by decreasing
        similarity to ``song``, with a fresh 0..n-1 index.

    Raises
    ------
    IndexError
        If ``song`` is not present in ``song_stat_df['id']``.
    """
    # Locate the song by POSITION: `similarity` is a plain array, so its rows
    # must be addressed positionally even when the frame's index labels are
    # not 0..n-1.
    matches = song_stat_df['id'].eq(song).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"song id {song!r} not found in song_stat_df")
    scores = similarity[int(matches[0])]

    # Stable descending sort: ties keep their original row order.
    ranked = sorted(range(len(scores)), key=lambda pos: scores[pos], reverse=True)

    # De-duplicate ids while preserving rank order, then apply the limit.
    ranked_ids = song_stat_df['id'].iloc[ranked]
    list_id = list(dict.fromkeys(ranked_ids))[:max_results]

    # Fetch all metadata rows in one selection (instead of growing a frame
    # with concat inside a loop) and restore the similarity ranking.
    rank_of = {song_id: rank for rank, song_id in enumerate(list_id)}
    matched = songs_df[songs_df['id'].isin(list_id)].drop_duplicates(['id'])
    ordered = matched.sort_values('id', key=lambda ids: ids.map(rank_of))
    return ordered[['artist', 'id', 'title']].reset_index(drop=True)
    
# Query with an id taken from the sampled frame: `song_stat_df` is a random
# sample, so a hard-coded id is only present by chance and the lookup inside
# recommend() would raise IndexError on most re-runs.
recommend(song_stat_df['id'].iloc[0])

Unnamed: 0,artist,id,title
0,Lana Del Rey,jgHYgDFD0xo,Summertime Sadness (Lana Del Rey Vs. Cedric Ge...
1,KhangProFilm,gW9Cgg4qg-U,LÀM DÂU MÁ ANH ĐI - OFFICIAL MV | LÂM CHẤN KHA...
2,Hamlet Trương,tSEgZC7zASQ,Yêu Đi Rồi Khóc (Love then tears) | Hamlet Trư...
3,HYBE LABELS,-GQg25oP0S4,SEVENTEEN (세븐틴) '손오공' Official MV
4,YOUNG STONER LIFE RECORDS,HOLSM3HkKPs,Karlae - I Like (feat. Coi Leray) [Official Vi...


## API

In [None]:
import service as sv
# Instantiate firebase_service from the local `service` module and run its
# get_data_and_preprocessing() step.
# NOTE(review): presumably this mirrors the fetch/preprocess cells above;
# confirm against service.py.
self_sv = sv.firebase_service()
self_sv.get_data_and_preprocessing()

In [None]:
# Call random_update_stats() on the service object created in the previous cell.
# NOTE(review): the name suggests it writes to the database; confirm in
# service.py before running.
self_sv.random_update_stats()