In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
from gensim.models import Word2Vec
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt

In [27]:
class RecommenderSystem:
  def __init__(self, song_sentence_model, lyric_model, data):
    self.song_sentence_model = song_sentence_model
    self.lyric_model = lyric_model
    self.data = data

  def initial_recommendation(self, user_tags, num_recommendations=10):
    matching_songs = self.data[self.data['NewTag'].apply(lambda tags: any(tag in tags for tag in user_tags))]
    matching_songs['tagmatches'] = matching_songs['NewTag'].apply(lambda tags: sum(tag in tags for tag in user_tags))
    matching_songs = matching_songs.sort_values(by='tagmatches', ascending=False)
    return matching_songs[:num_recommendations]

  def vectorize_song(self,song_id):
    info = self.data[self.data['SongID'] == song_id]['song_sentence'].iloc[0].split()
    vec = np.mean([self.song_sentence_model.wv[word] for word in info if word in self.song_sentence_model.wv.index_to_key], axis=0)
    return vec

  def recommend_songs_based_song_sentence(self, playlist, num_recommendations=20):
    playlist_song_vectors = np.array([self.vectorize_song(song) for song in playlist['SongID']])
    centroid_vector = np.mean(playlist_song_vectors, axis=0).reshape(1, -1)

    song_ids = self.data['SongID'].tolist()
    song_vectors = np.array([self.vectorize_song(song_id) for song_id in song_ids])
    similarities = cosine_similarity(centroid_vector, song_vectors)

    recommended_song_indices = similarities[0].argsort()[-num_recommendations:][::-1]
    recommended_songs = self.data.iloc[recommended_song_indices]

    exclude_song_ids = playlist['SongID'].tolist()
    recommended_songs = recommended_songs[~recommended_songs['SongID'].isin(exclude_song_ids)]
    if len(recommended_songs) < num_recommendations:
        more_recommendations = num_recommendations - len(recommended_songs)
        next_best_song_indices = [idx for idx in similarities[0].argsort() if idx not in recommended_song_indices][:more_recommendations]
        more_recommended_songs = self.data.iloc[next_best_song_indices]
        more_recommended_songs = more_recommended_songs[~more_recommended_songs['SongID'].isin(exclude_song_ids)]
        recommended_songs = pd.concat([recommended_songs, more_recommended_songs])
    return recommended_songs[:num_recommendations]

  def vectorize_lyrics(self,lyric):
    words = lyric.split()
    vectors = [self.lyric_model.wv[word] for word in words if word in self.lyric_model.wv.index_to_key]
    if vectors:
        return np.mean(vectors, axis=0)
    else:
        return np.zeros(self.lyric_model.vector_size)

  def lyric_processing(self, df):
    New_lyrics = []
    for lyric in df['Lyrics']:
      aa = lyric.replace("\n", " ")
      New_lyrics.append(aa)
    df['New_lyrics'] = New_lyrics

  def recommend_songs_based_lyric(self, playlist, num_recommendations=20):
    self.lyric_processing(playlist)
    user_playlist_vectors = playlist['New_lyrics'].apply(lambda x: self.vectorize_lyrics(x)).tolist()
    mean_user_vector = np.mean(user_playlist_vectors, axis=0)

    self.data['vector'] = self.data['New_lyrics'].apply(lambda x: self.vectorize_lyrics(x))

    # Calculate cosine similarity between the mean user vector and all song vectors
    self.data['similarity'] = self.data['vector'].apply(lambda x: cosine_similarity([x], [mean_user_vector])[0][0])

    # Exclude songs that are already in the user playlist
    playlist_song_ids = playlist['SongID'].tolist()
    recommended_songs = self.data[~self.data['SongID'].isin(playlist_song_ids)]

    # Sort the songs by similarity to recommend
    recommended_songs = recommended_songs.sort_values(by='similarity', ascending=False).head(num_recommendations)
    return recommended_songs

  def personalized_recommendation(self, user_playlist, num_recommendations=10):
    return self.recommend_songs_based_song_sentence(user_playlist), self.recommend_songs_based_lyric(user_playlist)




In [5]:
song_list = "/content/drive/MyDrive/Github/RecommenderSystem/data/final_songs.csv"
playlist_name = "/content/drive/MyDrive/Github/RecommenderSystem/data/playlists/playlist_1.csv"
song_db = pd.read_csv(song_list, encoding='utf-8')
playlist = pd.read_csv(playlist_name, encoding='utf-8')
song_sentence_model = Word2Vec.load("/content/drive/MyDrive/Github/RecommenderSystem/model/song_sentence_model.w2v")
lyric_model = Word2Vec.load("/content/drive/MyDrive/Github/RecommenderSystem/model/lyric_model.w2v")


In [28]:
recommender = RecommenderSystem(song_sentence_model=song_sentence_model, lyric_model=lyric_model, data=song_db)

In [34]:
user_tags = ["#새벽", "#휴식", "명곡"]
initial_playlist = recommender.initial_recommendation(user_tags=user_tags)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  matching_songs['tagmatches'] = matching_songs['NewTag'].apply(lambda tags: sum(tag in tags for tag in user_tags))


In [35]:
initial_playlist[['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
2,188668,안녕,김창완,"포크/블루스, 국내드라마"
68,32651484,내 인생은 영화가 아니니깐,SAGA,"인디음악, 록/메탈"
127,35411219,살아내기,김현창,"인디음악, 포크/블루스"
119,35003354,밤의 창가에서,신지훈,"발라드, 인디음악"
102,33742930,별,Mingginyu (밍기뉴),"발라드, 인디음악"
97,33483664,love song,이예린,"인디음악, 포크/블루스"
94,33412196,그대는 은하가 되어요,초승 (CHOSNG),"인디음악, 록/메탈"
92,33340515,Northwest side 48km,이강승,"R&B/Soul, 인디음악"
90,33301174,시가 될 이야기,신지훈,"발라드, 인디음악"
20,8121604,그대가 날 사랑해 준다면,권나무,"인디음악, 포크/블루스"


In [30]:
playlist1, playlist2 = recommender.personalized_recommendation(user_playlist=playlist)

In [32]:
playlist1[['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
91,33334136,FLYING HIGH WITH U,빈첸,랩/힙합
116,34701954,보라색 선,BRAINOFF,"랩/힙합, 인디음악"
22,8271355,"And July (Feat. DEAN, DJ Friz)",헤이즈 (Heize),랩/힙합
40,31562554,이 밤의 끝,펀치 (Punch),R&B/Soul
33,31184764,Slow,SOLE (쏠),R&B/Soul
28,30732377,Boat,죠지,R&B/Soul
23,30038185,어떻게 지내,Crush,R&B/Soul
32,31098679,OSAKA (Feat. ZICO),ELO,R&B/Soul
11,4182990,아모르 파티,김연자,성인가요/트로트
27,30707718,RIDE (Feat. THAMA),SOLE (쏠),R&B/Soul


In [33]:
playlist2[['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
33,31184764,Slow,SOLE (쏠),R&B/Soul
108,34270805,오르트구름,윤하 (YOUNHA),록/메탈
90,33301174,시가 될 이야기,신지훈,"발라드, 인디음악"
119,35003354,밤의 창가에서,신지훈,"발라드, 인디음악"
11,4182990,아모르 파티,김연자,성인가요/트로트
96,33480898,신호등,이무진,록/메탈
166,36613696,I WANT,IVE (아이브),댄스
133,35642015,FLOWER,돕도지 (dopedozy),R&B/Soul
112,34481680,드라마,아이유,발라드
95,33413898,연남동 (Feat. lIlBOI),다운 (Dvwn),R&B/Soul
