In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [81]:
import pandas as pd
import numpy as np
from gensim.models import Word2Vec
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import openai
import random

from IPython.display import Image
from IPython.core.display import HTML

openai.api_key = ''

In [117]:
class RecommenderSystem:

  def __init__(self, song_sentence_model, lyric_model, data):
    self.song_sentence_model = song_sentence_model
    self.lyric_model = lyric_model
    self.data = data

  def create_playlist_title(self, tags) :
    def gpt_chat_gen(prompt, model="gpt-3.5-turbo"):
      messages = [{"role": "user", "content": prompt}]
      response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,
      )
      return response.choices[0].message["content"]

    prompt = f"""
    {tags}
    위 단어는 사용자가 원하는 노래 감성이다.
    유튜브 노래 플레이 리스트 제목처럼 센스있고 창의적인 노래 플레이리스트 제목을 한글로 만들어줘
    """

    return gpt_chat_gen(prompt)
  def create_album_image(self, tags):
    # DALLE
    prompt = f"""
    {selected_tags}
    위 단어는 사용자가 원하는 노래 감성이다.
    유튜브 노래 플레이 리스트 커버 이미지처럼 센스있고 창의적인 노래 플레이리스트 커버 그림을 만화처럼 만들어줘
    사람은 제외하고 만들어줘
    """
    response = openai.Image.create(
    prompt=prompt,
    n=1,
    size="512x512"
    )

    image_url = response['data'][0]['url']

    return Image(url= image_url)

  def initial_recommendation(self, user_tags, num_recommendations=10):
    matching_songs = self.data[self.data['NewTag'].apply(lambda tags: any(tag in tags for tag in user_tags))]
    matching_songs['tagmatches'] = matching_songs['NewTag'].apply(lambda tags: sum(tag in tags for tag in user_tags))
    matching_songs = matching_songs.sort_values(by='tagmatches', ascending=False)

    formatted_tags = ", ".join([tag[1:] for tag in user_tags])
    title = self.create_playlist_title(tags=formatted_tags)
    album_image = self.create_album_image(tags=formatted_tags)

    return title, album_image, matching_songs[:num_recommendations]

  def vectorize_song(self,song_id):
    info = self.data[self.data['SongID'] == song_id]['song_sentence'].iloc[0].split()
    vec = np.mean([self.song_sentence_model.wv[word] for word in info if word in self.song_sentence_model.wv.index_to_key], axis=0)
    return vec

  def recommend_songs_based_song_sentence(self, playlist, num_recommendations=20):
    playlist_song_vectors = np.array([self.vectorize_song(song) for song in playlist['SongID']])
    centroid_vector = np.mean(playlist_song_vectors, axis=0).reshape(1, -1)

    song_ids = self.data['SongID'].tolist()
    song_vectors = np.array([self.vectorize_song(song_id) for song_id in song_ids])
    similarities = cosine_similarity(centroid_vector, song_vectors)

    recommended_song_indices = similarities[0].argsort()[-num_recommendations:][::-1]
    recommended_songs = self.data.iloc[recommended_song_indices]

    exclude_song_ids = playlist['SongID'].tolist()
    recommended_songs = recommended_songs[~recommended_songs['SongID'].isin(exclude_song_ids)]
    if len(recommended_songs) < num_recommendations:
        more_recommendations = num_recommendations - len(recommended_songs)
        next_best_song_indices = [idx for idx in similarities[0].argsort() if idx not in recommended_song_indices][:more_recommendations]
        more_recommended_songs = self.data.iloc[next_best_song_indices]
        more_recommended_songs = more_recommended_songs[~more_recommended_songs['SongID'].isin(exclude_song_ids)]
        recommended_songs = pd.concat([recommended_songs, more_recommended_songs])

    matched_songs = self.data[self.data['SongID'].isin(playlist['SongID'])]
    unique_tags = set()
    for tags in matched_songs['NewTag'].dropna():
      unique_tags.update(tags.split(','))
    cleaned_tags = { tag.strip().replace("[", "").replace("]", "").replace("'", "").replace(" ", "") for tag in unique_tags}
    cleaned_tags = list(cleaned_tags)
    formatted_tags = ", ".join([tag[1:] for tag in cleaned_tags])
    title = self.create_playlist_title(tags=formatted_tags)
    album_image = self.create_album_image(tags=formatted_tags)

    return title, album_image, recommended_songs[:num_recommendations]

  def vectorize_lyrics(self,lyric):
    words = lyric.split()
    vectors = [self.lyric_model.wv[word] for word in words if word in self.lyric_model.wv.index_to_key]
    if vectors:
        return np.mean(vectors, axis=0)
    else:
        return np.zeros(self.lyric_model.vector_size)

  def lyric_processing(self, df):
    New_lyrics = []
    for lyric in df['Lyrics']:
      aa = lyric.replace("\n", " ")
      New_lyrics.append(aa)
    df['New_lyrics'] = New_lyrics

  def recommend_songs_based_lyric(self, playlist, num_recommendations=20):
    self.lyric_processing(playlist)
    user_playlist_vectors = playlist['New_lyrics'].apply(lambda x: self.vectorize_lyrics(x)).tolist()
    mean_user_vector = np.mean(user_playlist_vectors, axis=0)

    self.data['vector'] = self.data['New_lyrics'].apply(lambda x: self.vectorize_lyrics(x))

    # Calculate cosine similarity between the mean user vector and all song vectors
    self.data['similarity'] = self.data['vector'].apply(lambda x: cosine_similarity([x], [mean_user_vector])[0][0])

    # Exclude songs that are already in the user playlist
    playlist_song_ids = playlist['SongID'].tolist()
    recommended_songs = self.data[~self.data['SongID'].isin(playlist_song_ids)]

    # Sort the songs by similarity to recommend
    recommended_songs = recommended_songs.sort_values(by='similarity', ascending=False).head(num_recommendations)

    matched_songs = self.data[self.data['SongID'].isin(playlist['SongID'])]
    unique_genre= set()
    for tags in matched_songs['Genre'].dropna():
      unique_genre.update(tags.split(','))
    for tags in matched_songs['emotions'].dropna():
      unique_genre.update(tags.split(','))
    cleaned_genre = { tag.strip().replace(" ", "") for tag in unique_genre}

    formatted_tags = ", ".join([tag[1:] for tag in cleaned_genre])
    title = self.create_playlist_title(tags=formatted_tags)
    album_image = self.create_album_image(tags=formatted_tags)

    return title, album_image, recommended_songs


  def personalized_recommendation(self, user_playlist, num_recommendations=10):
    return self.recommend_songs_based_song_sentence(user_playlist), self.recommend_songs_based_lyric(user_playlist)




In [5]:
song_list = "/content/drive/MyDrive/Github/RecommenderSystem/data/final_songs.csv"
playlist_name = "/content/drive/MyDrive/Github/RecommenderSystem/data/playlists/playlist_1.csv"
song_db = pd.read_csv(song_list, encoding='utf-8')
playlist = pd.read_csv(playlist_name, encoding='utf-8')
song_sentence_model = Word2Vec.load("/content/drive/MyDrive/Github/RecommenderSystem/model/song_sentence_model.w2v")
lyric_model = Word2Vec.load("/content/drive/MyDrive/Github/RecommenderSystem/model/lyric_model.w2v")


In [133]:
matched_songs = song_db[song_db['SongID'].isin(playlist['SongID'])]
unique_genre= set()
for tags in matched_songs['Genre'].dropna():
  unique_genre.update(tags.split(','))
for tags in matched_songs['emotions'].dropna():
  unique_genre.update(tags.split(','))
cleaned_genre = { tag.strip().replace(" ", "") for tag in unique_genre}

In [134]:
cleaned_genre

{'공포', '놀람', '랩/힙합', '분노', '슬픔', '인디음악', '중립', '행복', '혐오'}

In [118]:
# 추천 객체 생성
recommender = RecommenderSystem(song_sentence_model=song_sentence_model, lyric_model=lyric_model, data=song_db)

In [119]:
# 초기 추천
all_tags = []

for tag_string in song_db['NewTag']:
    tag_list = eval(tag_string)
    all_tags.extend(tag_list)
unique_individual_tags = set(all_tags)
selected_tags = random.sample(unique_individual_tags, 5) # # 태그 5개 무작위 선택
title, image, initial_playlist = recommender.initial_recommendation(user_tags=selected_tags)

since Python 3.9 and will be removed in a subsequent version.
  selected_tags = random.sample(unique_individual_tags, 5) # # 태그 5개 무작위 선택
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  matching_songs['tagmatches'] = matching_songs['NewTag'].apply(lambda tags: sum(tag in tags for tag in user_tags))


In [120]:
selected_tags

['#편안한', '#R', '#애창곡', '#힘이_되어주는', '#도전']

In [121]:
title

'"힘차게 도전하는 편안한 애창곡들"'

In [122]:
image

In [123]:
initial_playlist[['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
2,188668,안녕,김창완,"포크/블루스, 국내드라마"
103,33805501,빛나,양홍원,랩/힙합
120,35018128,Lovey Dovey (Feat. meenoi),BIG Naughty (서동현),R&B/Soul
119,35003354,밤의 창가에서,신지훈,"발라드, 인디음악"
116,34701954,보라색 선,BRAINOFF,"랩/힙합, 인디음악"
115,34631174,수영 (with 쟈코비),수퍼수,"R&B/Soul, 인디음악"
114,34599917,언제나 사랑해,케이시 (Kassy),발라드
113,34520489,너를 바라만 보는 게,민서 (MINSEO),"발라드, 국내드라마"
112,34481680,드라마,아이유,발라드
111,34415838,"Alone (feat. 쏠 (SOLE), 다운 (Dvwn))",Cosmic Boy,R&B/Soul


In [125]:
# 플레이 리스트 기반 추천
song_sentence_recommendation, lyric_recommendation = recommender.personalized_recommendation(user_playlist=playlist)

In [126]:
# 각 추천 방법의 결과 분해
song_sentence_title, song_sentence_image, song_sentence_playlist = song_sentence_recommendation
lyric_title, lyric_image, lyric_playlist = lyric_recommendation

In [127]:
song_sentence_title

'1. "스트레스 해소를 위한 음악 여행"\n2. "도전에 불타는 국내힙합 쇼타임"\n3. "감성을 녹여낸 국내힙합 명곡 모음"\n4. "감성힙합으로 힐링하는 시간"\n5. "열정과 응원이 가득한 국내힙합 스토리"\n6. "감성적인 음악으로 위로하는 시간"\n7. "감성힙합으로 스트레스를 날려버려"\n8. "국내힙합으로 전하는 열정과 응원"\n9. "감성적인 노래로 마음을 따뜻하게"\n10. "스트레스를 날려버릴 국내힙합 명곡 모음"'

In [128]:
song_sentence_image

In [129]:
song_sentence_playlist[['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
91,33334136,FLYING HIGH WITH U,빈첸,랩/힙합
116,34701954,보라색 선,BRAINOFF,"랩/힙합, 인디음악"
22,8271355,"And July (Feat. DEAN, DJ Friz)",헤이즈 (Heize),랩/힙합
40,31562554,이 밤의 끝,펀치 (Punch),R&B/Soul
33,31184764,Slow,SOLE (쏠),R&B/Soul
28,30732377,Boat,죠지,R&B/Soul
23,30038185,어떻게 지내,Crush,R&B/Soul
32,31098679,OSAKA (Feat. ZICO),ELO,R&B/Soul
11,4182990,아모르 파티,김연자,성인가요/트로트
27,30707718,RIDE (Feat. THAMA),SOLE (쏠),R&B/Soul


In [130]:
lyric_title

'1. "립따라힙합포오" - 립따라 힙합 음악을 포함한 오감을 자극하는 플레이리스트\n2. "디음악의 람픔복" - 디스코 음악과 함께 랩, 펑크, 복고풍 등 다양한 음악을 즐길 수 있는 플레이리스트\n3. "힙합의 노립포오" - 힙합 음악을 중심으로 립싱크, 포크, 오디오북 등 다양한 장르를 담은 플레이리스트\n4. "힙합람픔복" - 힙합 음악과 함께 랩, 펑크, 복고풍 등 다양한 음악을 즐길 수 있는 플레이리스트\n5. "디음악의 노립오" - 디스코 음악과 함께 립싱크, 오디오북 등 다양한 음악을 즐길 수 있는 플레이리스트'

In [131]:
lyric_image

In [132]:
lyric_playlist[['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
33,31184764,Slow,SOLE (쏠),R&B/Soul
108,34270805,오르트구름,윤하 (YOUNHA),록/메탈
90,33301174,시가 될 이야기,신지훈,"발라드, 인디음악"
119,35003354,밤의 창가에서,신지훈,"발라드, 인디음악"
11,4182990,아모르 파티,김연자,성인가요/트로트
96,33480898,신호등,이무진,록/메탈
166,36613696,I WANT,IVE (아이브),댄스
133,35642015,FLOWER,돕도지 (dopedozy),R&B/Soul
112,34481680,드라마,아이유,발라드
95,33413898,연남동 (Feat. lIlBOI),다운 (Dvwn),R&B/Soul


In [36]:
!pip install openai

Collecting openai
  Downloading openai-0.28.1-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
Successfully installed openai-0.28.1
