In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
file_name = "/content/drive/MyDrive/Github/RecommenderSystem/data/final_songs.csv"

In [3]:
import pandas as pd
final_all_songs = pd.read_csv(file_name, encoding='utf-8')

In [33]:
final_all_songs[['song_sentence']]

Unnamed: 0,song_sentence
0,서주경 성인가요/트로트 임강현 강은경 임강현 1990 Winter 슬픔 공포
1,심수봉 성인가요/트로트 외국곡 심수봉 1990 Winter 슬픔 행복
2,김창완 포크/블루스 국내드라마 김창완 김창완 1990 Autumn 놀람 행복
3,리쌍 랩/힙합 길 개리 2000 Spring 놀람 슬픔
4,박상철 성인가요/트로트 박현진 한솔 2000 Spring 행복 중립
...,...
181,던 댄스 LEEZ 그루비룸 과카 던 던 과카 그루비룸 LEEZ 그루비룸 GONEIS...
182,투모로우바이투게더 댄스 RyanTedder TylerSpry SlowRabbit R...
183,BIGNaughty 록/메탈 dress BIGNaughty BIGNaughty dr...
184,Kep1er 댄스 YejuneSynn JoshFountain LaurenAquili...


In [4]:
from gensim.models import Word2Vec  # 도현 : Gensim 라이브러리를 임포트하는지 확인

# Create list of 'sentences' for Word2Vec training
sentences = final_all_songs['song_sentence'].str.split().tolist()

embedding_size = 100 # 100차원
window_size = 5 # 앞 뒤로 5개
min_count = 1 # 최소 빈도수
workers = 4 #  4개의 CPU 코어

epochs = 20
num_negatives = 100

# 모델 초기화
model = Word2Vec(sentences,
                 vector_size=embedding_size,
                 window=window_size,
                 min_count=min_count,
                 workers=workers,
                 sg=1,  # skip-gram 방식
                 negative=num_negatives  # 네거티브 샘플링
                 )
# 모델 학습
model.train(sentences, total_examples=model.corpus_count, epochs=epochs)



(28151, 43360)

In [40]:
model_path = "/content/drive/MyDrive/Github/RecommenderSystem/song_sentence_model.w2v"
model.save(model_path)

In [7]:
import pandas as pd
import numpy as np
from gensim.models import Word2Vec
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# Reload all the playlist data
playlist_files = [
    "/content/drive/MyDrive/Github/RecommenderSystem/data/playlists/playlist_1.csv",
    "/content/drive/MyDrive/Github/RecommenderSystem/data/playlists/playlist_2.csv",
    "/content/drive/MyDrive/Github/RecommenderSystem/data/playlists/playlist_3.csv",
    "/content/drive/MyDrive/Github/RecommenderSystem/data/playlists/playlist_4.csv",
    "/content/drive/MyDrive/Github/RecommenderSystem/data/playlists/playlist_5.csv",
    "/content/drive/MyDrive/Github/RecommenderSystem/data/playlists/playlist_6.csv",
    "/content/drive/MyDrive/Github/RecommenderSystem/data/playlists/playlist_7.csv",
    "/content/drive/MyDrive/Github/RecommenderSystem/data/playlists/playlist_8.csv",
    "/content/drive/MyDrive/Github/RecommenderSystem/data/playlists/playlist_9.csv",
    "/content/drive/MyDrive/Github/RecommenderSystem/data/playlists/playlist_10.csv"
    ]
playlists = [pd.read_csv(file) for file in playlist_files]


In [8]:
from sklearn.model_selection import train_test_split

train_playlists = []
test_playlists = []

# Split each playlist into train (80%) and test (20%) sets
for playlist in playlists:
    train, test = train_test_split(playlist, test_size=0.2, random_state=42)
    train_playlists.append(train)
    test_playlists.append(test)

# Check the number of songs in the first training playlist
len(train_playlists[0]), len(test_playlists[0])

(19, 5)

In [12]:
# Creating a function to vectorize a song
def vectorize_song(song_id):
    info = final_all_songs[final_all_songs['SongID'] == song_id]['song_sentence'].iloc[0].split()
    vec = np.mean([model.wv[word] for word in info if word in model.wv.index_to_key], axis=0)
    return vec

def recommend_songs_for_playlist(playlist_train, model, num_recommendations=20):
    """Recommend songs for a given playlist using the trained Word2Vec model."""

    # Vectorizing the songs in the user's playlist
    playlist_song_vectors = np.array([vectorize_song(song) for song in playlist_train['SongID']])
    centroid_vector = np.mean(playlist_song_vectors, axis=0).reshape(1, -1)

    # Calculating cosine similarities of all songs to the centroid vector
    song_ids = final_all_songs['SongID'].tolist()
    song_vectors = np.array([vectorize_song(song_id) for song_id in song_ids])
    similarities = cosine_similarity(centroid_vector, song_vectors)

    # Getting the top recommended songs
    recommended_song_indices = similarities[0].argsort()[-num_recommendations:][::-1]
    recommended_songs = final_all_songs.iloc[recommended_song_indices]

    # Exclude songs that are already in the user's playlist
    exclude_song_ids = playlist_train['SongID'].tolist()
    recommended_songs = recommended_songs[~recommended_songs['SongID'].isin(exclude_song_ids)]

    # If excluding songs from the original recommendation leaves us with less than the desired number,
    # we fetch more recommendations.
    if len(recommended_songs) < num_recommendations:
        more_recommendations = num_recommendations - len(recommended_songs)
        next_best_song_indices = [idx for idx in similarities[0].argsort() if idx not in recommended_song_indices][:more_recommendations]
        more_recommended_songs = final_all_songs.iloc[next_best_song_indices]
        more_recommended_songs = more_recommended_songs[~more_recommended_songs['SongID'].isin(exclude_song_ids)]
        recommended_songs = pd.concat([recommended_songs, more_recommended_songs])


    return recommended_songs[:num_recommendations]

# 숨겨진 곡들과 추천된 곡들을 기반으로 NDCG를 계산
def compute_ndcg(recommended_items, hidden_items):
    dcg = 0.0
    idcg = 0.0
    for i, rec_item in enumerate(recommended_items):
        if rec_item in hidden_items:
            dcg += 1.0 / np.log2(i + 2)
    for i in range(len(hidden_items)):
        idcg += 1.0 / np.log2(i + 2)
    return dcg / idcg


In [10]:
# Retry the recommendation for the first playlist
recommended_for_playlist_1 = recommend_songs_for_playlist(train_playlists[0], model)

recommended_for_playlist_1[['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
3,517091,Rush (Feat. 정인),리쌍,랩/힙합
91,33334136,FLYING HIGH WITH U,빈첸,랩/힙합
116,34701954,보라색 선,BRAINOFF,"랩/힙합, 인디음악"
22,8271355,"And July (Feat. DEAN, DJ Friz)",헤이즈 (Heize),랩/힙합
40,31562554,이 밤의 끝,펀치 (Punch),R&B/Soul
23,30038185,어떻게 지내,Crush,R&B/Soul
33,31184764,Slow,SOLE (쏠),R&B/Soul
32,31098679,OSAKA (Feat. ZICO),ELO,R&B/Soul
37,31376041,Make Up (Feat. Crush),샘김 (Sam Kim),R&B/Soul
24,30092083,더 나은 사람,구름,"R&B/Soul, 인디음악"


In [16]:
test_playlists[0][['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
8,517091,Rush (Feat. 정인),리쌍,랩/힙합
16,32874182,It's me !,원슈타인,랩/힙합
0,33115807,"내일이 오면 (Feat. 기리보이, BIG Naughty (서동현))",릴보이 (lIlBOI),랩/힙합
18,33388143,"시한부 (Feat. Leellamarz, 로꼬)",TOIL,랩/힙합
11,32885942,비행,E SENS,랩/힙합


In [14]:
# Retry the recommendation for the first playlist
recommended_for_playlist_2 = recommend_songs_for_playlist(train_playlists[1], model)

recommended_for_playlist_2[['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
12,4446485,너의 모든 순간,성시경,"발라드, 국내드라마"
106,34061322,사랑은 늘 도망가,임영웅,"발라드, 국내드라마"
81,33186638,Bloom,최정윤,발라드
90,33301174,시가 될 이야기,신지훈,"발라드, 인디음악"
2,188668,안녕,김창완,"포크/블루스, 국내드라마"
119,35003354,밤의 창가에서,신지훈,"발라드, 인디음악"
47,31901475,헤어져줘서 고마워,벤,발라드
100,33589488,비와 당신,이무진,"록/메탈, 국내드라마"
102,33742930,별,Mingginyu (밍기뉴),"발라드, 인디음악"
18,5465067,U & I (With Crush & 빈지노),토이,발라드


In [15]:
test_playlists[1][['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
0,34481680,드라마,아이유,발라드
17,35505810,사랑한다고 말해줘,탑현,발라드
15,4711065,우산,윤하 (YOUNHA),발라드
1,34599917,언제나 사랑해,케이시 (Kassy),발라드


In [17]:
recommended_for_playlist_3 = recommend_songs_for_playlist(train_playlists[2], model)

recommended_for_playlist_3[['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
121,35107561,BTBT (Feat. DeVita),B.I,댄스
184,36821415,Galileo,Kep1er (케플러),댄스
26,30705599,치어리더 (Feat. 올티),스텔라장 (Stella Jang),"랩/힙합, 인디음악"
172,36686163,TAXI,조유리,댄스
182,36801619,Back for More (TXT Ver.),투모로우바이투게더,댄스
32,31098679,OSAKA (Feat. ZICO),ELO,R&B/Soul
94,33412196,그대는 은하가 되어요,초승 (CHOSNG),"인디음악, 록/메탈"
119,35003354,밤의 창가에서,신지훈,"발라드, 인디음악"
159,36490426,Steal The Show (From “엘리멘탈”),Lauv,"POP, 애니메이션/웹툰, 키즈, 만화"
37,31376041,Make Up (Feat. Crush),샘김 (Sam Kim),R&B/Soul


In [18]:
test_playlists[2][['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
10,36699655,Bubble,STAYC(스테이씨),댄스
9,36676297,댕댕 (dangdang),마마무+,댄스
0,35945927,Ditto,NewJeans,댄스


In [19]:
recommended_for_playlist_4 = recommend_songs_for_playlist(train_playlists[3], model)

recommended_for_playlist_4[['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
3,517091,Rush (Feat. 정인),리쌍,랩/힙합
33,31184764,Slow,SOLE (쏠),R&B/Soul
8,3843570,If I Die Tomorrow,빈지노 (Beenzino),랩/힙합
15,4832347,187 (Feat. 임성현),BILL STAX (빌스택스),랩/힙합
13,4476043,이겨낼거야 2 (Feat. GRAY),스윙스,랩/힙합
91,33334136,FLYING HIGH WITH U,빈첸,랩/힙합
32,31098679,OSAKA (Feat. ZICO),ELO,R&B/Soul
9,3866221,"Big Up (Feat. The Legendary Poet, RiLord, JIMI...",레게 강 같은 평화,랩/힙합
40,31562554,이 밤의 끝,펀치 (Punch),R&B/Soul
34,31297008,Dank,박재범,R&B/Soul


In [20]:
test_playlists[3][['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
8,31297008,Dank,박재범,R&B/Soul
16,32184905,헤어지기 싫어,Jimmy Brown,R&B/Soul
0,35107561,BTBT (Feat. DeVita),B.I,댄스
24,32922146,busy guy,meenoi (미노이),R&B/Soul
11,34415838,"Alone (feat. 쏠 (SOLE), 다운 (Dvwn))",Cosmic Boy,R&B/Soul
9,31184764,Slow,SOLE (쏠),R&B/Soul


In [21]:
recommended_for_playlist_5 = recommend_songs_for_playlist(train_playlists[4], model)

recommended_for_playlist_5[['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
21,8124417,봄이 좋냐??,10CM,"인디음악, 포크/블루스"
119,35003354,밤의 창가에서,신지훈,"발라드, 인디음악"
141,36180700,심(心),DK(디셈버),록/메탈
30,30909912,이 오빠 뭐야,요요미,성인가요/트로트
102,33742930,별,Mingginyu (밍기뉴),"발라드, 인디음악"
45,31782025,why don't you love me?,조제 (Josee),"R&B/Soul, 인디음악"
55,32341095,막걸리 한잔,영탁,성인가요/트로트
67,32643920,Pink cheeks,eldon,"인디음악, 록/메탈"
24,30092083,더 나은 사람,구름,"R&B/Soul, 인디음악"
1,73294,백만송이 장미,심수봉,성인가요/트로트


In [22]:
test_playlists[4][['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
0,33742930,별,Mingginyu (밍기뉴),"발라드, 인디음악"
17,32567536,날 위해 웃어줘,예빛,"인디음악, 포크/블루스"
15,33238609,유성,2단지,"인디음악, 포크/블루스"
1,33214673,아침,이예린,"인디음악, 포크/블루스"
8,35003354,밤의 창가에서,신지훈,"발라드, 인디음악"


In [23]:
recommended_for_playlist_6 = recommend_songs_for_playlist(train_playlists[5], model)

recommended_for_playlist_6[['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
178,36777679,측정거부,이무진,록/메탈
67,32643920,Pink cheeks,eldon,"인디음악, 록/메탈"
94,33412196,그대는 은하가 되어요,초승 (CHOSNG),"인디음악, 록/메탈"
32,31098679,OSAKA (Feat. ZICO),ELO,R&B/Soul
65,32620041,추억속의 그대,dosii (도시),R&B/Soul
66,32620042,샴푸의 요정,dosii (도시),R&B/Soul
23,30038185,어떻게 지내,Crush,R&B/Soul
24,30092083,더 나은 사람,구름,"R&B/Soul, 인디음악"
40,31562554,이 밤의 끝,펀치 (Punch),R&B/Soul
54,32243272,Bunny,백예린 (Yerin Baek),R&B/Soul


In [24]:
test_playlists[5][['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
10,34101563,strawberry moon,아이유,록/메탈
9,33725775,낙하 (with 아이유),AKMU (악뮤),록/메탈
0,36777679,측정거부,이무진,록/메탈


In [25]:
recommended_for_playlist_7 = recommend_songs_for_playlist(train_playlists[6], model)

recommended_for_playlist_7[['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
1,73294,백만송이 장미,심수봉,성인가요/트로트
141,36180700,심(心),DK(디셈버),록/메탈
62,32508053,이제 나만 믿어요,임영웅,성인가요/트로트
119,35003354,밤의 창가에서,신지훈,"발라드, 인디음악"
90,33301174,시가 될 이야기,신지훈,"발라드, 인디음악"
54,32243272,Bunny,백예린 (Yerin Baek),R&B/Soul
69,32794652,취기를 빌려 (취향저격 그녀 X 산들),산들,발라드
20,8121604,그대가 날 사랑해 준다면,권나무,"인디음악, 포크/블루스"
167,36616378,사막에서 꽃을 피우듯,우디 (Woody),발라드
157,36430774,Allergy,(여자)아이들,댄스


In [26]:
test_playlists[6][['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
0,34061322,사랑은 늘 도망가,임영웅,"발라드, 국내드라마"
1,32508053,이제 나만 믿어요,임영웅,성인가요/트로트
5,32046028,한잔해,박군,성인가요/트로트
14,73294,백만송이 장미,심수봉,성인가요/트로트


In [27]:
recommended_for_playlist_8 = recommend_songs_for_playlist(train_playlists[7], model)

recommended_for_playlist_8[['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
34,31297008,Dank,박재범,R&B/Soul
27,30707718,RIDE (Feat. THAMA),SOLE (쏠),R&B/Soul
172,36686163,TAXI,조유리,댄스
37,31376041,Make Up (Feat. Crush),샘김 (Sam Kim),R&B/Soul
33,31184764,Slow,SOLE (쏠),R&B/Soul
24,30092083,더 나은 사람,구름,"R&B/Soul, 인디음악"
142,36182847,smallthing,dori,R&B/Soul
23,30038185,어떻게 지내,Crush,R&B/Soul
54,32243272,Bunny,백예린 (Yerin Baek),R&B/Soul
98,33542963,With You,네이비쿼카 (NavyQuokka),"R&B/Soul, 인디음악"


In [28]:
test_playlists[7][['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
0,36365073,빛이 나는 너에게,던 (DAWN),R&B/Soul
13,30092083,더 나은 사람,구름,"R&B/Soul, 인디음악"
8,35950074,허수아비 (Feat. 원슈타인),구원찬,R&B/Soul
1,36799775,Heart,던 (DAWN),댄스
15,36290218,"Terminal (Feat. SUMIN, 키드밀리, CHAI)",코드 쿤스트 (CODE KUNST),R&B/Soul


In [29]:
recommended_for_playlist_9 = recommend_songs_for_playlist(train_playlists[8], model)

recommended_for_playlist_9[['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
119,35003354,밤의 창가에서,신지훈,"발라드, 인디음악"
99,33552335,내 입술 따뜻한 커피처럼,청하,발라드
76,33061995,밤하늘의 별을(2020),경서,발라드
69,32794652,취기를 빌려 (취향저격 그녀 X 산들),산들,발라드
90,33301174,시가 될 이야기,신지훈,"발라드, 인디음악"
141,36180700,심(心),DK(디셈버),록/메탈
5,1356122,곤드레 만드레,박현빈,성인가요/트로트
184,36821415,Galileo,Kep1er (케플러),댄스
54,32243272,Bunny,백예린 (Yerin Baek),R&B/Soul
51,32156286,늦은 밤 너의 집 앞 골목길에서,노을,발라드


In [30]:
test_playlists[8][['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
8,34787226,내가 아니라도,주호,발라드
16,36699655,Bubble,STAYC(스테이씨),댄스
0,36430774,Allergy,(여자)아이들,댄스
23,36613696,I WANT,IVE (아이브),댄스
11,36632907,Cool With You,NewJeans,댄스


In [31]:
recommended_for_playlist_10 = recommend_songs_for_playlist(train_playlists[9], model)

recommended_for_playlist_10[['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
121,35107561,BTBT (Feat. DeVita),B.I,댄스
132,35546497,After LIKE,IVE (아이브),댄스
159,36490426,Steal The Show (From “엘리멘탈”),Lauv,"POP, 애니메이션/웹툰, 키즈, 만화"
70,32812198,Mad at Disney,salem ilese,POP
104,33859545,Every Second,Mina Okabe,POP
37,31376041,Make Up (Feat. Crush),샘김 (Sam Kim),R&B/Soul
32,31098679,OSAKA (Feat. ZICO),ELO,R&B/Soul
54,32243272,Bunny,백예린 (Yerin Baek),R&B/Soul
99,33552335,내 입술 따뜻한 커피처럼,청하,발라드
172,36686163,TAXI,조유리,댄스


In [32]:
test_playlists[9][['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
0,30705599,치어리더 (Feat. 올티),스텔라장 (Stella Jang),"랩/힙합, 인디음악"
1,36549241,LOCKDOWN,이세계아이돌,댄스
5,36686163,TAXI,조유리,댄스
15,36779485,Mystify,Nackle,일렉트로니카


In [38]:
playlists[4][['SongID', 'Title', 'Singer', 'Genre']]

Unnamed: 0,SongID,Title,Singer,Genre
0,33742930,별,Mingginyu (밍기뉴),"발라드, 인디음악"
1,33214673,아침,이예린,"인디음악, 포크/블루스"
2,33264943,그 많던 일기는 그저 글자가 되고,우리같은사람들,"인디음악, 록/메탈"
3,33412196,그대는 은하가 되어요,초승 (CHOSNG),"인디음악, 록/메탈"
4,32651484,내 인생은 영화가 아니니깐,SAGA,"인디음악, 록/메탈"
5,30997607,너에게 바다라는 건,주보링,"인디음악, 포크/블루스"
6,33340515,Northwest side 48km,이강승,"R&B/Soul, 인디음악"
7,32376333,우주,615,"인디음악, 포크/블루스"
8,35003354,밤의 창가에서,신지훈,"발라드, 인디음악"
9,35411219,살아내기,김현창,"인디음악, 포크/블루스"
