In [1]:
import pandas as pd
data = pd.read_csv("data/spotify_millsongdata_prossed_traindata.csv")
data

Unnamed: 0,sadness,joy,love,anger,fear,surprise
0,0.000284,0.998917,0.000303,0.000132,0.000167,0.000196
1,0.002162,0.102594,0.891564,0.000729,0.002013,0.000938
2,0.804719,0.171543,0.002526,0.017968,0.001365,0.001879
3,0.002880,0.019789,0.972059,0.004212,0.000504,0.000556
4,0.029310,0.077233,0.838309,0.052503,0.001412,0.001232
...,...,...,...,...,...,...
57645,0.000865,0.997525,0.000827,0.000437,0.000178,0.000168
57646,0.005675,0.003453,0.000600,0.988928,0.000970,0.000374
57647,0.372944,0.036884,0.002040,0.546501,0.040127,0.001503
57648,0.050649,0.001487,0.000561,0.003820,0.942673,0.000810


In [20]:
import pandas as pd
c_data = pd.read_csv("data/spotify_millsongdata_clustered_data.csv")
cluster_counts = c_data['Cluster Name'].value_counts()
cluster_counts

Cluster Name
Happy          25891
Anxious/Sad    15669
Energetic      11809
Calm            4281
Name: count, dtype: int64

In [27]:
def extract_songs_by_cluster(data, cluster_name):

    return data[data['Cluster Name'] == cluster_name]

# Example usage: Extract all 'Happy' songs
happy_songs = extract_songs_by_cluster(c_data, 'Calm')
happy_songs.head()
#happy_songs.index

Unnamed: 0,sadness,joy,love,anger,fear,surprise,Cluster,Cluster Name
8,0.002233,0.006764,0.000896,0.002713,0.011955,0.97544,3,Calm
13,0.003269,0.088136,0.002857,0.003269,0.004295,0.898174,3,Calm
18,0.022205,0.008251,0.000537,0.006992,0.961055,0.000961,3,Calm
33,0.139644,0.261767,0.01707,0.012262,0.496369,0.072888,3,Calm
35,0.002148,0.005824,0.001295,0.000997,0.081222,0.908514,3,Calm


In [35]:
from transformers import pipeline

# 创建情绪分类的pipeline
emotion_classifier = pipeline("text-classification", model="bhadresh-savani/bert-base-uncased-emotion")

# 创建情感分类的pipeline
sentiment_classifier = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment")

# 映射函数：将情感和情绪分类结果映射到歌曲类别
def map_emotion_sentiment_to_song_category(emotion_prediction, sentiment_prediction):
    # 定义情感到歌曲类别的映射
    sentiment_mapping = {
        'LABEL_0': 'Anxious/Sad',  # 通常代表负面情感
        'LABEL_1': 'Neutral',      # 代表中性情感
        'LABEL_2': 'Happy',        # 代表正面情感
    }

    # 定义情绪到歌曲类别的映射
    emotion_mapping = {
        'sadness': 'Anxious/Sad',
        'joy': 'Happy',
        'love': 'Happy',
        'anger': 'Anxious/Sad',
        'fear': 'Anxious/Sad',
        'surprise': 'Energetic'
    }

    # 获取情感和情绪分类的最高得分类别
    top_emotion = max(emotion_prediction, key=lambda x: x['score'])['label']
    top_sentiment = max(sentiment_prediction, key=lambda x: x['score'])['label']

    # 如果情感是中性的，则返回'Neutral'（可以映射到'Calm'）
    if sentiment_mapping[top_sentiment] == 'Neutral':
        return 'Calm'

    # 否则，根据情绪返回相应的歌曲类别
    return emotion_mapping.get(top_emotion, 'Unknown')

# 示例文本
text = "I love using transformers. The best part is wide range of support and its easy to use"

# 使用pipeline进行情绪和情感分类
emotion_prediction = emotion_classifier(text)
sentiment_prediction = sentiment_classifier(text)

# 映射到歌曲类别
song_category = map_emotion_sentiment_to_song_category(emotion_prediction, sentiment_prediction)
print(f"The mapped song category is: {song_category}")
song_list = extract_songs_by_cluster(c_data, song_category)

The mapped song category is: Happy


In [39]:
song_list
knn_data = song_list.drop(columns=['Cluster', 'Cluster Name'])
knn_data

Unnamed: 0,sadness,joy,love,anger,fear,surprise
0,0.000284,0.998917,0.000303,0.000132,0.000167,0.000196
1,0.002162,0.102594,0.891564,0.000729,0.002013,0.000938
3,0.002880,0.019789,0.972059,0.004212,0.000504,0.000556
4,0.029310,0.077233,0.838309,0.052503,0.001412,0.001232
11,0.000533,0.997985,0.000700,0.000445,0.000202,0.000135
...,...,...,...,...,...,...
57625,0.004884,0.952083,0.002239,0.037981,0.001817,0.000997
57632,0.001208,0.989216,0.001292,0.007178,0.000750,0.000356
57638,0.005849,0.983987,0.001684,0.007313,0.000802,0.000365
57641,0.004559,0.963978,0.001969,0.027313,0.001425,0.000756


In [40]:
import numpy as np

# Generate a random set of emotion ratios
np.random.seed(10)  # Set a seed for reproducibility
random_emotion_ratios = np.array([0.97132064, 0.92075195, 0.93364823, 0.94880388, 0.0850701, 0.22479665])
#random_emotion_ratios /= random_emotion_ratios.sum()  # Normalize to sum up to 1

random_emotion_ratios

array([0.97132064, 0.92075195, 0.93364823, 0.94880388, 0.0850701 ,
       0.22479665])

In [43]:
from sklearn.neighbors import NearestNeighbors

# Reshape the generated emotion ratios for compatibility with NearestNeighbors
query_point = random_emotion_ratios.reshape(1, -1)

# Using NearestNeighbors to find the closest point
nn = NearestNeighbors(n_neighbors=10)
nn.fit(knn_data)
distance, index = nn.kneighbors(query_point)

# Retrieve the closest emotion ratio from the dataset
closest_emotion_ratio = knn_data.iloc[index[0]]

closest_emotion_ratio, distance[0]




(        sadness       joy      love     anger      fear  surprise
 10670  0.297166  0.215347  0.265114  0.212264  0.008144  0.001965
 49013  0.339184  0.236248  0.178180  0.239755  0.004645  0.001987
 48583  0.310853  0.190750  0.187313  0.300772  0.008200  0.002112
 32995  0.352378  0.247281  0.188399  0.204357  0.005482  0.002102
 5948   0.176612  0.237094  0.293821  0.286322  0.003418  0.002734
 11760  0.339477  0.207819  0.280213  0.163070  0.007928  0.001494
 26088  0.275260  0.141072  0.242191  0.330471  0.008596  0.002409
 52338  0.305945  0.202773  0.153145  0.327233  0.009492  0.001412
 7091   0.203317  0.182020  0.357383  0.251667  0.003650  0.001964
 56812  0.300726  0.178385  0.328973  0.181167  0.008574  0.002175,
 array([1.4131814 , 1.41341321, 1.41477153, 1.41498302, 1.41534155,
        1.4169231 , 1.41709351, 1.41752134, 1.41771662, 1.4181384 ]))

In [49]:
song_indices = closest_emotion_ratio.index
print(song_indices)

Index([10670, 49013, 48583, 32995, 5948, 11760, 26088, 52338, 7091, 56812], dtype='int64')


In [50]:
import pandas as pd

# Setting the path to the uploaded file
file_path = 'data/spotify_millsongdata.csv'

# Attempting to read the first few rows of the file to understand its structure
orginal_data = pd.read_csv(file_path)

orginal_data.iloc[song_indices]

Unnamed: 0,artist,song,link,text
10670,Kinks,End Of The Season,/k/kinks/end+of+the+season_20079121.html,Winter time is coming \r\nAll the sky is grey...
49013,Prince,And God Created Woman,/p/prince/and+god+created+woman_20111391.html,In a deep sleep I fell \r\nAnd the music star...
48583,Planetshakers,Never Stop,/p/planetshakers/never+stop_20729687.html,All my hopes \r\nAnd all my dreams \r\nGod I...
32995,Frankie Goes To Hollywood,Get It On,/f/frankie+goes+to+hollywood/get+it+on_2070552...,"Well you're dirty and sweet, clad in black. \..."
5948,Faith No More,Am I Evil?,/f/faith+no+more/am+i+evil_20583909.html,(Originally recorded by Diamond Head) \r\n \...
11760,Lionel Richie,Piece Of Love,/l/lionel+richie/piece+of+love_20083903.html,"Time can heal a broken heart \r\nOh, but does..."
26088,Bon Jovi,Amen,/b/bon+jovi/amen_21055380.html,Amen \r\nLast night I had a dream \r\nThe dr...
52338,Sting,Roxanne,/s/sting/roxanne_10225122.html,"Roxanne, you don't have to put on the red ligh..."
7091,Gordon Lightfoot,Heaven Don't Deserve Me,/g/gordon+lightfoot/heaven+dont+deserve+me_200...,I'm not afraid that when I'm dying \r\nThere'...
56812,Wyclef Jean,Sweetest Girl (Dollar Bill),/w/wyclef+jean/sweetest+girl+dollar+bill_20954...,Ah ah ah ah \r\n \r\nSome live for the bill ...
