In [None]:
import pandas as pd

# Path to the input CSV file.
file_path = '/content/drive/MyDrive/spotify_millsongdata_prossed_traindata.csv'

# Read the CSV into a DataFrame.
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the top five rows to get a feel for the columns.
data.head(n=5)


Unnamed: 0,sadness,joy,love,anger,fear,surprise
0,0.000284,0.998917,0.000303,0.000132,0.000167,0.000196
1,0.002162,0.102594,0.891564,0.000729,0.002013,0.000938
2,0.804719,0.171543,0.002526,0.017968,0.001365,0.001879
3,0.00288,0.019789,0.972059,0.004212,0.000504,0.000556
4,0.02931,0.077233,0.838309,0.052503,0.001412,0.001232


In [None]:
from sklearn.preprocessing import StandardScaler

# Count missing values per column.
missing_values = data.isnull().sum()

# Fail fast on gaps: StandardScaler disregards NaNs while fitting and keeps
# them in the transformed output, so missing values would otherwise only
# surface later as an error in the clustering step.
if missing_values.any():
    raise ValueError(
        "Input data contains missing values:\n"
        f"{missing_values[missing_values > 0]}"
    )

# Standardise every column (StandardScaler defaults: centre on the mean and
# scale to unit variance). The result is a NumPy array, not a DataFrame.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)

missing_values, scaled_data[:5]  # Displaying the missing values and the first 5 rows of scaled data

(sadness     0
 joy         0
 love        0
 anger       0
 fear        0
 surprise    0
 dtype: int64,
 array([[-0.69848623,  1.50743393, -0.34349444, -0.60404857, -0.32238039,
         -0.13908145],
        [-0.69370838, -0.61597922,  3.41595651, -0.60223745, -0.31374952,
         -0.13074196],
        [ 1.34848066, -0.45263759, -0.33411921, -0.54992818, -0.31677999,
         -0.12016303],
        [-0.69188085, -0.81214728,  3.75549649, -0.5916693 , -0.3208069 ,
         -0.1350372 ],
        [-0.62462582, -0.67606097,  3.19132303, -0.44513741, -0.31655967,
         -0.12743789]]))

In [None]:
from sklearn.cluster import KMeans

# Number of clusters
n_clusters = 4

# K-means clustering on the standardised features.
# n_init is pinned explicitly: scikit-learn 1.4 changed its default from 10 to
# 'auto' (a single run with the default k-means++ init), and 1.2/1.3 emit a
# FutureWarning when it is left unset. Pinning keeps the number of restarts the
# same regardless of the installed version.
# NOTE(review): if the saved outputs were produced with scikit-learn >= 1.4,
# confirm the cluster labels are unchanged before relying on hard-coded ids.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
clusters = kmeans.fit_predict(scaled_data)

# Attach the labels to a copy of the unscaled data so the original values stay
# readable for interpretation and `data` itself is left untouched.
clustered_data = data.copy()
clustered_data['Cluster'] = clusters

# Displaying the first few rows with cluster labels
clustered_data.head()




Unnamed: 0,sadness,joy,love,anger,fear,surprise,Cluster
0,0.000284,0.998917,0.000303,0.000132,0.000167,0.000196,0
1,0.002162,0.102594,0.891564,0.000729,0.002013,0.000938,0
2,0.804719,0.171543,0.002526,0.017968,0.001365,0.001879,1
3,0.00288,0.019789,0.972059,0.004212,0.000504,0.000556,0
4,0.02931,0.077233,0.838309,0.052503,0.001412,0.001232,0


In [None]:
# Human-readable names for the cluster ids.
# NOTE(review): the ids are only meaningful for one particular KMeans fit;
# the interpretation behind these names is not shown here. Re-check the
# mapping whenever the data, the cluster count or the KMeans settings change.
cluster_names = {0: 'Happy', 1: 'Anxious/Sad', 2: 'Energetic', 3: 'Calm'}

# Series.map turns ids that are missing from the dict into NaN without any
# warning, so verify up front that every label present has a name.
unnamed_clusters = set(clustered_data['Cluster'].unique()) - set(cluster_names)
if unnamed_clusters:
    raise ValueError(
        "No name defined for cluster id(s): "
        f"{sorted(int(c) for c in unnamed_clusters)}"
    )

# Adding the cluster names to the data
clustered_data['Cluster Name'] = clustered_data['Cluster'].map(cluster_names)

# Displaying the first few rows with cluster names
clustered_data.head()


Unnamed: 0,sadness,joy,love,anger,fear,surprise,Cluster,Cluster Name
0,0.000284,0.998917,0.000303,0.000132,0.000167,0.000196,0,Happy
1,0.002162,0.102594,0.891564,0.000729,0.002013,0.000938,0,Happy
2,0.804719,0.171543,0.002526,0.017968,0.001365,0.001879,1,Anxious/Sad
3,0.00288,0.019789,0.972059,0.004212,0.000504,0.000556,0,Happy
4,0.02931,0.077233,0.838309,0.052503,0.001412,0.001232,0,Happy


In [None]:
# Write the labelled rows to CSV; index=False leaves the row index out of the file.
output_path = '/content/drive/MyDrive/spotify_millsongdata_clustered_data.csv'
clustered_data.to_csv(output_path, index=False)