<a href="https://colab.research.google.com/github/Disnu26/myprojects/blob/main/Spotify_song_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Flatten


file_path = "/content/generated_music_dataset.csv"
df = pd.read_csv(file_path)

print("Columns in dataset:", df.columns)

df['listen_timestamp'] = pd.to_datetime(df['listen_timestamp'], errors='coerce')

df.fillna(0, inplace=True)

le_user = LabelEncoder()
le_song = LabelEncoder()
le_artist = LabelEncoder()
le_genre = LabelEncoder()

df['user_id'] = le_user.fit_transform(df['user_id'])
df['song_id'] = le_song.fit_transform(df['song_id'])
df['artist_id'] = le_artist.fit_transform(df['artist_id'])
df['genre'] = le_genre.fit_transform(df['genre'])

scaler = MinMaxScaler()
df[['listen_count', 'added_to_playlist']] = scaler.fit_transform(df[['listen_count', 'added_to_playlist']])

features = ['user_id', 'song_id', 'artist_id', 'genre', 'listen_count', 'added_to_playlist']
target = 'listen_count'

X = df[features]
y = df[target]

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = Sequential([
    Dense(64, activation='relu', input_shape=(len(features),)),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='linear')  # Predict listen count
])

# Compile model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Function to get top 3 songs for a given user
def get_top_songs(user_id, df, model, le_song):
    user_data = df[df['user_id'] == le_user.transform([user_id])[0]]
    X_user = user_data[features]
    predicted_scores = model.predict(X_user).flatten()

    user_data = user_data.copy()
    user_data['predicted_score'] = predicted_scores

    # Get top 3 songs
    top_songs = user_data.sort_values(by='predicted_score', ascending=False).head(3)

    top_songs_list = top_songs['song'].values  # Use actual song names

    return top_songs_list

# Example: Get top 3 songs for user 101
top_songs = get_top_songs(103, df, model, le_song)
print("Top 3 recommended songs for user 101:", top_songs)

Columns in dataset: Index(['user_id', 'song_id', 'artist_id', 'genre', 'listen_count',
       'repeat_play', 'skip_flag', 'liked', 'added_to_playlist',
       'listen_timestamp', 'song'],
      dtype='object')
Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - loss: 156.8403 - mae: 9.7244 - val_loss: 25.8951 - val_mae: 4.3346
Epoch 2/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 13.8352 - mae: 3.0269 - val_loss: 5.3929 - val_mae: 1.6622
Epoch 3/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 3.8831 - mae: 1.4723 - val_loss: 0.6938 - val_mae: 0.6381
Epoch 4/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1.1004 - mae: 0.8082 - val_loss: 0.5494 - val_mae: 0.5596
Epoch 5/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.6112 - mae: 0.6092 - val_loss: 0.4008 - val_mae: 0.4849
Epoch 6/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.4794 - mae: 0.5552 - val_loss: 0.3540 - val_mae: 0.4601
Epoch 7/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.4708 - ma