In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense

# Step 1: Data Preprocessing
# Load the two CSV files

data = pd.read_csv("song_artist.csv")

In [4]:
#create a new code popularity based on the weighted sum
weights = {
    'acousticness': 0.1,
    'danceability': 0.2,
    'energy': 0.3,
    'instrumentalness': 0.1,
    'liveness': 0.1,
    'speechiness': 0.2
}

In [5]:
data['popularity'] = (data['acousticness'] * weights['acousticness'] +
                      data['danceability'] * weights['danceability'] +
                      data['energy'] * weights['energy'] +
                      data['instrumentalness'] * weights['instrumentalness'] +
                      data['liveness'] * weights['liveness'] +
                      data['speechiness'] * weights['speechiness'])

# Normalize the popularity score to a range between 0 and 100 (optional)
data['popularity'] = (data['popularity'] - data['popularity'].min()) / (data['popularity'].max() - data['popularity'].min()) * 100

In [6]:
data.dtypes

id                           int64
name                        object
artists                     object
acousticness               float64
danceability               float64
energy                     float64
instrumentalness           float64
liveness                   float64
speechiness                float64
acousticness_artist        float64
danceability_artist        float64
energy_artist              float64
instrumentalness_artist    float64
liveness_artist            float64
speechiness_artist         float64
popularity                 float64
dtype: object

In [7]:
y = data['popularity']
X = data.drop(columns=['name', 'artists', 'id', 'popularity'])


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [8]:
sorted_data = data.sort_values(by='popularity', ascending=False)

# Display the top 5 songs with highest popularity
top_songs = sorted_data.head(5)
print("Top 5 Songs with Highest Popularity:")
for i, song in top_songs.iterrows():
    print(f"{i+1}. {song['name']} by {song['artists']}")

Top 5 Songs with Highest Popularity:
57589. The Improv Fairy Tale by Mitch Hedberg
55834. Pissed Off by Dane Cook
79913. The World on Fire by Jim Morrison
52090. Free Dog by Bill Burr
56160. You know who's a great lady? by John Mulaney


In [9]:
user_feedback = []
for i, song in top_songs.iterrows():
    feedback = input(f"On a scale of 1 to 10, how much do you like '{song['name']}' by {song['artists']}? ")
    user_feedback.append(float(feedback))

In [None]:

# Step 2: Model Training
# Define and train a deep learning model
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))  # Linear activation for regression task
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_data=(X_test_scaled, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x20127c971d0>

In [None]:
# Step 3: Recommendation Generation
# Use the trained model to generate recommendations for the user
def generate_recommendations(model, user_feedback):
    # Convert user feedback to an array format that can be used for predictions
    user_data = np.zeros((1, len(data.columns)))
    print(user_data)
    for song_index, feedback_rating in user_feedback.items():
        user_data[0, song_index] = feedback_rating
    
    user_data_scaled = scaler.transform(user_data)
    predictions = model.predict(user_data_scaled)
    # Return top 5 recommended songs based on predictions
    top_recommendations_indices = np.argsort(predictions.flatten())[-5:][::-1]
    return top_recommendations_indices


In [None]:
# Step 4: Feedback Incorporation
# Collect feedback from the user and update the model's predictions
def collect_feedback(predictions, user_feedback):
    # Incorporate user feedback and update the model
    # For simplicity, we assume user_feedback is a list of indices of liked songs
    return predictions

In [None]:
# Step 5: Repeat Recommendations
# Repeat recommendation process until user is satisfied or a stopping condition is met
while True:
    # Generate recommendations based on user feedback
    recommendations = generate_recommendations(model, user_feedback)

    print("\nRecommended songs:")
    for i, song_index in enumerate(recommendations):
        print(f"{i+1}. {data.iloc[song_index]['name']} by {data.iloc[song_index]['artists']}")
    
    # Collect feedback from the user
    user_feedback = {}
    for i, song_index in enumerate(recommendations):
        feedback = input(f"On a scale of 1 to 10, how much do you like '{data.iloc[song_index]['name']}' by {data.iloc[song_index]['artists']}? ")
        user_feedback[song_index] = float(feedback)
    
    satisfied = input("\nAre you satisfied with the recommendations? (yes/no): ")
    if satisfied.lower() == 'yes':
        break


NameError: name 'model' is not defined