In [33]:
import requests
from dotenv import load_dotenv
import os
import json
import base64   
import pandas as pd
import librosa
import numpy as np
import lyricsgenius
import langdetect
import re
import string
import tempfile
from datetime import datetime, timedelta

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Dropout
from tensorflow.keras.optimizers import Adam

In [23]:
# For any api we can try using the "+" email trick to get more API keys

# Credentials are read from environment variables so that no secret is stored
# in the notebook itself. Define them in a local, git-ignored `.env` file or
# export them in the shell before starting the kernel:
#   GENIUS_CLIENT_ID, GENIUS_CLIENT_SECRET, GENIUS_ACCESS_TOKEN,
#   LASTFM_API_KEY, LASTFM_SECRET
# NOTE: any key that was ever committed inside a notebook must be treated as
# leaked and should be revoked and regenerated.
try:
    from dotenv import load_dotenv
except ImportError:
    # python-dotenv is optional at this point: without it, only variables that
    # are already present in the process environment are picked up.
    pass
else:
    load_dotenv()

# Each value is None when the corresponding variable is not set.
genius_client_id = os.getenv("GENIUS_CLIENT_ID")
genius_client_secret = os.getenv("GENIUS_CLIENT_SECRET")
genius_access_token = os.getenv("GENIUS_ACCESS_TOKEN")

lastfm_api_key = os.getenv("LASTFM_API_KEY")
lastfm_secret = os.getenv("LASTFM_SECRET")

In [24]:
# HTTPS keeps the API key (sent as a query parameter) out of clear text.
base_url = 'https://ws.audioscrobbler.com/2.0/'

def lastfm_get(payload, timeout=10):
    """Send a GET request to the Last.fm API and return the decoded JSON body.

    Args:
        payload: Dict of query parameters (e.g. 'method', 'user'). It is not
            modified; 'api_key' and 'format' are added to a copy and override
            any values of the same name supplied by the caller.
        timeout: Seconds to wait for the server before `requests` raises a
            timeout error. Without a timeout a stalled connection would block
            the notebook indefinitely.

    Returns:
        The parsed JSON response. The HTTP status is not checked here, so a
        JSON error body returned by the API is passed through to the caller.
    """
    headers = {'user-agent': 'DataCollectorBot'}
    # Build a fresh dict so the caller's payload is left untouched.
    params = {**payload, 'api_key': lastfm_api_key, 'format': 'json'}
    response = requests.get(base_url, headers=headers, params=params, timeout=timeout)
    return response.json()


def get_recent_tracks(user):
    """Call Last.fm's `user.getrecenttracks` for `user` and return the JSON result."""
    return lastfm_get({'method': 'user.getrecenttracks', 'user': user})

def get_weekly_artist_chart(user):
    """Call Last.fm's `user.getweeklyartistchart` for `user` and return the JSON result."""
    return lastfm_get({'method': 'user.getweeklyartistchart', 'user': user})

def get_weekly_track_chart(user):
    """Call Last.fm's `user.getweeklytrackchart` for `user` and return the JSON result."""
    return lastfm_get({'method': 'user.getweeklytrackchart', 'user': user})

In [25]:
def get_one_month_ago_timestamp():
    """Return the Unix timestamp, in whole seconds, of the moment 30 days before now."""
    return int((datetime.now() - timedelta(days=30)).timestamp())

def recent_tracks_last_month_to_df(user, min_tracks=50):
    """Return the user's scrobbles from the last 30 days as a DataFrame.

    Only the first page of results (up to 200 entries) is requested. Entries
    without a 'date' field are skipped because they carry no timestamp.

    Args:
        user: Last.fm user name.
        min_tracks: The history is returned only when it contains MORE than
            this many timestamped tracks; otherwise the result is empty.

    Returns:
        A DataFrame with columns 'Artist', 'Track Name' and 'Timestamp'
        (the 'uts' value exactly as delivered by the API). The frame is empty,
        but still has these columns, when the API response contains no
        'recenttracks' section or too few tracks were found.
    """
    columns = ['Artist', 'Track Name', 'Timestamp']

    payload = {
        'method': 'user.getrecenttracks',
        'user': user,
        'from': get_one_month_ago_timestamp(),
        'limit': 200  # Adjust based on Last.fm API limits
    }

    recent_tracks = lastfm_get(payload)

    # An error response has no 'recenttracks' key, so look it up defensively
    # instead of indexing (the weekly chart helpers guard the same way).
    tracks = recent_tracks.get('recenttracks', {}).get('track', [])
    # NOTE(review): the API appears to deliver a bare object rather than a
    # one-element list when there is a single track; normalise so the loop
    # below always iterates over track dicts.
    if isinstance(tracks, dict):
        tracks = [tracks]

    tracks_list = [
        {
            'Artist': track['artist']['#text'],
            'Track Name': track['name'],
            'Timestamp': track['date']['uts']
        }
        for track in tracks
        if 'date' in track  # Ensure the track has a timestamp
    ]

    # Too little history: return an empty frame that keeps the expected
    # columns, so callers can still select them without a KeyError.
    if len(tracks_list) > min_tracks:
        return pd.DataFrame(tracks_list, columns=columns)
    return pd.DataFrame(columns=columns)

In [26]:
def list_to_df(data_list, columns):
    """Build a DataFrame with the given columns from a list of records.

    A falsy `data_list` (for example an empty list) yields an empty DataFrame
    that still carries `columns`.
    """
    if not data_list:
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(data_list, columns=columns)

def get_weekly_artist_chart_df(user):
    """Return the user's weekly artist chart as a DataFrame.

    The frame has the columns 'Artist' and 'Play Count'; it is empty when the
    response has no 'weeklyartistchart' section or that section has no
    'artist' entry.
    """
    response = get_weekly_artist_chart(user)
    chart = response['weeklyartistchart'] if 'weeklyartistchart' in response else {}
    entries = chart['artist'] if 'artist' in chart else []
    rows = [
        {'Artist': entry['name'], 'Play Count': entry['playcount']}
        for entry in entries
    ]
    return list_to_df(rows, ['Artist', 'Play Count'])

def get_weekly_track_chart_df(user):
    """Return the user's weekly track chart as a DataFrame.

    The frame has the columns 'Track Name', 'Artist' and 'Play Count'; it is
    empty when the response has no 'weeklytrackchart' section or that section
    has no 'track' entry.
    """
    response = get_weekly_track_chart(user)
    chart = response['weeklytrackchart'] if 'weeklytrackchart' in response else {}
    entries = chart['track'] if 'track' in chart else []
    rows = [
        {
            'Track Name': entry['name'],
            'Artist': entry['artist']['#text'],
            'Play Count': entry['playcount'],
        }
        for entry in entries
    ]
    return list_to_df(rows, ['Track Name', 'Artist', 'Play Count'])

In [32]:
# Fetch the last month of scrobbles for the Last.fm user 'Bans77'.
# The helper returns an empty DataFrame when it finds 50 or fewer timestamped
# tracks, so `df` may be empty here.
df = recent_tracks_last_month_to_df('Bans77')
# Bare last expression: display the frame as the cell output.
df

Unnamed: 0,Artist,Track Name,Timestamp
0,"Maggie Lindemann, Jasiah",taking over me,1709902652
1,DZP,Always,1709901999
2,"Blazy, Kevin Brauer",Harvest Moon,1709901777
3,Blazy,Harvest Moon,1709901774
4,Mandragora,Drama,1709901506
...,...,...,...
195,Side of Despondency,Sincere Regret,1709868766
196,Side of Despondency,Sincere Regret,1709868537
197,Side of Despondency,Sincere Regret,1709868308
198,Side of Despondency,Sincere Regret,1709868079


# LSTM


In [34]:
# Train an LSTM that, given `sequence_length` consecutive plays, predicts the
# play that followed them.
sequence_length = 3

# At least sequence_length + 1 plays are needed to form one training window;
# fail with a clear message instead of an obscure KeyError/IndexError later.
if len(df) <= sequence_length:
    raise ValueError(
        f"Need more than {sequence_length} tracks to build training sequences, got {len(df)}."
    )

# Combine 'Artist' and 'Track Name' into a single feature
df['Artist_Track'] = df['Artist'] + ' - ' + df['Track Name']

# The frame is ordered newest-first (timestamps decrease with the index), so
# windows taken in that order would teach the model to predict the OLDER play
# from the newer ones. Sort into listening order (oldest first) so the last
# element of every window really is the next track. Timestamps may be strings,
# hence the integer sort key.
plays_chronological = df.sort_values(
    'Timestamp', key=lambda col: col.astype('int64'), kind='stable'
)

label_encoder = LabelEncoder()
combined_encoded = label_encoder.fit_transform(plays_chronological['Artist_Track'])

vocab_size = len(label_encoder.classes_)  # Number of unique artist-track combinations

# Sliding windows of sequence_length inputs followed by one target.
sequences = [combined_encoded[i: i + sequence_length + 1] for i in range(len(combined_encoded) - sequence_length)]

sequences = np.array(sequences)

X, y = sequences[:, :-1], sequences[:, -1]
y = to_categorical(y, num_classes=vocab_size)

model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=50, input_length=sequence_length),
    LSTM(100, return_sequences=False), 
    Dropout(0.2),
    Dense(vocab_size, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
model.summary()
# Keep the History object so the loss/accuracy curves can be inspected later.
# NOTE: accuracy is measured on the training data only; there is no hold-out set.
history = model.fit(X, y, epochs=100, batch_size=32, verbose=1)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 3, 50)             3150      
                                                                 
 lstm (LSTM)                 (None, 100)               60400     
                                                                 
 dropout (Dropout)           (None, 100)               0         
                                                                 
 dense (Dense)               (None, 63)                6363      
                                                                 
Total params: 69,913
Trainable params: 69,913
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/10

<keras.callbacks.History at 0x1f0909e2970>