In [None]:
import tensorflow
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Dropout
from tensorflow.keras.optimizers import Adam
import numpy as np
import pandas as pd

# Load the multi-user listening log and order it by user, then by timestamp,
# so each user's rows sit together in timestamp order.
df_users = pd.read_csv('../Data/multiple_user_seq_df.csv').sort_values(
    by=['Username', 'Timestamp']
)

def sequential_pattern_mining(df_user, sequence_length=3, epochs=20, batch_size=32):
    """Train a next-track LSTM on a single user's listening history.

    Every distinct ``"Artist - Track Name"`` string is label-encoded to an
    integer id. Sliding windows of ``sequence_length`` consecutive ids are the
    model inputs and the id that follows each window is the target.

    Parameters
    ----------
    df_user : pandas.DataFrame
        Rows for one user with ``'Artist'`` and ``'Track Name'`` columns. The
        row order is used as the sequence order.
    sequence_length : int, default 3
        Number of consecutive tracks used to predict the next one.
    epochs : int, default 20
        Number of training epochs passed to ``model.fit``.
    batch_size : int, default 32
        Mini-batch size passed to ``model.fit``.

    Returns
    -------
    The fitted Keras ``Sequential`` model, or ``None`` when the history has
    ``sequence_length`` rows or fewer (no training window can be built).
    """
    # With too few rows the window list is empty and the 2-D slicing below
    # would raise IndexError, so bail out before doing any work.
    if len(df_user) <= sequence_length:
        print(f"Skipping: {len(df_user)} plays, need more than {sequence_length}.")
        return None

    label_encoder = LabelEncoder()
    combined_encoded = label_encoder.fit_transform(df_user['Artist'] + ' - ' + df_user['Track Name'])
    vocab_size = len(label_encoder.classes_)  # Number of unique artist-track combinations

    # Each row holds `sequence_length` inputs followed by the target id.
    windows = np.array([
        combined_encoded[start: start + sequence_length + 1]
        for start in range(len(combined_encoded) - sequence_length)
    ])

    X, y = windows[:, :-1], windows[:, -1]
    y = to_categorical(y, num_classes=vocab_size)

    model = Sequential([
        Embedding(input_dim=vocab_size, output_dim=50),
        LSTM(100, return_sequences=False),
        Dropout(0.2),
        Dense(vocab_size, activation='softmax')
    ])

    model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
    model.summary()
    model.fit(X, y, epochs=epochs, batch_size=batch_size, verbose=1)

    # Hand the trained model back so it is not thrown away after fitting.
    return model


# Fit one model per user: groupby yields (username, that user's rows) pairs.
user_groups = df_users.groupby('Username')
for user, df_user in user_groups:
    print(f"Sequential pattern mining for user: {user}")
    sequential_pattern_mining(df_user)


In [15]:
import os
from datetime import datetime, timedelta

import requests
# Table of Last.fm users whose listening history will be collected.
test = pd.read_csv("../Data/lastfm_user_clean.csv")

# Credentials are read from the environment so they are never stored in the
# notebook. NOTE(review): the key/secret that used to be hard-coded here have
# been exposed and should be rotated.
lastfm_api_key = os.environ.get("LASTFM_API_KEY")
if not lastfm_api_key:
    raise RuntimeError("Set the LASTFM_API_KEY environment variable before running this cell.")
# The secret is not referenced by any code visible in this notebook; it is kept
# as an optional value so existing references to the name keep working.
lastfm_secret = os.environ.get("LASTFM_SECRET")

# HTTPS so the API key is not sent in clear text.
base_url = 'https://ws.audioscrobbler.com/2.0/'

# Use the frame loaded above; the previous code read an undefined global `df`.
# Assumes the user CSV has a 'Username' column — TODO confirm.
usernames_list = test['Username'].tolist()

def lastfm_get(payload, timeout=10):
    """Send a GET request to the Last.fm API and return the decoded JSON body.

    Parameters
    ----------
    payload : dict
        Query parameters for the call (e.g. ``method``, ``user``). It is not
        modified; ``api_key`` and ``format`` are added to a copy.
    timeout : float, default 10
        Seconds to wait for the server before ``requests`` raises a timeout
        error, so a stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    The parsed JSON response. The HTTP status is not checked here; callers
    inspect the returned payload themselves.
    """
    headers = {'user-agent': 'DataCollectorBot'}
    # Build the query on a copy so the caller's dict is left untouched.
    params = {**payload, 'api_key': lastfm_api_key, 'format': 'json'}
    response = requests.get(base_url, headers=headers, params=params, timeout=timeout)
    return response.json()

def get_one_month_ago_timestamp():
    """Return the POSIX timestamp (whole seconds) of the moment 30 days before now."""
    cutoff = datetime.now() - timedelta(days=30)
    seconds = cutoff.timestamp()
    return int(seconds)

def recent_tracks_last_month_to_df(user, limit=100):
    """Fetch one page of a user's plays from the last 30 days as a DataFrame.

    Parameters
    ----------
    user : str
        Last.fm username to query.
    limit : int, default 100
        Page size requested from ``user.getrecenttracks``. It is also the
        number of timestamped tracks a user must have to be kept.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Artist'``, ``'Track Name'`` and ``'Timestamp'`` with exactly
        ``limit`` rows, or an empty DataFrame when the response did not contain
        exactly ``limit`` timestamped tracks.
    """
    payload = {
        'method': 'user.getrecenttracks',
        'user': user,
        'from': get_one_month_ago_timestamp(),
        'limit': limit  # Adjust based on Last.fm API limits
    }

    recent_tracks = lastfm_get(payload)

    tracks = []
    if 'recenttracks' in recent_tracks and 'track' in recent_tracks['recenttracks']:
        tracks = recent_tracks['recenttracks']['track']
    # A response holding a single track may arrive as a bare dict instead of a
    # one-element list; wrap it so the loop below sees track dicts, not keys.
    if isinstance(tracks, dict):
        tracks = [tracks]

    # Entries without a 'date' field carry no timestamp (presumably the track
    # that is playing right now) and are skipped.
    tracks_list = [
        {
            'Artist': track['artist']['#text'],
            'Track Name': track['name'],
            'Timestamp': track['date']['uts']
        }
        for track in tracks
        if 'date' in track
    ]

    # Keep only users with a full page of timestamped tracks, so every
    # retained user contributes the same number of rows.
    if len(tracks_list) != limit:
        return pd.DataFrame()
    return pd.DataFrame(tracks_list)


def get_recent_tracks_for_multiple_users(users):
    """Collect recent-track frames for several users into one DataFrame.

    Each user's frame comes from ``recent_tracks_last_month_to_df``; empty
    frames are dropped and the rest are tagged with a ``'Username'`` column.
    Returns the row-wise concatenation with a fresh index, or an empty
    DataFrame when no user produced any rows.
    """
    per_user_frames = []
    for username in users:
        history = recent_tracks_last_month_to_df(username)
        if history.empty:
            continue
        history['Username'] = username
        per_user_frames.append(history)

    if not per_user_frames:
        return pd.DataFrame()
    return pd.concat(per_user_frames, ignore_index=True)

multiple_user_seq_df_2 = get_recent_tracks_for_multiple_users(usernames_list)

# Save into ../Data/, which is where the training cell reads
# "../Data/multiple_user_seq_df_2.csv" from; writing to the working directory
# left that cell loading a missing or stale file after a fresh run.
multiple_user_seq_df_2.to_csv('../Data/multiple_user_seq_df_2.csv', index=False)

In [41]:
import torch
import pandas as pd
from torch.utils.data import Dataset
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, random_split


# Load the collected plays, order them by user then timestamp, keep the first
# 12800 rows of that ordering and renumber the index from zero.
updated_df = (
    pd.read_csv("../Data/multiple_user_seq_df_2.csv")
    .sort_values(by=['Username', 'Timestamp'])
    .head(12800)
    .reset_index(drop=True)
)

class SongDataset(Dataset):
    """Sliding-window next-song dataset built from a listening-history frame.

    Track names are label-encoded to integer ids. For every user, each run of
    ``sequence_length`` consecutive ids forms an input and the id that follows
    it is the target. Windows never span two users.

    Attributes set by ``__init__``:
        sequence_length: window length used to build the inputs.
        song_encoder: the fitted ``LabelEncoder`` (``classes_`` lists the tracks).
        sequences: list of ``(input_ids, target_id)`` pairs.
    """

    def __init__(self, df, sequence_length=10):
        """Encode the tracks in ``df`` and build the per-user windows.

        Args:
            df: DataFrame with ``'Username'`` and ``'Track Name'`` columns; the
                row order within each user is used as the play order. A
                ``'Song'`` column with the encoded ids is written onto this
                frame, as the original implementation did on the global frame.
            sequence_length: number of consecutive songs per input window.
        """
        self.sequence_length = sequence_length

        # Encode 'Track Name' into numerical ids. Use the frame that was
        # passed in rather than the notebook-level `updated_df` global.
        self.song_encoder = LabelEncoder()
        df['Song'] = self.song_encoder.fit_transform(df['Track Name'])

        # Create sequences of songs, one user at a time. sort=False keeps the
        # users in order of first appearance, matching the previous behaviour.
        self.sequences = []
        for _, user_df in df.groupby('Username', sort=False):
            songs = user_df['Song'].values
            for i in range(len(songs) - sequence_length):
                input_sequence = songs[i:i + sequence_length]
                target_song = songs[i + sequence_length]
                self.sequences.append((input_sequence, target_song))

    def __len__(self):
        """Return the number of (window, target) pairs."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return pair ``idx`` as ``(float32 tensor [sequence_length, 1], long scalar)``."""
        input_sequence, target_song = self.sequences[idx]
        # Add an extra trailing dimension so each time step has input_size 1.
        input_sequence = torch.tensor(input_sequence, dtype=torch.float32).unsqueeze(-1)
        target_song = torch.tensor(target_song, dtype=torch.long)
        return input_sequence, target_song


# Build the windowed dataset and a shuffling loader over it; the number of
# distinct encoded tracks becomes the size of the model's output layer.
batch_size = 6400
dataset = SongDataset(updated_df)
dataloader = DataLoader(dataset, shuffle=True, batch_size=batch_size)
num_unique_songs = len(dataset.song_encoder.classes_)

import torch.nn as nn

class SimpleLSTM(nn.Module):
    """Single-layer LSTM followed by a linear layer on the last time step.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the LSTM hidden state.
        output_size: number of output scores produced per sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch of sequences ``(batch, seq_len, input_size)`` to scores ``(batch, output_size)``."""
        # No explicit initial state is passed: nn.LSTM then starts from zeros
        # that match the input's device and dtype. The hand-built CPU float32
        # zeros used before broke as soon as the model ran elsewhere.
        out, _ = self.lstm(x)
        # Only the output at the final time step feeds the linear layer.
        return self.linear(out[:, -1, :])

def calculate_accuracy(outputs, targets):
    """Return the fraction of rows whose highest-scoring class equals the target.

    ``outputs`` holds one row of class scores per sample and ``targets`` the
    matching class indices; the result is a Python float in [0, 1].
    """
    predicted = torch.max(outputs, 1).indices
    hits = (predicted == targets).sum().item()
    return hits / targets.size(0)

# Cross-entropy loss over the song classes, a one-feature-per-step LSTM with a
# 50-unit hidden state, and Adam with its default settings.
criterion = nn.CrossEntropyLoss()
model = SimpleLSTM(input_size=1, hidden_size=50, output_size=num_unique_songs)
optimizer = torch.optim.Adam(params=model.parameters())

# Number of passes over the data; also used in the progress line below.
num_epochs = 50

for epoch in range(num_epochs):
    model.train()
    loss_sum = 0.0
    accuracy_sum = 0.0
    samples_seen = 0
    for inputs, targets in dataloader:
        optimizer.zero_grad()
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # The loss and accuracy of a batch are means over that batch, so weight
        # them by the batch size: the last batch can be smaller and a plain
        # average of batch means would over-count it.
        n_batch = targets.size(0)
        loss_sum += loss.item() * n_batch
        accuracy_sum += calculate_accuracy(outputs, targets) * n_batch
        samples_seen += n_batch

    # Per-sample averages over the epoch (max() guards an empty loader).
    train_loss = loss_sum / max(samples_seen, 1)
    train_accuracy = accuracy_sum / max(samples_seen, 1)

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f}')
    

Epoch 1/50, Loss: 9.1947, Accuracy: 0.0001
Epoch 2/50, Loss: 9.1849, Accuracy: 0.0001
Epoch 3/50, Loss: 9.1779, Accuracy: 0.0001
Epoch 4/50, Loss: 9.1713, Accuracy: 0.0001
Epoch 5/50, Loss: 9.1645, Accuracy: 0.0003
Epoch 6/50, Loss: 9.1584, Accuracy: 0.0003
Epoch 7/50, Loss: 9.1514, Accuracy: 0.0002
Epoch 8/50, Loss: 9.1446, Accuracy: 0.0004
Epoch 9/50, Loss: 9.1388, Accuracy: 0.0004
Epoch 10/50, Loss: 9.1319, Accuracy: 0.0004
Epoch 11/50, Loss: 9.1252, Accuracy: 0.0005
Epoch 12/50, Loss: 9.1190, Accuracy: 0.0004
Epoch 13/50, Loss: 9.1114, Accuracy: 0.0005
Epoch 14/50, Loss: 9.1057, Accuracy: 0.0006
Epoch 15/50, Loss: 9.0989, Accuracy: 0.0008
Epoch 16/50, Loss: 9.0910, Accuracy: 0.0009
Epoch 17/50, Loss: 9.0844, Accuracy: 0.0009
Epoch 18/50, Loss: 9.0776, Accuracy: 0.0008
Epoch 19/50, Loss: 9.0694, Accuracy: 0.0008
Epoch 20/50, Loss: 9.0622, Accuracy: 0.0007
Epoch 21/50, Loss: 9.0547, Accuracy: 0.0006
Epoch 22/50, Loss: 9.0466, Accuracy: 0.0008
Epoch 23/50, Loss: 9.0383, Accuracy: 0.00