In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader

In [10]:
class SpotifyDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.FloatTensor(X)
        self.y = torch.FloatTensor(y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Neural Network Model
class SpotifyRankPredictor(nn.Module):
    def __init__(self):
        super(SpotifyRankPredictor, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(8, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.layers(x)

# Data Preprocessing
def preprocess_data(df):
    # Features and target
    X = df[['Danceability', 'Energy', 'Loudness', 'Speechiness',
            'Acousticness', 'Liveness', 'Tempo', 'Duration (ms)']].values
    y = df['Highest Charting Position'].values

    # Scale features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    return X_scaled, y

# Training function
def train_model(model, train_loader, val_loader, criterion, optimizer,
                num_epochs=100, patience=10):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    best_val_loss = float('inf')
    patience_counter = 0
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        # Training
        model.train()
        train_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch.view(-1, 1))
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        train_loss /= len(train_loader)
        train_losses.append(train_loss)

        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch.view(-1, 1))
                val_loss += loss.item()

        val_loss /= len(val_loader)
        val_losses.append(val_loss)

        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print('Early stopping triggered')
                break

    return train_losses, val_losses

In [12]:
# Main execution
def main():
    # Load data
    df = pd.read_csv('spotify_dataset.csv')

    # Preprocess data
    X_scaled, y = preprocess_data(df)

    # Split data
    X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    # Create datasets and dataloaders
    train_dataset = SpotifyDataset(X_train, y_train)
    val_dataset = SpotifyDataset(X_val, y_val)
    test_dataset = SpotifyDataset(X_test, y_test)

    batch_size = 32
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    # Initialize model, loss function, and optimizer
    model = SpotifyRankPredictor()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train model
    train_losses, val_losses = train_model(model, train_loader, val_loader,
                                           criterion, optimizer, num_epochs=100, patience=10)

    # Evaluate on test set
    model.load_state_dict(torch.load('best_model.pth'))
    model.eval()
    test_loss = 0
    predictions = []
    actuals = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            test_loss += criterion(outputs, y_batch.view(-1, 1)).item()
            predictions.extend(outputs.numpy().flatten())
            actuals.extend(y_batch.numpy())

    test_loss /= len(test_loader)
    print(f'Test Loss: {test_loss:.4f}')

    # Calculate R-squared
    r2 = 1 - np.sum((np.array(actuals) - np.array(predictions)) ** 2) / np.sum((np.array(actuals) - np.mean(actuals)) ** 2)
    print(f'R-squared: {r2:.4f}')

if __name__ == "__main__":
    main()

Epoch [1/100], Train Loss: 7409.7060, Val Loss: 3941.2246
Epoch [2/100], Train Loss: 3649.7910, Val Loss: 3695.9786
Epoch [3/100], Train Loss: 3563.4630, Val Loss: 3704.5863
Epoch [4/100], Train Loss: 3521.8801, Val Loss: 3790.1949
Epoch [5/100], Train Loss: 3483.9616, Val Loss: 3987.2942
Epoch [6/100], Train Loss: 3405.0490, Val Loss: 3691.5043
Epoch [7/100], Train Loss: 3414.3652, Val Loss: 3706.0746
Epoch [8/100], Train Loss: 3450.8652, Val Loss: 3703.0904
Epoch [9/100], Train Loss: 3384.4123, Val Loss: 3870.0878
Epoch [10/100], Train Loss: 3393.1992, Val Loss: 3747.3066
Epoch [11/100], Train Loss: 3382.8692, Val Loss: 3907.6010
Epoch [12/100], Train Loss: 3360.9840, Val Loss: 3716.1902
Epoch [13/100], Train Loss: 3316.6113, Val Loss: 3847.2645
Epoch [14/100], Train Loss: 3357.7627, Val Loss: 3760.8304
Epoch [15/100], Train Loss: 3320.0762, Val Loss: 3754.1818
Epoch [16/100], Train Loss: 3320.9626, Val Loss: 3671.4885
Epoch [17/100], Train Loss: 3292.2022, Val Loss: 3862.3259
Epoch 

  model.load_state_dict(torch.load('best_model.pth'))
