In [35]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torch.utils.data import Dataset, DataLoader


In [36]:
class SpotifyDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.FloatTensor(X)
        self.y = torch.FloatTensor(y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

class SpotifyRankPredictor(nn.Module):
    def __init__(self):
        super(SpotifyRankPredictor, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(8, 32),  # 입력층을 32로 줄임
            nn.ReLU(),
            nn.BatchNorm1d(32),
            nn.Dropout(0.2),  # Dropout 비율을 0.2로 낮춤
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.BatchNorm1d(16),
            nn.Linear(16, 1)
        )

    def forward(self, x):
        return self.layers(x)

In [37]:
def preprocess_data(df):
    # Features and target
    X = df[['Danceability', 'Energy', 'Loudness', 'Speechiness',
            'Acousticness', 'Liveness', 'Tempo', 'Duration (ms)']].values
    y = df['Highest Charting Position'].values

    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    y_scaler = MinMaxScaler()
    y_scaled = y_scaler.fit_transform(y.reshape(-1, 1)).flatten()

    return X_scaled, y_scaled,y_scaler

In [38]:
def train_model(model, train_loader, val_loader, criterion, optimizer,
                num_epochs=100, patience=10):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    best_val_loss = float('inf')
    patience_counter = 0
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch.view(-1, 1))
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        train_loss /= len(train_loader)
        train_losses.append(train_loss)

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch.view(-1, 1))
                val_loss += loss.item()

        val_loss /= len(val_loader)
        val_losses.append(val_loss)

        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print('Early stopping triggered')
                break

    return train_losses, val_losses

In [39]:
def main():
    df = pd.read_csv('spotify_dataset.csv')
    X_scaled, y_scaled, y_scaler = preprocess_data(df)

    X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    train_dataset = SpotifyDataset(X_train, y_train)
    val_dataset = SpotifyDataset(X_val, y_val)
    test_dataset = SpotifyDataset(X_test, y_test)

    batch_size = 32
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    model = SpotifyRankPredictor()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0005, weight_decay=0.0001)


    train_losses, val_losses = train_model(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        criterion=criterion,
        optimizer=optimizer,
        num_epochs=100,
        patience=10
    )


    model.load_state_dict(torch.load('best_model.pth'))
    model.eval()
    test_loss = 0
    predictions = []
    actuals = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            test_loss += criterion(outputs, y_batch.view(-1, 1)).item()
            predictions.extend(outputs.numpy().flatten())
            actuals.extend(y_batch.numpy())

    test_loss /= len(test_loader)
    print(f'Test Loss: {test_loss:.4f}')


    predictions = y_scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten()
    actuals = y_scaler.inverse_transform(np.array(actuals).reshape(-1, 1)).flatten()


    r2 = 1 - np.sum((actuals - predictions) ** 2) / np.sum((actuals - np.mean(actuals)) ** 2)
    print(f'R-squared: {r2:.4f}')

if __name__ == "__main__":
    main()

Epoch [1/100], Train Loss: 0.6314, Val Loss: 0.3815
Epoch [2/100], Train Loss: 0.4150, Val Loss: 0.4042
Epoch [3/100], Train Loss: 0.3363, Val Loss: 0.3238
Epoch [4/100], Train Loss: 0.2672, Val Loss: 0.2605
Epoch [5/100], Train Loss: 0.2185, Val Loss: 0.2010
Epoch [6/100], Train Loss: 0.1766, Val Loss: 0.1659
Epoch [7/100], Train Loss: 0.1426, Val Loss: 0.1381
Epoch [8/100], Train Loss: 0.1243, Val Loss: 0.1231
Epoch [9/100], Train Loss: 0.1156, Val Loss: 0.1151
Epoch [10/100], Train Loss: 0.1053, Val Loss: 0.1069
Epoch [11/100], Train Loss: 0.1038, Val Loss: 0.1015
Epoch [12/100], Train Loss: 0.0990, Val Loss: 0.0988
Epoch [13/100], Train Loss: 0.0960, Val Loss: 0.0985
Epoch [14/100], Train Loss: 0.0931, Val Loss: 0.0973
Epoch [15/100], Train Loss: 0.0901, Val Loss: 0.0979
Epoch [16/100], Train Loss: 0.0937, Val Loss: 0.0960
Epoch [17/100], Train Loss: 0.0909, Val Loss: 0.0939
Epoch [18/100], Train Loss: 0.0904, Val Loss: 0.0956
Epoch [19/100], Train Loss: 0.0929, Val Loss: 0.0952
Ep

  model.load_state_dict(torch.load('best_model.pth'))
