In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torch.utils.data import Dataset, DataLoader


In [18]:
class SpotifyDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.FloatTensor(X)
        self.y = torch.FloatTensor(y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

class SpotifyRankPredictor(nn.Module):
    def __init__(self):
        super(SpotifyRankPredictor, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(8, 64),  # 더 작은 은닉층으로 시작
            nn.ReLU(),
            nn.BatchNorm1d(64),  # BatchNormalization 추가
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.BatchNorm1d(32),
            nn.Dropout(0.3),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.BatchNorm1d(16),
            nn.Linear(16, 1)
        )

    def forward(self, x):
        return self.layers(x)

In [19]:
def preprocess_data(df):
    # Features and target
    X = df[['Danceability', 'Energy', 'Loudness', 'Speechiness',
            'Acousticness', 'Liveness', 'Tempo', 'Duration (ms)']].values
    y = df['Highest Charting Position'].values

    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    y_scaler = MinMaxScaler()
    y_scaled = y_scaler.fit_transform(y.reshape(-1, 1)).flatten()

    return X_scaled, y_scaled,y_scaler

In [20]:
def train_model(model, train_loader, val_loader, criterion, optimizer,
                num_epochs=100, patience=10):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    best_val_loss = float('inf')
    patience_counter = 0
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch.view(-1, 1))
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        train_loss /= len(train_loader)
        train_losses.append(train_loss)

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch.view(-1, 1))
                val_loss += loss.item()

        val_loss /= len(val_loader)
        val_losses.append(val_loss)

        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print('Early stopping triggered')
                break

    return train_losses, val_losses

In [21]:
def main():
    df = pd.read_csv('spotify_dataset.csv')
    X_scaled, y_scaled, y_scaler = preprocess_data(df)

    X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    train_dataset = SpotifyDataset(X_train, y_train)
    val_dataset = SpotifyDataset(X_val, y_val)
    test_dataset = SpotifyDataset(X_test, y_test)

    batch_size = 32
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    model = SpotifyRankPredictor()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)


    train_losses, val_losses = train_model(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        criterion=criterion,
        optimizer=optimizer,
        num_epochs=100,
        patience=10
    )


    model.load_state_dict(torch.load('best_model.pth'))
    model.eval()
    test_loss = 0
    predictions = []
    actuals = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            test_loss += criterion(outputs, y_batch.view(-1, 1)).item()
            predictions.extend(outputs.numpy().flatten())
            actuals.extend(y_batch.numpy())

    test_loss /= len(test_loader)
    print(f'Test Loss: {test_loss:.4f}')


    predictions = y_scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten()
    actuals = y_scaler.inverse_transform(np.array(actuals).reshape(-1, 1)).flatten()


    r2 = 1 - np.sum((actuals - predictions) ** 2) / np.sum((actuals - np.mean(actuals)) ** 2)
    print(f'R-squared: {r2:.4f}')

if __name__ == "__main__":
    main()

Epoch [1/100], Train Loss: 0.2463, Val Loss: 0.1119
Epoch [2/100], Train Loss: 0.1312, Val Loss: 0.1013
Epoch [3/100], Train Loss: 0.1128, Val Loss: 0.0960
Epoch [4/100], Train Loss: 0.1033, Val Loss: 0.0923
Epoch [5/100], Train Loss: 0.0931, Val Loss: 0.0902
Epoch [6/100], Train Loss: 0.0919, Val Loss: 0.0905
Epoch [7/100], Train Loss: 0.0926, Val Loss: 0.0907
Epoch [8/100], Train Loss: 0.0906, Val Loss: 0.0909
Epoch [9/100], Train Loss: 0.0876, Val Loss: 0.0900
Epoch [10/100], Train Loss: 0.0875, Val Loss: 0.0894
Epoch [11/100], Train Loss: 0.0880, Val Loss: 0.0884
Epoch [12/100], Train Loss: 0.0866, Val Loss: 0.0895
Epoch [13/100], Train Loss: 0.0864, Val Loss: 0.0902
Epoch [14/100], Train Loss: 0.0869, Val Loss: 0.0907
Epoch [15/100], Train Loss: 0.0868, Val Loss: 0.0910
Epoch [16/100], Train Loss: 0.0860, Val Loss: 0.0904
Epoch [17/100], Train Loss: 0.0848, Val Loss: 0.0904
Epoch [18/100], Train Loss: 0.0852, Val Loss: 0.0904
Epoch [19/100], Train Loss: 0.0842, Val Loss: 0.0909
Ep

  model.load_state_dict(torch.load('best_model.pth'))
