In [11]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [12]:
data = pd.read_csv('data/data.csv')

In [13]:
label_encoders = {}
for column in ['artists', 'id', 'name', 'release_date']:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Feature selection
features = data.dropna(subset=['popularity'])
target = data['popularity']


# Scaling features
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Splitting the dataset
X_train, X_val, y_train, y_val = train_test_split(features_scaled, target, test_size=0.2, random_state=42)


# Convert data to PyTorch tensors
train_data = TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train.values).float())
val_data = TensorDataset(torch.from_numpy(X_val).float(), torch.from_numpy(y_val.values).float())

# Create DataLoaders
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
val_loader = DataLoader(val_data, batch_size=64, shuffle=False)

In [14]:
num_classes = len(np.unique(target))

class FullyConnectedNet(nn.Module):
    def __init__(self, input_size, num_classes):
        super(FullyConnectedNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [15]:
n_estimators = 5
models = []
input_size = X_train.shape[1]  # Number of features
num_classes = len(np.unique(y_train))  # Number of unique classes

for _ in range(n_estimators):
    model = FullyConnectedNet(input_size, num_classes)
    models.append(model)


In [17]:
criterion = nn.CrossEntropyLoss()

for model in models:
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(10):
        model.train()
        for inputs, labels in train_loader:
            labels = labels.long()  # Convert labels to long, ensure they are 0-indexed

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Validation step
        model.eval()
        with torch.no_grad():
            val_loss = sum(criterion(model(inputs), labels.long()).item() 
                           for inputs, labels in val_loader) / len(val_loader)
        print(f'Epoch {epoch}, Validation Loss: {val_loss}')


IndexError: Target 100 is out of bounds.

In [None]:
def evaluate_ensemble(models, loader, criterion):
    total_loss = 0
    for inputs, labels in loader:
        labels = labels.long()
        outputs = [model(inputs) for model in models]
        avg_output = torch.mean(torch.stack(outputs), dim=0)
        total_loss += criterion(avg_output, labels).item()
    return total_loss / len(loader)

ensemble_val_loss = evaluate_ensemble(models, val_loader, criterion)
print(f'Ensemble Validation Loss: {ensemble_val_loss}')