In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -q gdown

In [None]:
# Pass the Drive file ID positionally: the `--id` flag is deprecated in gdown.
!gdown 1VeWJvKGgg8sMMc0o_zmrSzgDCdTccGfQ

In [None]:
# -o overwrites without prompting (a prompt would block a re-run); -q keeps the cell output short.
!unzip -o -q results.zip -d /kaggle/working/results

In [5]:
# -f keeps this cell from failing when the archive has already been removed.
!rm -f results.zip

In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from torch.utils.data import TensorDataset, DataLoader

def get_data(save_dir, num_layers=24, hidden_size=1024):
    """Load every ``.npz`` file in ``save_dir`` into a list of per-file records.

    Each archive must provide a ``residual_stream`` array that can be reshaped
    to ``(num_layers, 2, hidden_size)`` and a ``genre`` entry. Index 0 of the
    middle axis is stored as the "conditional" residual and index 1 as the
    "unconditional" one.

    Args:
        save_dir: Directory to scan (non-recursively) for ``.npz`` files.
        num_layers: Size of the first axis after reshaping.
        hidden_size: Size of the last axis after reshaping.

    Returns:
        A list with one dict per file, in sorted filename order. Each dict has
        the keys ``filename``, ``genre`` (exactly as returned by NumPy), and
        ``residual_conditional_{k}`` / ``residual_unconditional_{k}`` for
        ``k = 1 .. num_layers``, each a 1-D array of length ``hidden_size``.

    Raises:
        KeyError: If an archive lacks ``residual_stream`` or ``genre``.
        ValueError: If ``residual_stream`` cannot be reshaped as requested.
    """
    data = []

    # os.listdir returns entries in arbitrary order; sorting makes the record
    # order (and any seeded sampling done on it later) reproducible.
    for filename in sorted(os.listdir(save_dir)):
        if not filename.endswith('.npz'):
            continue

        # NOTE(review): allow_pickle=True can execute arbitrary code if an
        # archive contains pickled objects -- only load archives you trust.
        # The context manager closes the underlying zip file handle.
        with np.load(os.path.join(save_dir, filename), allow_pickle=True) as loaded_file:
            residual_stream = loaded_file['residual_stream']
            genre = loaded_file['genre']

        # Report the raw shape once, for the first archive only.
        if not data:
            print("Residual shape:", residual_stream.shape)

        # Reshape once and slice both variants from the same array.
        residual = residual_stream.reshape(num_layers, 2, hidden_size)
        residual_conditional = residual[:, 0, :]
        residual_unconditional = residual[:, 1, :]

        file_data = {
            'filename': filename,
            'genre': genre,  # Kept as loaded; callers map it to a label
        }

        # Layers are exposed with 1-based suffixes.
        for layer in range(num_layers):
            file_data[f'residual_conditional_{layer + 1}'] = residual_conditional[layer, :]
            file_data[f'residual_unconditional_{layer + 1}'] = residual_unconditional[layer, :]

        data.append(file_data)

    return data

# Linear probe: one bias-free linear layer mapping features to class scores
class SimpleNN(nn.Module):
    """Bias-free linear map from ``input_size`` features to ``num_classes`` scores."""

    def __init__(self, input_size, num_classes):
        super().__init__()
        # bias=False so each output is a pure dot product with a weight row.
        self.linear = nn.Linear(input_size, num_classes, bias=False)

    def forward(self, x):
        """Return the raw (unnormalised) scores for ``x``."""
        scores = self.linear(x)
        return scores

def sign(x):
    """Return -1 where ``x`` is negative and 1 everywhere else (including 0)."""
    is_negative = x < 0
    return torch.where(is_negative, -1, 1)



In [None]:
# Directory containing the extracted .npz result files
save_dir = '/kaggle/working/results/lewtun/'
data = get_data(save_dir)
df = pd.DataFrame(data)

inv_genre_map = {'Classical': 0, 'Electronic': 1, 'Rock': 2, 'Jazz': 3}

# Values read back from .npz archives are NumPy objects rather than plain str
# (possibly unhashable 0-d arrays), so normalise to str before the lookup.
genre_names = df['genre'].map(str)
unknown_genres = sorted(set(genre_names) - set(inv_genre_map))
if unknown_genres:
    raise ValueError(f"Unexpected genres in {save_dir}: {unknown_genres}")
df['label'] = genre_names.map(inv_genre_map)

# Balance the classes by down-sampling every genre to the rarest one's count.
min_count = df.groupby('label').size().min()
cdf = df.groupby('label').sample(n=min_count, random_state=42)


In [None]:
# Train one bias-free linear probe per layer on the conditional residuals and
# record its final training loss, validation loss, accuracy and weight matrix.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed torch so probe initialisation and batch order are reproducible.
torch.manual_seed(42)

layers = list(range(1, 25))  # Layers 1 to 24
acc_list = []
loss_list = []
vloss_list = []
weights_dict = {layer: 0 for layer in layers}
print(weights_dict)

num_classes = 4  # Classical, Electronic, Rock, Jazz
input_size = 1024
batch_size = 1024
num_epochs = 250


def get_loader(X, y, batch_size, shuffle=True):
    """Wrap feature/target tensors in a DataLoader over a TensorDataset."""
    dataset = TensorDataset(X, y)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


for i in layers:
    # The column names depend on the layer, so they must be built inside the loop.
    cond_col = f"residual_conditional_{i}"
    uncond_col = f"residual_unconditional_{i}"  # kept for the unconditional variant

    X_cond = normalize(np.stack(cdf[cond_col].values), norm='l2')  # Shape: (N, 1024)
    y = cdf["label"].values.astype(np.int64)  # Integer class ids in [0, num_classes)

    # Split data into training and testing sets
    X_cond_train, X_cond_test, y_cond_train, y_cond_test = train_test_split(
        X_cond, y, test_size=0.15, random_state=42, shuffle=True, stratify=y
    )

    # Convert features to float tensors
    X_cond_train = torch.tensor(X_cond_train, dtype=torch.float32).to(device)
    X_cond_test = torch.tensor(X_cond_test, dtype=torch.float32).to(device)

    # One-hot encode the labels so the (N, num_classes) targets match the model
    # output element-wise, instead of broadcasting a scalar class id across
    # every output unit.
    y_cond_train = nn.functional.one_hot(
        torch.tensor(y_cond_train, dtype=torch.long), num_classes=num_classes
    ).float().to(device)
    y_cond_test = nn.functional.one_hot(
        torch.tensor(y_cond_test, dtype=torch.long), num_classes=num_classes
    ).float().to(device)

    # Create loaders for the conditional residuals; evaluation order is irrelevant,
    # so the test loader is not shuffled.
    cond_train_loader = get_loader(X_cond_train, y_cond_train, batch_size)
    cond_test_loader = get_loader(X_cond_test, y_cond_test, batch_size, shuffle=False)

    # Initialize the model, optimizer, and MSE loss.
    model = SimpleNN(input_size=input_size, num_classes=num_classes).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.005)
    loss_fn = nn.MSELoss()

    # Training loop using MSELoss with one-hot targets
    epoch_loss = float("nan")
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for X_batch, y_batch in cond_train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)  # Raw outputs: [batch_size, num_classes]
            loss = loss_fn(outputs, y_batch)  # Compare to one-hot target
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * X_batch.size(0)
        # Computed every epoch so the value appended below is always the final
        # epoch's loss, whatever num_epochs is.
        epoch_loss = running_loss / len(cond_train_loader.dataset)
        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.8f}")
    loss_list.append(epoch_loss)  # Loss at the end of training loop

    # Evaluation: compute validation loss and accuracy
    model.eval()
    correct = 0
    total = 0
    val_loss = 0.0

    with torch.no_grad():
        for X_batch, y_batch in cond_test_loader:
            outputs = model(X_batch)
            # Weight the batch-mean loss by the batch size to accumulate a total
            loss = loss_fn(outputs, y_batch)
            val_loss += loss.item() * X_batch.size(0)

            # The predicted class is the output unit with the largest score;
            # the true class is the position of the 1 in the one-hot target.
            predicted = outputs.argmax(dim=1)
            actual = y_batch.argmax(dim=1)
            total += y_batch.size(0)
            correct += (predicted == actual).sum().item()

    # Calculate average loss and accuracy over the validation set
    val_loss /= total
    accuracy = correct / total
    print("--- Layer: ", i, "---")
    print(f"Accuracy (Conditional): {accuracy:.8f}")
    print(f"Validation Loss (Conditional): {val_loss:.8f}")

    acc_list.append(accuracy)
    vloss_list.append(val_loss)
    # Store the full (num_classes, input_size) weight matrix instead of flattening
    weights_dict[i] = model.linear.weight.detach().cpu().numpy()


In [24]:
import numpy as np

# Collect the per-layer probe matrices in layer order (keys are 1-based).
weights_array = np.array(
    [weights_dict[layer] for layer in range(1, len(weights_dict) + 1)]
)
print(weights_array.shape)

(24, 4, 1024)


In [28]:
# Persist the stacked probe weights next to the notebook.
np.save(file='weights.npy', arr=weights_array)