### Preprocessing Data


In [None]:
def load_audio(audio_path, target_duration=60.0, target_sample_rate=32000):
    """Load an audio file as a fixed-length, mono, half-precision tensor.

    The file is read at its native rate, resampled to ``target_sample_rate``
    when the rates differ, then truncated or zero-padded at the end so that it
    holds exactly ``int(target_duration * target_sample_rate)`` samples.

    Args:
        audio_path: Path of the audio file to read.
        target_duration: Desired clip length in seconds.
        target_sample_rate: Desired sample rate in Hz.

    Returns:
        A 1-D ``torch.float16`` tensor moved to the module-level ``device``.
        If anything goes wrong while loading, a message is printed and an
        all-zero tensor of the same length is returned instead.
    """
    try:
        samples, native_rate = librosa.load(audio_path, sr=None, mono=True)
        if native_rate != target_sample_rate:
            samples = librosa.resample(samples, orig_sr=native_rate, target_sr=target_sample_rate)

        wanted = int(target_duration * target_sample_rate)
        shortfall = wanted - samples.shape[0]
        if shortfall < 0:
            # Longer than requested: keep only the leading `wanted` samples.
            samples = samples[:wanted]
        else:
            # Shorter (or equal): append trailing zeros up to `wanted`.
            samples = np.pad(samples, (0, shortfall))

        half_precision = torch.tensor(samples, dtype=torch.float16)
        return half_precision.to(device)

    except Exception as e:
        print(f"Skipping {audio_path} due to error: {e}")
        silence = torch.zeros(int(target_duration * target_sample_rate), dtype=torch.float16)
        return silence.to(device)


In [None]:
# This is just the composer and their song
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
import os
import pandas as pd
import pandas as pd
import torch
import numpy as np
import librosa

# --- Paths (all on the mounted Google Drive) --------------------------------
csv_path = "/content/drive/MyDrive/cs229Project/229Dataset/musicnet_metadata.csv"
audio_folder = "/content/drive/MyDrive/cs229Project/229Dataset/musicnet/musicnet/train_data"
numpy_save_folder = "/content/drive/MyDrive/cs229Project/229Dataset/musicnet/processed_audio/"
updated_csv_path = "/content/drive/MyDrive/cs229Project/229Dataset/musicnet/meta_audiopath.csv"

# The metadata file is ';'-delimited; rows that cannot be parsed are dropped.
data_df = pd.read_csv(csv_path, delimiter=";", on_bad_lines="skip")

# Fail early, and name the column that is actually missing. Every column
# selected below is checked (the previous guard only looked at 'composition'
# while reporting a missing "Composer" column).
required_columns = ['id', 'composer', 'composition']
missing_columns = [column for column in required_columns if column not in data_df.columns]
if missing_columns:
    raise ValueError(f"Required column(s) not found in the dataset: {missing_columns}")

# Map "<id>" -> full path of "<id>.wav". splitext strips only the final
# extension, so a stem that itself contains a dot is kept intact.
file_map = {os.path.splitext(file)[0]: os.path.join(audio_folder, file)
            for file in os.listdir(audio_folder) if file.endswith('.wav')}

data_df['audio'] = data_df['id'].astype(str).map(file_map)

# .copy() makes this an independent frame, so assigning the 'audio' column
# below neither triggers SettingWithCopyWarning nor touches data_df.
updated_data_df = data_df[['id', 'composer', 'composition', 'audio']].copy()

processed_audio_paths = []

# load_audio() reads this module-level name when it moves tensors.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

os.makedirs(numpy_save_folder, exist_ok=True)

# Convert every available recording to a fixed-length array and store it as
# "<id>.npy"; rows without a usable .wav get None so the list stays aligned
# with the rows of data_df.
for _, row in data_df.iterrows():
    audio_path = row['audio']
    if isinstance(audio_path, str) and os.path.exists(audio_path):
        processed_waveform = load_audio(audio_path).cpu().numpy()
        npy_path = os.path.join(numpy_save_folder, f"{row['id']}.npy")
        np.save(npy_path, processed_waveform)
        processed_audio_paths.append(npy_path)
    else:
        processed_audio_paths.append(None)

# data_df keeps the original .wav paths; the exported frame points at the .npy files.
updated_data_df['audio'] = processed_audio_paths

# Save the updated CSV
updated_data_df.to_csv(updated_csv_path, index=False)

print(f"Updated CSV saved to: {updated_csv_path}")


Mounted at /content/drive


In [None]:
# Find all the composer pairs
import itertools
import pandas as pd

# Re-read the metadata CSV written by the preprocessing cell; this relies on
# `updated_csv_path` still being defined in the running kernel.
csv_path = updated_csv_path
df = pd.read_csv(csv_path)

composers = df["composer"].unique()

# permutations() yields ordered pairs, so both (A, B) and (B, A) are present.
composer_pairs = list(itertools.permutations(composers, 2))

# Preview only the first few pairs.
print(composer_pairs[:5])


[('Schubert', 'Mozart'), ('Schubert', 'Dvorak'), ('Schubert', 'Cambini'), ('Schubert', 'Haydn'), ('Schubert', 'Brahms')]


In [None]:
# For finetune
import os
import pandas as pd
import itertools

# Load dataset
csv_path = "/content/drive/MyDrive/cs229Project/229Dataset/musicnet/meta_audiopath.csv"
df = pd.read_csv(csv_path)

# Get unique composers
composers = df["composer"].unique()

# Ordered (source, target) pairs: A->B and B->A are distinct conversions.
composer_pairs = list(itertools.permutations(composers, 2))
print(f"Total composer pairs: {len(composer_pairs)}")

# Decide once which rows have a usable audio file, instead of hitting Drive
# again for every target composer the same source row is paired with.
has_audio = df["audio"].map(lambda path: isinstance(path, str) and os.path.exists(path)).astype(bool)
for missing_path in df.loc[~has_audio, "audio"]:
    print(f"Skipping missing source file: {missing_path}")
usable_df = df[has_audio]

dataset_pairs = []

# One training example per (usable source recording, target composer).
# No target recording is needed: only the target composer's name goes into the prompt.
for source_composer, target_composer in composer_pairs:
    source_df = usable_df[usable_df["composer"] == source_composer]

    for _, source_row in source_df.iterrows():
        dataset_pairs.append({
            "audio_path": source_row['audio'],
            "text_prompt": f"Song Title: {source_row['composition']}. Convert this classical song to {target_composer}'s style."
        })

# Convert to DataFrame (explicit columns keep the CSV header even when no pair was produced)
expanded_df = pd.DataFrame(dataset_pairs, columns=["audio_path", "text_prompt"])

# Save the dataset
expanded_csv_path = "/content/drive/MyDrive/cs229Project/229Dataset/musicnet/expanded_musicnet_dataset.csv"
expanded_df.to_csv(expanded_csv_path, index=False)

print(f"Dataset saved: {expanded_csv_path}")
print(expanded_df.shape)
# Show one complete example; guarded so a small or empty dataset does not raise IndexError.
if not expanded_df.empty:
    print(expanded_df["audio_path"].iloc[0])
    print(expanded_df["text_prompt"].iloc[0])


In [None]:
# For eval
import os
import pandas as pd
import itertools

# Load dataset
csv_path = "/content/drive/MyDrive/cs229Project/229Dataset/musicnet/meta_audiopath.csv"
df = pd.read_csv(csv_path)

composers = df["composer"].unique()

# Ordered (source, target) pairs: A->B and B->A are distinct conversions.
composer_pairs = list(itertools.permutations(composers, 2))
print(f"Total composer pairs: {len(composer_pairs)}")

# Decide once which rows have a usable audio file, instead of hitting Drive
# again for every target composer the same source row is paired with.
has_audio = df["audio"].map(lambda path: isinstance(path, str) and os.path.exists(path)).astype(bool)
for missing_path in df.loc[~has_audio, "audio"]:
    print(f"Skipping missing source file: {missing_path}")
usable_df = df[has_audio]

dataset_pairs = []

# Same pairing as the fine-tuning set, but the composer names are kept as
# explicit columns so evaluation can look up the expected target label.
for source_composer, target_composer in composer_pairs:
    source_df = usable_df[usable_df["composer"] == source_composer]

    for _, source_row in source_df.iterrows():
        dataset_pairs.append({
            "source_composer": source_composer,
            "target_composer": target_composer,
            "audio_path": source_row['audio'],
            "text_prompt": f"Song Title: {source_row['composition']}. Convert this classical song to {target_composer}'s style."
        })

# Explicit columns keep the CSV header even when no pair was produced.
expanded_df = pd.DataFrame(
    dataset_pairs,
    columns=["source_composer", "target_composer", "audio_path", "text_prompt"],
)

expanded_csv_path = "/content/drive/MyDrive/cs229Project/229Dataset/musicnet/expanded_musicnet_dataset_eval.csv"
expanded_df.to_csv(expanded_csv_path, index=False)

In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

### Style Encoder

In [None]:
import torch
import torch.nn as nn
import torchaudio
import torchaudio.transforms as T
from torch.utils.data import DataLoader, Dataset

class StyleEncoder(nn.Module):
    """Convolutional waveform encoder whose output is offset by a learned composer embedding.

    Three strided ``Conv1d``/``ReLU`` stages feed a further convolution with
    adaptive average pooling; the pooled 256-channel vector is projected to
    ``embedding_dim`` and added to the embedding row selected by ``composer_id``.

    Args:
        embedding_dim: Size of the returned style vector.
        num_composers: Number of rows in the composer embedding table.
    """

    def __init__(self, embedding_dim=128, num_composers=10):
        super().__init__()

        # Strided feature extractor: 1 -> 32 -> 64 -> 128 channels.
        stages = []
        in_channels = 1
        for out_channels in (32, 64, 128):
            stages.append(nn.Conv1d(in_channels, out_channels, kernel_size=5, stride=2, padding=2))
            stages.append(nn.ReLU())
            in_channels = out_channels
        self.cnn = nn.Sequential(*stages)

        # Widen to 256 channels, then collapse the time axis to a single step.
        self.temporal = nn.Sequential(
            nn.Conv1d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),
        )

        self.projection = nn.Linear(256, embedding_dim)
        self.composer_embedding = nn.Embedding(num_composers, embedding_dim)

    def forward(self, x, composer_id):
        """Return the style embedding: projected audio features plus the composer embedding."""
        pooled = self.temporal(self.cnn(x)).squeeze(-1)  # drop the length-1 time axis
        audio_part = self.projection(pooled)
        return audio_part + self.composer_embedding(composer_id)

def info_nce_loss(anchor, positive, negatives, temperature=0.07):
    """InfoNCE contrastive loss with cosine similarity.

    For each anchor the loss is ``-log(exp(s_pos/t) / (exp(s_pos/t) + sum_k exp(s_neg_k/t)))``
    where the sum runs over *that anchor's* negatives. The previous version
    averaged the negative similarities and then summed the result over the
    batch, so every anchor shared one denominator term built from other
    anchors' negatives.

    Args:
        anchor: ``(B, D)`` anchor embeddings.
        positive: ``(B, D)`` embeddings matched row-wise with ``anchor``.
        negatives: ``(B, K, D)`` per-anchor negatives, or ``(K, D)`` negatives
            shared by every anchor (broadcast against the anchors). Gradients
            do not flow into the negatives (they are detached, as before).
        temperature: Softmax temperature applied to the similarities.

    Returns:
        Scalar tensor: the mean loss over the batch.
    """
    pos_sim = torch.cosine_similarity(anchor, positive, dim=-1)  # (B,)
    # (B, 1, D) against (B, K, D) or (K, D) broadcasts to one similarity per (anchor, negative).
    neg_sim = torch.cosine_similarity(anchor.unsqueeze(1), negatives.detach(), dim=-1)  # (B, K)

    # Column 0 holds the positive; logsumexp over the row is the log-denominator.
    # Working in log space avoids overflowing exp(sim / temperature) for small temperatures.
    logits = torch.cat([pos_sim.unsqueeze(1), neg_sim], dim=1) / temperature
    return (torch.logsumexp(logits, dim=1) - logits[:, 0]).mean()

import torch
import torchaudio.transforms as T
import librosa
import numpy as np
from torch.utils.data import Dataset

def load_audio(audio_path, target_duration=30.0, target_sample_rate=16000):
    """Load an audio file as a fixed-length, mono, float32 tensor.

    The file is read at its native rate, resampled to ``target_sample_rate``
    when the rates differ, then truncated or zero-padded at the end so that it
    holds exactly ``int(target_duration * target_sample_rate)`` samples.

    Args:
        audio_path: Path of the audio file to read.
        target_duration: Desired clip length in seconds.
        target_sample_rate: Desired sample rate in Hz.

    Returns:
        A 1-D ``torch.float32`` tensor. If anything goes wrong while loading,
        a message is printed and an all-zero tensor of the same length is
        returned instead.
    """
    try:
        samples, native_rate = librosa.load(audio_path, sr=None, mono=True)
        if native_rate != target_sample_rate:
            samples = librosa.resample(samples, orig_sr=native_rate, target_sr=target_sample_rate)

        wanted = int(target_duration * target_sample_rate)
        shortfall = wanted - samples.shape[0]
        if shortfall < 0:
            # Longer than requested: keep only the leading `wanted` samples.
            samples = samples[:wanted]
        else:
            # Shorter (or equal): append trailing zeros up to `wanted`.
            samples = np.pad(samples, (0, shortfall))

        return torch.tensor(samples, dtype=torch.float32)

    except Exception as e:
        print(f"Skipping {audio_path} due to error: {e}")
        silence_length = int(target_duration * target_sample_rate)
        return torch.zeros(silence_length, dtype=torch.float32)


class MusicDataset(Dataset):
    """Dataset of (waveform, composer id) pairs backed by a metadata DataFrame.

    Args:
        df: DataFrame with a ``'composer'`` column and an ``'audio'`` column
            holding the path of each recording. Rows whose ``'audio'`` is
            missing are dropped.
        sample_rate: Sample rate (Hz) passed to ``load_audio``.
        target_duration: Clip length in seconds passed to ``load_audio``.

    Attributes:
        composer_to_id: Composer name -> integer id, numbered in order of first
            appearance in ``df``. It is built from the unfiltered frame, so a
            composer whose rows were all dropped still receives an id.
    """

    def __init__(self, df, sample_rate=32000, target_duration=30.0):
        self.df = df.dropna(subset=['audio'])
        self.sample_rate = sample_rate
        self.target_duration = target_duration
        self.composer_to_id = {composer: i for i, composer in enumerate(df['composer'].unique())}

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(waveform, composer_id)`` for the row at position ``idx``."""
        row = self.df.iloc[idx]
        # The path lives in 'audio' - the same column __init__ filters on.
        # Reading 'audio_path' here raised KeyError for every item.
        audio_path = row['audio']
        waveform = load_audio(audio_path, target_duration=self.target_duration, target_sample_rate=self.sample_rate)

        composer_id = self.composer_to_id[row['composer']]
        # waveform gains a leading channel axis: (1, T); the id is a 0-dim long tensor.
        return waveform.unsqueeze(0), torch.tensor(composer_id, dtype=torch.long)

from torch.cuda.amp import autocast, GradScaler

# Shared gradient scaler for mixed-precision training; train_style_encoder reads this module-level name.
scaler = GradScaler()

def train_style_encoder(model, train_loader, optimizer, device):
    """Run one training epoch of the style encoder and return the mean batch loss.

    Args:
        model: Module called as ``model(waveform, composer_id)``.
        train_loader: Iterable yielding ``(waveform, composer_id)`` batches.
        optimizer: Optimizer over ``model``'s parameters.
        device: Device the batches are moved to.

    Returns:
        The average of the per-batch losses as a float, or ``0.0`` when the
        loader yields no batches (previously a ZeroDivisionError).
    """
    model.train()
    total_loss = 0.0
    num_batches = 0  # counted manually so loaders without __len__ also work

    for waveform, composer_id in train_loader:
        waveform, composer_id = waveform.to(device), composer_id.to(device)

        optimizer.zero_grad()

        with autocast():
            # NOTE(review): all three "views" are forward passes over the same
            # batch. Unless the model is stochastic they are identical, so the
            # positive similarity is always 1 and the negatives are the anchors
            # themselves - confirm this is the intended contrastive setup.
            anchor = model(waveform, composer_id)
            positive = model(waveform, composer_id)
            negatives = model(waveform, composer_id)

            loss = info_nce_loss(anchor, positive, negatives)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item()
        num_batches += 1

        # Free memory
        del waveform, composer_id, anchor, positive, negatives
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    if num_batches == 0:
        return 0.0
    return total_loss / num_batches


def compute_mean_embeddings(model, dataset, device):
    """Average the model's embeddings per composer id.

    Args:
        model: Module called as ``model(waveform, composer_id)`` on a batch of one.
        dataset: Iterable of ``(waveform, composer_id)`` items exposing a
            ``composer_to_id`` mapping (used to enumerate the ids 0..N-1).
        device: Device the items are moved to before the forward pass.

    Returns:
        Dict mapping composer id -> mean embedding (on CPU), averaged over that
        composer's items. Ids that have no items in ``dataset`` are omitted;
        previously they made ``torch.stack`` fail on an empty list.
    """
    model.eval()
    composer_embeddings = {i: [] for i in range(len(dataset.composer_to_id))}

    with torch.no_grad():
        for waveform, composer_id in dataset:
            waveform, composer_id = waveform.to(device), composer_id.to(device)
            # Items are unbatched; add a leading batch axis of size one to both inputs.
            embedding = model(waveform.unsqueeze(0), composer_id.unsqueeze(0))
            composer_embeddings[composer_id.item()].append(embedding.cpu())

    return {
        composer: torch.mean(torch.stack(embeddings), dim=0)
        for composer, embeddings in composer_embeddings.items()
        if embeddings  # skip composers with no samples
    }

# Train the style encoder and save its weights.
# Relies on `data_df` (created by the preprocessing cell) being present in the kernel.
if __name__ == "__main__":
    import pandas as pd
    from torch.utils.data import DataLoader

    dataset = MusicDataset(data_df)
    train_loader = DataLoader(dataset, batch_size=8, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # One embedding row per composer id produced by the dataset.
    model = StyleEncoder(num_composers=len(dataset.composer_to_id)).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    # Training loop
    for epoch in range(10):
        loss = train_style_encoder(model, train_loader, optimizer, device)
        print(f"Epoch {epoch+1}, Loss: {loss:.4f}")

    # Save the model
    # The file is written to the current working directory; the Music Gen
    # section loads its checkpoint from a Google Drive path instead.
    torch.save(model.state_dict(), "style_encoder.pth")




KeyboardInterrupt: 

# Music Gen


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import torch.nn as nn

# Composer name -> integer id used to index StyleEncoder.composer_embedding.
# NOTE(review): EnhancedMusicGenerator and the evaluation section define a
# different name -> id mapping (e.g. "Schubert": 9 there vs 0 here) - confirm
# which numbering the loaded checkpoint was trained with.
composer_map = {
    "Schubert": 0,
    "Mozart": 1,
    "Dvorak": 2,
    "Cambini": 3,
    "Haydn": 4,
    "Brahms": 5,
    "Faure": 6,
    "Ravel": 7,
    "Bach": 8,
    "Beethoven": 9
}

class StyleEncoder(nn.Module):
    """Waveform encoder (CNN + LSTM) whose output is offset by a learned composer embedding.

    Args:
        output_dim: Size of the returned style vector.
        num_composers: Number of rows in the composer embedding table.
        composer_map: Optional composer name -> id mapping used by
            ``extract_composer_id``; defaults to ``DEFAULT_COMPOSER_MAP``.

    Fixes relative to the previous definition:
      * The LSTM produced 128 features but ``projection`` expects 256, so
        ``forward`` could not run. The LSTM is now bidirectional (2 x 128 =
        256), which keeps ``projection`` at ``Linear(256, output_dim)`` and
        leaves the forward-direction LSTM parameter shapes unchanged.
      * The CNN output ``(B, 128, T')`` is transposed to ``(B, T', 128)``, the
        layout a ``batch_first`` LSTM with ``input_size=128`` requires.
      * ``extract_composer_id`` read ``self.composer_map``, which was never set.
    """

    # Same name -> id numbering as the module-level `composer_map` in this cell.
    DEFAULT_COMPOSER_MAP = {
        "Schubert": 0,
        "Mozart": 1,
        "Dvorak": 2,
        "Cambini": 3,
        "Haydn": 4,
        "Brahms": 5,
        "Faure": 6,
        "Ravel": 7,
        "Bach": 8,
        "Beethoven": 9,
    }

    def __init__(self, output_dim=128, num_composers=10, composer_map=None):
        super().__init__()

        self.cnn = nn.Sequential(
            nn.Conv1d(1, 32, kernel_size=5, stride=2, padding=2),
            nn.ReLU(),
            nn.Conv1d(32, 64, kernel_size=5, stride=2, padding=2),
            nn.ReLU(),
            nn.Conv1d(64, 128, kernel_size=5, stride=2, padding=2),
            nn.ReLU()
        )

        # Bidirectional so the two final hidden states concatenate to 256 features.
        self.temporal = nn.LSTM(128, 128, batch_first=True, bidirectional=True)

        self.projection = nn.Linear(256, output_dim)

        self.composer_embedding = nn.Embedding(num_composers, output_dim)

        source_map = self.DEFAULT_COMPOSER_MAP if composer_map is None else composer_map
        self.composer_map = dict(source_map)

    def extract_composer_id(self, text_prompt):
        """Return the id of the composer a prompt asks to convert to, or None.

        A composer is detected (case-insensitively) when the prompt contains
        either ``"convert to <composer>"`` or ``"to <composer>'s style"`` - the
        latter is the wording used by the prompts generated in this notebook.

        Returns:
            A ``(1,)`` long tensor on the same device as the composer
            embedding table, or ``None`` when no known composer is requested.
        """
        prompt = text_prompt.lower()
        target_device = self.composer_embedding.weight.device
        for composer, composer_id in self.composer_map.items():
            name = composer.lower()
            if f"convert to {name}" in prompt or f"to {name}'s style" in prompt:
                print(f"Detected composer: {composer}, ID: {composer_id}")
                return torch.tensor([composer_id], dtype=torch.long, device=target_device)
        return None  # No conversion requested

    def forward(self, waveform, composer_id):
        """Return projected audio features plus the composer embedding."""
        x = self.cnn(waveform)          # (B, 128, T')
        x = x.transpose(1, 2)           # (B, T', 128) for the batch_first LSTM
        _, (h_n, _) = self.temporal(x)  # h_n: (num_directions, B, 128)
        # Final hidden state of the forward and backward passes of the last layer.
        x = torch.cat([h_n[-2], h_n[-1]], dim=-1)  # (B, 256)
        x = self.projection(x)

        composer_embed = self.composer_embedding(composer_id)
        return x + composer_embed


style_encoder = StyleEncoder()
checkpoint = torch.load("/content/drive/MyDrive/cs229Project1/style_encoder.pth", map_location="cpu")

# Keep only checkpoint tensors that exist in this model AND have the same shape.
# Filtering by key alone is not enough: load_state_dict(strict=False) tolerates
# missing or unexpected keys but still raises on a size mismatch.
model_state = style_encoder.state_dict()
filtered_checkpoint = {
    k: v for k, v in checkpoint.items()
    if k in model_state and v.shape == model_state[k].shape
}

# Make partial loads visible instead of silently leaving layers at random init.
skipped_keys = sorted(set(checkpoint) - set(filtered_checkpoint))
if skipped_keys:
    print(f"Checkpoint entries not loaded (unknown key or shape mismatch): {skipped_keys}")
uninitialised_keys = sorted(set(model_state) - set(filtered_checkpoint))
if uninitialised_keys:
    print(f"Model parameters left at their initial values: {uninitialised_keys}")

style_encoder.load_state_dict(filtered_checkpoint, strict=False)
style_encoder.eval()


StyleEncoder(
  (cnn): Sequential(
    (0): Conv1d(1, 32, kernel_size=(5,), stride=(2,), padding=(2,))
    (1): ReLU()
    (2): Conv1d(32, 64, kernel_size=(5,), stride=(2,), padding=(2,))
    (3): ReLU()
    (4): Conv1d(64, 128, kernel_size=(5,), stride=(2,), padding=(2,))
    (5): ReLU()
  )
  (temporal): LSTM(128, 128, batch_first=True)
  (projection): Linear(in_features=256, out_features=128, bias=True)
  (composer_embedding): Embedding(10, 128)
)

In [None]:

# Raw weight matrix of the embedding table: one row per composer id.
composer_embedding_weights = style_encoder.composer_embedding.weight.data

# Map composer names to their corresponding embeddings
# (row `composer_id` of the table, copied to a NumPy array on the CPU).
composer_embeddings_dict = {
    composer: composer_embedding_weights[composer_id].detach().cpu().numpy()
    for composer, composer_id in composer_map.items()
}
for composer, embedding in composer_embeddings_dict.items():
    print(f"{composer}: {embedding[:5]}...")  # Print the first 5 values for brevity


Schubert: [ 1.0189674  -0.03704567 -0.62848276  0.5494599   0.57478607]...
Mozart: [-0.22611332  0.53072536 -2.9945054   1.186264   -0.8996345 ]...
Dvorak: [-0.11956131  0.57687473 -0.14041474 -0.21242541  0.2801856 ]...
Cambini: [-0.08229057  1.303195    0.40165764 -1.3665297  -1.1813307 ]...
Haydn: [-0.255268    0.17314005 -0.38424298 -1.2285194   0.93682635]...
Brahms: [ 0.06443194  0.12758182  0.17273441 -2.7166872   0.5232171 ]...
Faure: [-1.0088236   1.2965635   0.25496018  0.25001827  0.48092383]...
Ravel: [ 0.05876034  2.000688   -1.9079921  -1.1833687  -0.9618606 ]...
Bach: [-0.37951747 -2.3222933   1.1933309  -0.44496727 -0.07125596]...
Beethoven: [-0.0399006 -1.4581578  0.7540325 -0.3136256 -0.7847596]...


In [None]:
import torch
import torchaudio
import torchaudio.transforms as T

from torch import nn
from transformers import AutoProcessor, MusicgenForConditionalGeneration
import scipy.io.wavfile
import librosa

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class EnhancedMusicGenerator(nn.Module):
    """MusicGen wrapper that appends a composer style embedding to conversion prompts.

    Args:
        style_encoder: Optional, already-loaded ``StyleEncoder`` whose composer
            embedding table is read at construction time. When omitted a fresh
            ``StyleEncoder()`` is created, whose embeddings are randomly
            initialised.

    NOTE(review): ``composer_map`` below numbers the composers differently from
    the ``composer_map`` defined next to ``StyleEncoder`` (e.g. "Schubert" is 9
    here and 0 there). Confirm which numbering the style-encoder checkpoint
    uses, since these ids index its embedding table.
    """

    # MusicGen's documentation limits generation to 30 s of audio (1503 tokens).
    MAX_NEW_TOKENS = 1503

    def __init__(self, style_encoder=None):
        super().__init__()
        self.style_encoder = style_encoder if style_encoder is not None else StyleEncoder()
        self.model = MusicgenForConditionalGeneration.from_pretrained(
            "facebook/musicgen-small", torch_dtype=torch.float16
        ).to(device)
        self.processor = AutoProcessor.from_pretrained("facebook/musicgen-small")

        self.composer_map = {
            "Schubert": 9, "Mozart": 7, "Dvorak": 4, "Cambini": 3,
            "Haydn": 6, "Brahms": 2, "Faure": 5, "Ravel": 8,
            "Bach": 0, "Beethoven": 1
        }
        self.composer_mean_embeddings = self.get_composer_mean_embeddings()

    def get_composer_mean_embeddings(self):
        """Return ``{composer name: embedding row as a NumPy array}`` from the style encoder."""
        composer_embedding_weights = self.style_encoder.composer_embedding.weight.data
        return {
            composer: composer_embedding_weights[composer_id].detach().cpu().numpy()
            for composer, composer_id in self.composer_map.items()
        }

    def extract_composer_id(self, text_prompt):
        """Find the target composer named in a conversion prompt.

        Returns:
            ``(composer, composer_id)`` when the prompt contains
            "Convert this classical song to <composer>'s style." (matched
            case-insensitively), otherwise ``(None, None)``.
        """
        # Both sides are lowercased: the old code compared a mixed-case phrase
        # with the lowercased prompt, which could never match.
        prompt = text_prompt.lower()
        for composer, composer_id in self.composer_map.items():
            if f"convert this classical song to {composer.lower()}'s style." in prompt:
                print(f"Detected composer: {composer}, ID: {composer_id}")
                return composer, composer_id
        return None, None

    def generate_music(self, text_prompt, audio_waveform, output_path="generated_music.wav", target_samples=640000):
        """Generate audio for ``text_prompt`` and write it to ``output_path`` as a WAV file.

        Args:
            text_prompt: Prompt text; when it names a known target composer, that
                composer's embedding values are appended to it as text.
            audio_waveform: 1-D source waveform tensor. It is fitted to
                ``target_samples`` and reported, but see the NOTE below.
            output_path: Destination WAV path.
            target_samples: Exact number of samples in the written file; the
                generated audio is truncated or zero-padded to this length.

        Returns:
            ``output_path``.
        """
        composer, _ = self.extract_composer_id(text_prompt)

        if composer is not None:
            composer_mean_embedding = self.composer_mean_embeddings[composer]
            composer_embedding_str = " ".join(map(str, composer_mean_embedding))
            text_prompt += f" Composer Style Embedding: {composer_embedding_str}"

        # Fit the source waveform to the target length. The padding is created
        # with the waveform's own dtype/device so torch.cat cannot hit a mismatch.
        audio_waveform = audio_waveform.to(dtype=torch.float16)
        if audio_waveform.shape[0] > target_samples:
            audio_waveform = audio_waveform[:target_samples]  # Truncate
        elif audio_waveform.shape[0] < target_samples:
            pad_size = target_samples - audio_waveform.shape[0]
            padding = torch.zeros(pad_size, dtype=audio_waveform.dtype, device=audio_waveform.device)
            audio_waveform = torch.cat([audio_waveform, padding], dim=0)

        print(f"Input waveform shape BEFORE MusicGen: {audio_waveform.shape}, dtype={audio_waveform.dtype}")
        # NOTE(review): the waveform is never passed to the model - generation is
        # conditioned on the text alone. Confirm whether audio prompting was intended.

        # Tokenise with the model's own processor. Feeding ord() code points as
        # token ids gave the text encoder ids unrelated to its vocabulary.
        inputs = self.processor(text=[text_prompt], padding=True, return_tensors="pt").to(self.model.device)

        # Ask only for as many tokens as the target length needs (ceil division),
        # capped at the documented limit; the old fixed request of 3000 exceeded it.
        audio_config = self.model.config.audio_encoder
        sampling_rate = int(audio_config.sampling_rate)
        tokens_needed = -(-target_samples * int(audio_config.frame_rate) // sampling_rate)
        max_new_tokens = max(1, min(self.MAX_NEW_TOKENS, tokens_needed))

        with torch.no_grad():
            generated_audio = self.model.generate(**inputs, guidance_scale=3, max_new_tokens=max_new_tokens)

        print(f"Generated raw audio shape: {generated_audio.shape}, dtype={generated_audio.dtype}")

        # generate() returns (batch, channels, samples); keep the single mono
        # track as a 1-D float32 array, which is the layout wavfile.write expects.
        generated_audio = generated_audio[0, 0].to(dtype=torch.float32).cpu().numpy()

        if generated_audio.shape[-1] > target_samples:
            generated_audio = generated_audio[:target_samples]  # Truncate
        elif generated_audio.shape[-1] < target_samples:
            pad_size = target_samples - generated_audio.shape[-1]
            # 1-D pad: the old call on the 3-D array also padded the batch and channel axes.
            generated_audio = np.pad(generated_audio, (0, pad_size), mode='constant')

        print(f"Final processed audio shape: {generated_audio.shape}")

        scipy.io.wavfile.write(output_path, rate=sampling_rate, data=generated_audio)

        return output_path


In [None]:
import pandas as pd
import os
# Evaluation prompts written by the "For eval" cell (one row per source recording / target composer).
csv_path = "/content/drive/MyDrive/cs229Project/229Dataset/musicnet/expanded_musicnet_dataset_eval.csv"
df = pd.read_csv(csv_path)

# Save the generated output from composer A to composer B
output_audio_dir = "/content/drive/MyDrive/cs229Project/229Dataset/musicnet/generated_audio"
os.makedirs(output_audio_dir, exist_ok=True)
# Preview the first five rows.
print(df[:5])

  source_composer target_composer  \
0        Schubert          Mozart   
1        Schubert          Mozart   
2        Schubert          Mozart   
3        Schubert          Mozart   
4        Schubert          Mozart   

                                          audio_path  \
0  /content/drive/MyDrive/cs229Project/229Dataset...   
1  /content/drive/MyDrive/cs229Project/229Dataset...   
2  /content/drive/MyDrive/cs229Project/229Dataset...   
3  /content/drive/MyDrive/cs229Project/229Dataset...   
4  /content/drive/MyDrive/cs229Project/229Dataset...   

                                         text_prompt  
0  Song Title: Piano Quintet in A major. Convert ...  
1  Song Title: Piano Quintet in A major. Convert ...  
2  Song Title: Piano Quintet in A major. Convert ...  
3  Song Title: Piano Quintet in A major. Convert ...  
4  Song Title: Piano Sonata in A major. Convert t...  


In [None]:
import torch

In [None]:
# Restore a saved EnhancedMusicGenerator state dict.
# This cell needs the EnhancedMusicGenerator class to be defined already
# (the captured output shows a NameError when it was run before that cell).
model_path = "/content/drive/MyDrive/enhanced_musicgen_peft.pth"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

enhanced_music_generator = EnhancedMusicGenerator()

try:
    checkpoint = torch.load(model_path, map_location=device)
    enhanced_music_generator.load_state_dict(checkpoint)
    print(f"Model loaded successfully from {model_path}")
    enhanced_music_generator.to(device)
    enhanced_music_generator.eval()
except FileNotFoundError:
    print(f"Error: Model file not found at {model_path}")
except Exception as e:
    # Best-effort: any other failure is reported and the freshly constructed
    # (un-restored) generator stays in `enhanced_music_generator`.
    print(f"An error occurred while loading the model: {e}")


NameError: name 'EnhancedMusicGenerator' is not defined

In [None]:
# Generator instance used by evaluate_musicgen(), which reads the module-level name `music_gen`.
music_gen = EnhancedMusicGenerator()

  self.register_buffer("padding_total", torch.tensor(kernel_size - stride, dtype=torch.int64), persistent=False)
Config of the text_encoder: <class 'transformers.models.t5.modeling_t5.T5EncoderModel'> is overwritten by shared text_encoder config: T5Config {
  "_name_or_path": "t5-base",
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "classifier_dropout": 0.0,
  "d_ff": 3072,
  "d_kv": 64,
  "d_model": 768,
  "decoder_start_token_id": 0,
  "dense_act_fn": "relu",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "relu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": false,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 12,
  "num_heads": 12,
  "num_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_stopping": true,
      "leng

# Evaluation Pipeline


In [None]:
# Composer name -> class id used to score predictions in evaluate_musicgen().
# This rebinds the module-level `composer_map` from the Music Gen section,
# which uses a different numbering.
# NOTE(review): presumably these ids match the label encoding of
# `classifier_model` - confirm against the classifier's training code.
composer_map = {
    "Schubert": 9, "Mozart": 7, "Dvorak": 4, "Cambini": 3,
    "Haydn": 6, "Brahms": 2, "Faure": 5, "Ravel": 8,
    "Bach": 0, "Beethoven": 1
}
# Inverse lookup: class id -> composer name.
reverse_composer_map = {v: k for k, v in composer_map.items()}


In [None]:
import tensorflow_hub as hub

yamnet_model = hub.load('https://tfhub.dev/google/yamnet/1')

def extract_yamnet_features(audio_path, sample_rate=16000):
    """Return a clip-level YAMNet embedding for an audio file.

    The file is loaded with librosa at ``sample_rate``, passed through the
    module-level ``yamnet_model``, and the per-frame embeddings it returns are
    averaged over the frame axis.

    Args:
        audio_path: Path of the audio file to embed.
        sample_rate: Rate (Hz) the audio is resampled to on load.

    Returns:
        NumPy array: the mean of the frame embeddings along axis 0.
    """
    # Local import: this function uses `tf`, but no cell of the notebook imports
    # TensorFlow under that name. Importing here is harmless if it already was.
    import tensorflow as tf

    audio_waveform, _ = librosa.load(audio_path, sr=sample_rate)

    audio_tensor = tf.convert_to_tensor(audio_waveform, dtype=tf.float32)

    # The tensor is squeezed if it carries more than one axis before being passed on.
    if len(audio_tensor.shape) > 1:
        audio_tensor = tf.squeeze(audio_tensor)

    # Only the embeddings are used; scores and spectrogram are discarded.
    _scores, embeddings, _spectrogram = yamnet_model(audio_tensor)

    return np.mean(embeddings.numpy(), axis=0)




In [None]:
from tqdm import tqdm
import pandas as pd
import numpy as np
import torch

def classify_composer(audio_features):
    """Classify one feature vector with the module-level ``classifier_model``.

    Args:
        audio_features: Feature vector for a single clip; a leading batch axis
            is added before prediction.

    Returns:
        ``(top_1, top_2, predictions)``: the index of the highest score, the
        indices of the two highest scores in ascending score order, and the
        raw model output.
    """
    batch = np.expand_dims(audio_features, axis=0)
    predictions = classifier_model.predict(batch)
    ranked = np.argsort(predictions[0])  # ascending: best class is last
    best = np.argmax(predictions)
    return best, ranked[-2:], predictions

def evaluate_musicgen(num_samples=2, random_state=42):
    """Estimate how often generated audio is classified as the requested composer.

    A random subset of the evaluation CSV (module-level ``eval_csv``) is
    sampled. For each row the source waveform is loaded, ``music_gen``
    generates audio for the row's prompt, YAMNet features are extracted from
    the generated file and ``classify_composer`` predicts the composer.

    Args:
        num_samples: Number of rows to evaluate; capped at the size of the CSV
            so a small file no longer makes ``DataFrame.sample`` raise.
        random_state: Seed for the row sampling.

    Returns:
        ``(top_1_accuracy, top_2_accuracy)`` as fractions in [0, 1];
        ``(0.0, 0.0)`` when no row was evaluated.
    """
    df = pd.read_csv(eval_csv)

    sampled_df = df.sample(n=min(num_samples, len(df)), random_state=random_state).reset_index(drop=True)

    top_1_correct = 0
    top_2_correct = 0
    total_samples = 0

    with tqdm(total=len(sampled_df), desc="Evaluating MusicGen") as pbar:
        for row in sampled_df.itertuples(index=False):
            target_id = composer_map[row.target_composer]

            # The preprocessing step stores plain numeric arrays with np.save,
            # so pickle support is not needed to read them back.
            audio_waveform = torch.tensor(np.load(row.audio_path, allow_pickle=False)).to(device)

            generated_audio_path = music_gen.generate_music(row.text_prompt, audio_waveform)

            audio_features = extract_yamnet_features(generated_audio_path)

            top_1_pred, top_2_preds, _ = classify_composer(audio_features)

            if top_1_pred == target_id:
                top_1_correct += 1

            if target_id in top_2_preds:
                top_2_correct += 1

            total_samples += 1
            pbar.update(1)
            # Show the running accuracy on the progress bar (it used to be
            # computed every 50 rows and then thrown away).
            pbar.set_postfix(
                top1=f"{top_1_correct / total_samples:.3f}",
                top2=f"{top_2_correct / total_samples:.3f}",
            )

    if total_samples == 0:
        return 0.0, 0.0

    top_1_accuracy = top_1_correct / total_samples
    top_2_accuracy = top_2_correct / total_samples

    return top_1_accuracy, top_2_accuracy


In [None]:
# Path of the evaluation CSV; evaluate_musicgen() reads this module-level name.
eval_csv="/content/drive/MyDrive/cs229Project/229Dataset/musicnet/expanded_musicnet_dataset_eval.csv"

In [None]:
# Run the evaluation; the two values are the top-1 and top-2 accuracy fractions.
top_1, top_2 = evaluate_musicgen()

Evaluating MusicGen:   0%|          | 0/1 [00:00<?, ?it/s]

Input waveform shape BEFORE MusicGen: torch.Size([640000]), dtype=torch.float16


Evaluating MusicGen:   0%|          | 0/1 [04:33<?, ?it/s]


IndexError: index out of range in self

In [None]:
# Report the results; requires the evaluation cell to have completed so that
# `top_1` and `top_2` exist.
print(f"Top-1 Accuracy: {top_1}")
print(f"Top-2 Accuracy: {top_2}")

Top-1 Accuracy: 16.00
Top-2 Accuracy: 23.00
