In [1]:
%cd /home/DAVIDSON/dutuller/Workspace/DRI1/MusicGen/

import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import pandas as pd
from embeddings.h5_processor import H5DataProcessor, DatasetConfig
import re
import yaml

# Load dataset configurations. The YAML is read for its top-level 'datasets'
# list; each entry is unpacked as keyword arguments for DatasetConfig.
with open("universal_music/NHS_full.yaml", 'r') as config_file:
    config = yaml.safe_load(config_file)

# Process datasets to get all embeddings
processor = H5DataProcessor(verbose=True)
all_datasets = []

# Process each dataset. The DatasetConfig is built once per entry and the same
# object is used both to resolve the embedding path and to process the file
# (the sample-clip loading further down follows the same pattern).
for dataset_kwargs in config['datasets']:
    dataset_config = DatasetConfig(**dataset_kwargs)
    embedding_path = processor.get_embedding_path(dataset_config)
    dataset = processor.process_h5_file(embedding_path, dataset_config)
    all_datasets.append(dataset)

# Combine datasets in config order. Later code indexes the embeddings, labels
# and filenames with the same positions/masks, so the three collections are
# assumed to be aligned element-for-element within each dataset.
all_embeddings = np.vstack([d.embeddings for d in all_datasets])
all_labels = [label for d in all_datasets for label in d.labels]
all_filenames = [name for d in all_datasets for name in d.filenames]

# Extract song IDs from filenames.
# A filename is searched for "Discography-<digits>_<digits>.wav"; the first digit
# group is taken as the song ID. The dot is escaped so only a literal ".wav"
# matches (an unescaped "." would accept any character in that position).
song_id_pattern = re.compile(r"Discography-(\d+)_\d+\.wav")
all_song_ids = []
for filename in all_filenames:
    match = song_id_pattern.search(filename)
    # Unmatched names get a None placeholder so this list keeps the same length
    # and order as all_filenames.
    all_song_ids.append(int(match.group(1)) if match else None)

# Extract unique song IDs (to ensure we split by song); placeholders are excluded
unique_song_ids = np.unique([song_id for song_id in all_song_ids if song_id is not None])

# Read the human ratings table and keep an independent copy of the rows whose
# 'study' column equals 'web'.
df = pd.read_csv('universal_music/FFfull.csv', low_memory=False)
is_web_study = df['study'] == 'web'
web_df = df[is_web_study].copy()

# Attach one ratings column per policy, loaded from its .npy file.
# NOTE(review): each array is presumably one rating per web_df row, in row
# order -- confirm against the script that produced these files.
policy_rating_files = {
    'generous': "universal_music/web_survey_ratings_generous.npy",
    'random': "universal_music/web_survey_ratings_random.npy",
    'strict': "universal_music/web_survey_ratings_strict.npy",
}
for policy_column, ratings_path in policy_rating_files.items():
    web_df[policy_column] = np.load(ratings_path)

# Now load the sample clip embeddings (what humans actually heard).
# Only the first entry of the YAML's 'datasets' list is used.
with open("universal_music/NHS_samples.yaml", 'r') as f:
    samples_config = yaml.safe_load(f)

dataset_config = DatasetConfig(**samples_config['datasets'][0])
embedding_filename = processor.get_embedding_path(dataset_config)
samples_dataset = processor.process_h5_file(embedding_filename, dataset_config)

sample_filenames = samples_dataset.filenames
sample_embeddings = samples_dataset.embeddings

# Parse the song ID out of each sample filename ("NAIV-<digits>.wav").
# The dot is escaped so only a literal ".wav" matches (an unescaped "." would
# accept any character in that position).
sample_id_pattern = re.compile(r"NAIV-(\d+)\.wav")
sample_song_ids = []
for filename in sample_filenames:
    match = sample_id_pattern.search(filename)
    # Unmatched names get a None placeholder so positions stay aligned with
    # sample_filenames / sample_embeddings.
    sample_song_ids.append(int(match.group(1)) if match else None)

# Give every distinct label an integer index, assigned in sorted label order
unique_labels = sorted(set(all_labels))
label_to_idx = dict(zip(unique_labels, range(len(unique_labels))))

# Cross-validation splitter: shuffled folds with a fixed seed for repeatability
k_folds = 5
kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

# One list of per-fold result records for each rating policy
results = {policy_name: [] for policy_name in ('generous', 'random', 'strict')}

# Perform k-fold cross-validation on unique song IDs.
# For each fold: fit a scaler and classifier on the clips of the training songs,
# predict a label for the sample clip of every held-out song, then compare those
# predictions with the human ratings of the same songs under each policy.
for fold, (train_idx, test_idx) in enumerate(kf.split(unique_song_ids)):
    print(f"Processing fold {fold+1}/{k_folds}")

    # Get song IDs for this fold
    train_song_ids = unique_song_ids[train_idx]
    test_song_ids = unique_song_ids[test_idx]

    # Sets of plain Python ints give constant-time membership tests (instead of
    # scanning a NumPy array once per clip) and simply report False for the
    # None placeholders left by filenames that did not parse.
    train_song_set = set(train_song_ids.tolist())
    test_song_set = set(test_song_ids.tolist())

    # Get embeddings for training
    train_mask = np.array(
        [song_id in train_song_set for song_id in all_song_ids], dtype=bool
    )
    X_train = all_embeddings[train_mask]
    y_train = np.array([label_to_idx[label] for label in np.array(all_labels)[train_mask]])

    # Scale the features (the scaler is fit on the training clips only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    # Train the model
    model = LogisticRegression(max_iter=1000, random_state=42)
    model.fit(X_train_scaled, y_train)

    # Process the test songs: positions of sample clips whose song is held out
    test_sample_idx = np.array(
        [i for i, song_id in enumerate(sample_song_ids) if song_id in test_song_set],
        dtype=int,
    )

    if len(test_sample_idx) == 0:
        print(f"No matching samples found for test songs in fold {fold+1}")
        continue

    X_test_sample = sample_embeddings[test_sample_idx]
    test_sample_song_ids = np.array(sample_song_ids)[test_sample_idx]

    # Scale test data with the statistics learned from the training clips
    X_test_scaled = scaler.transform(X_test_sample)

    # Predict
    y_pred_sample = model.predict(X_test_scaled)

    # Create mapping from song ID to prediction
    # (if a song ID occurs more than once, the last prediction wins)
    id_to_pred = dict(zip(test_sample_song_ids, y_pred_sample))

    # Filter human ratings to only include test songs
    fold_web_df = web_df[web_df['song'].isin(test_sample_song_ids)].copy()

    # Add model predictions
    fold_web_df['model_pred'] = fold_web_df['song'].map(id_to_pred)
    fold_web_df = fold_web_df.dropna(subset=['model_pred'])

    # The metric calls below need at least one row; skip folds without ratings
    # instead of letting them abort the whole run.
    if fold_web_df.empty:
        print(f"No human ratings found for test songs in fold {fold+1}")
        continue

    # Calculate metrics for each policy.
    # The model prediction is passed in sklearn's y_true position and the human
    # rating in the y_pred position, so confusion-matrix rows correspond to
    # model predictions and columns to human ratings.
    # NOTE(review): this compares label_to_idx indices directly with the values
    # in the policy columns -- confirm both use the same integer coding.
    for policy in ['generous', 'random', 'strict']:
        cm = confusion_matrix(fold_web_df['model_pred'], fold_web_df[policy])
        f1 = f1_score(fold_web_df['model_pred'], fold_web_df[policy], average='macro')

        results[policy].append({
            'fold': fold + 1,
            'confusion_matrix': cm,
            'f1_score': f1
        })

        print(f"Fold {fold+1} - {policy.capitalize()} Policy:")
        print(f"  F1 Score: {f1:.4f}")
        print(f"  Confusion Matrix:\n{cm}")

# Summarise every policy: mean and standard deviation (np.std defaults) of the
# F1 scores recorded for its folds.
for policy, fold_records in results.items():
    f1_scores = [record['f1_score'] for record in fold_records]
    avg_f1, std_f1 = np.mean(f1_scores), np.std(f1_scores)

    print(f"\n{policy.capitalize()} Policy - Cross-validation results:")
    print(f"  Average F1 Score: {avg_f1:.4f} ± {std_f1:.4f}")

/home/DAVIDSON/dutuller/Workspace/DRI1/MusicGen


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]



Processing file: embeddings/NHS/Dance/s15-t15/last_embeddings.h5

Processed embeddings/NHS/Dance/s15-t15/last_embeddings.h5:
  embeddings/NHS/Dance: 408 samples

Processing file: embeddings/NHS/Love/s15-t15/last_embeddings.h5

Processed embeddings/NHS/Love/s15-t15/last_embeddings.h5:
  embeddings/NHS/Love: 365 samples

Processing file: embeddings/NHS/Lullaby/s15-t15/last_embeddings.h5

Processed embeddings/NHS/Lullaby/s15-t15/last_embeddings.h5:
  embeddings/NHS/Lullaby: 158 samples

Processing file: embeddings/NHS/Healing/s15-t15/last_embeddings.h5

Processed embeddings/NHS/Healing/s15-t15/last_embeddings.h5:
  embeddings/NHS/Healing: 527 samples

Processing file: embeddings/NHS/samples/wav/last_embeddings.h5

Processed embeddings/NHS/samples/wav/last_embeddings.h5:
  embeddings/NHS/samples: 118 samples
Processing fold 1/5
Fold 1 - Generous Policy:
  F1 Score: 0.1920
  Confusion Matrix:
[[1507 1723 1176 1181]
 [3602 3326 3589 2786]
 [1863 1242  954  339]
 [   0    0    0    0]]
Fold 