In [3]:
# Notebook 5: Model Deployment
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Choose the compute device, preferring Apple MPS, then CUDA, then plain CPU.
# The conditional expression short-circuits in the same order as an
# if/elif/else chain would.
device = torch.device(
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

print(f"Using device: {device}")

# Restore the fine-tuned classifier, its tokenizer, and the label lookup array.
model = AutoModelForSequenceClassification.from_pretrained('./final_model')
tokenizer = AutoTokenizer.from_pretrained('./final_model')
# NOTE: allow_pickle=True lets NumPy unpickle the file contents, so
# label_classes.npy must come from a trusted source.
label_classes = np.load('label_classes.npy', allow_pickle=True)

# Move the weights to the chosen device and switch to inference mode
# (both calls return the module itself, so they can be chained).
model = model.to(device).eval()
print("Model loaded successfully!")

# Prediction function
def predict_theme(text, top_k=3):
    """Classify one album text and return the best theme plus ranked runners-up.

    Relies on the notebook-level ``tokenizer``, ``model``, ``device`` and
    ``label_classes`` objects.

    Args:
        text: The text to classify (e.g. "Album: ... | Artist: ... | ...").
        top_k: Number of ranked candidates to return. Defaults to 3. It is
            clamped to the number of classes the model outputs, so a model
            with fewer labels than ``top_k`` does not make ``torch.topk`` raise.

    Returns:
        A tuple ``(theme, confidence, top_themes)`` where ``theme`` is the
        entry of ``label_classes`` with the highest logit, ``confidence`` is
        the highest softmax probability as a float, and ``top_themes`` is a
        list of ``(label, probability)`` pairs in descending probability order.
    """
    # Input is truncated and padded to exactly 512 tokens.
    tokens = tokenizer(
        text,
        truncation=True,
        padding='max_length',
        max_length=512,
        return_tensors='pt'
    )
    # Tokenizer output must live on the same device as the model.
    tokens = {k: v.to(device) for k, v in tokens.items()}

    with torch.no_grad():
        outputs = model(**tokens)
        probs = torch.softmax(outputs.logits, dim=-1)
        prediction = torch.argmax(outputs.logits, dim=-1).cpu().item()
        confidence = torch.max(probs, dim=-1)[0].cpu().item()

        # Never ask topk for more entries than there are classes.
        k = min(top_k, probs.shape[-1])
        top_probs, top_indices = torch.topk(probs, k)
        # Index [0] selects the single example in the batch.
        top_themes = [
            (label_classes[idx], prob)
            for idx, prob in zip(top_indices[0].tolist(), top_probs[0].tolist())
        ]

    return label_classes[prediction], confidence, top_themes

# Hand-written sample inputs in several languages, formatted as
# "Album: ... | Artist: ... | Genre: ... | Description: ...".
test_examples = [
    "Album: Songs of Heritage | Artist: Indigenous Artist | Genre: Folk | Description: Traditional songs celebrating cultural identity and ancestral connections",
    "Album: Corazón Roto | Artist: Latino Singer | Genre: Latin Pop | Description: Canciones románticas sobre el amor perdido y la esperanza",
    "Album: Digital Soundscapes | Artist: Electronic Producer | Genre: Electronic | Description: Experimental ambient music exploring technology and consciousness",
    "Album: 愛の歌 | Artist: J-Pop Artist | Genre: J-Pop | Description: Songs about love and relationships in modern Tokyo",
    "Album: Street Voices | Artist: Hip-Hop Artist | Genre: Hip-Hop | Description: Powerful lyrics about social justice and community empowerment",
    "Album: Chanson d'Amour | Artist: French Singer | Genre: Chanson | Description: Poésie musicale sur l'amour et la mélancolie parisienne"
]

print("\nTest Predictions (Multilingual):")
# Number the examples from 1 and show the winning theme plus the ranked list.
for number, example in enumerate(test_examples, start=1):
    predicted, score, ranked = predict_theme(example)
    # Only the first 60 characters of the input are echoed.
    print(f"\n{number}. {example[:60]}...")
    print(f"   Predicted: {predicted} (confidence: {score:.3f})")
    ranked_text = ', '.join(f'{name}({prob:.2f})' for name, prob in ranked)
    print(f"   Top 3: {ranked_text}")



Using device: mps
Model loaded successfully!

Test Predictions (Multilingual):

1. Album: Songs of Heritage | Artist: Indigenous Artist | Genre...
   Predicted: Identity & Heritage (confidence: 0.699)
   Top 3: Identity & Heritage(0.70), Place & Landscape(0.17), Introspection & Philosophy(0.04)

2. Album: Corazón Roto | Artist: Latino Singer | Genre: Latin P...
   Predicted: Love & Relationships (confidence: 0.709)
   Top 3: Love & Relationships(0.71), Introspection & Philosophy(0.18), Experimental & Abstract(0.04)

3. Album: Digital Soundscapes | Artist: Electronic Producer | G...
   Predicted: Experimental & Abstract (confidence: 0.506)
   Top 3: Experimental & Abstract(0.51), Introspection & Philosophy(0.19), Love & Relationships(0.11)

4. Album: 愛の歌 | Artist: J-Pop Artist | Genre: J-Pop | Descripti...
   Predicted: Love & Relationships (confidence: 0.724)
   Top 3: Love & Relationships(0.72), Introspection & Philosophy(0.17), Experimental & Abstract(0.04)

5. Album: Street Voices |

In [4]:
# Test 2025 Polaris Prize Data
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

print("=== Testing 2025 Polaris Prize Data ===")

# Read the 2025 albums to be classified.
test_df = pd.read_csv('../polaris_2025_test_set.csv')
print(f"Loaded {len(test_df)} albums from 2025")

# Choose the compute device, preferring Apple MPS, then CUDA, then plain CPU.
device = torch.device(
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

print(f"Using device: {device}")

# Restore the fine-tuned classifier, its tokenizer, and the label lookup array.
model = AutoModelForSequenceClassification.from_pretrained('./final_model')
tokenizer = AutoTokenizer.from_pretrained('./final_model')
# NOTE: allow_pickle=True lets NumPy unpickle the file contents, so
# label_classes.npy must come from a trusted source.
label_classes = np.load('label_classes.npy', allow_pickle=True)

# Move the weights to the chosen device and switch to inference mode
# (both calls return the module itself, so they can be chained).
model = model.to(device).eval()
print("Model loaded successfully!")

# Create text features
def create_text_features(row):
    """Build the " | "-separated text for one album row.

    Each of the ``album``, ``artist``, ``genre`` and ``description`` fields is
    rendered as "<Label>: <value>" and included only when it is present and
    not null. A column that is absent from ``row`` is skipped instead of
    raising ``KeyError``.

    Args:
        row: A mapping-like row (e.g. a ``pandas.Series``) supporting ``.get``.

    Returns:
        The joined feature string; an empty string if no field is usable.
    """
    fields = (
        ('album', 'Album'),
        ('artist', 'Artist'),
        ('genre', 'Genre'),
        ('description', 'Description'),
    )
    parts = []
    for column, label in fields:
        # .get() without a default yields None for a missing column, and
        # pd.isna treats None like NaN, so absent columns are skipped.
        value = row.get(column)
        if pd.isna(value):
            continue
        # A description that is the literal string 'nan' is treated as empty.
        if column == 'description' and str(value) == 'nan':
            continue
        parts.append(f"{label}: {value}")
    return " | ".join(parts)

# Build one feature string per album (row-wise apply) and store it as 'text'.
text_column = test_df.apply(create_text_features, axis=1)
test_df['text'] = text_column
print(f"Created text features for {len(test_df)} albums")

# Prediction function
def predict_theme(text):
    """Return the most likely theme for ``text`` and its softmax probability.

    Relies on the notebook-level ``tokenizer``, ``model``, ``device`` and
    ``label_classes`` objects.

    Args:
        text: The text to classify.

    Returns:
        A tuple ``(theme, confidence)``: the ``label_classes`` entry with the
        highest logit, and the highest softmax probability as a float.
    """
    # Input is truncated and padded to exactly 512 tokens.
    encoded = tokenizer(
        text,
        return_tensors='pt',
        max_length=512,
        padding='max_length',
        truncation=True,
    )
    # Tokenizer output must live on the same device as the model.
    model_inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        logits = model(**model_inputs).logits
        class_probs = torch.softmax(logits, dim=-1)
        best_index = torch.argmax(logits, dim=-1).cpu().item()
        best_prob = class_probs.max(dim=-1).values.cpu().item()

    return label_classes[best_index], best_prob

# Classify every album text, in row order.
print("Making predictions...")
theme_conf_pairs = [predict_theme(album_text) for album_text in test_df['text']]
predictions = [theme for theme, _ in theme_conf_pairs]
confidences = [conf for _, conf in theme_conf_pairs]

test_df['predicted_theme'] = predictions
test_df['confidence'] = confidences

print(f"Made predictions for all {len(test_df)} albums")

# Summarise how often each theme was predicted (most frequent first).
print("\n=== Results Summary ===")
theme_distribution = test_df['predicted_theme'].value_counts()
print("Predicted themes:")
for theme_name, n_albums in theme_distribution.items():
    share = (n_albums / len(test_df)) * 100
    print(f"  {theme_name}: {n_albums} albums ({share:.1f}%)")

mean_confidence = test_df['confidence'].mean()
print(f"\nAverage confidence: {mean_confidence:.3f}")

# Show the first five albums (or fewer when the table is shorter).
print("\n=== Sample Predictions ===")
for position, (_, album_row) in enumerate(test_df.head(5).iterrows(), start=1):
    print(f"{position}. '{album_row['album']}' by {album_row.get('artist', 'Unknown')}")
    print(f"   Predicted: {album_row['predicted_theme']} (confidence: {album_row['confidence']:.3f})")

# Persist the table, including the two new prediction columns, without the index.
test_df.to_csv('polaris_2025_predictions.csv', index=False)
print("\nSaved predictions to polaris_2025_predictions.csv")

print("\n2025 Polaris Prize prediction complete!")

=== Testing 2025 Polaris Prize Data ===
Loaded 40 albums from 2025
Using device: mps
Model loaded successfully!
Created text features for 40 albums
Making predictions...
Made predictions for all 40 albums

=== Results Summary ===
Predicted themes:
  Experimental & Abstract: 10 albums (25.0%)
  Love & Relationships: 9 albums (22.5%)
  Identity & Heritage: 7 albums (17.5%)
  Introspection & Philosophy: 7 albums (17.5%)
  Social Commentary: 5 albums (12.5%)
  Place & Landscape: 2 albums (5.0%)

Average confidence: 0.541

=== Sample Predictions ===
1. 'Serene Demon' by Art d'Ecco
   Predicted: Love & Relationships (confidence: 0.371)
2. 'Only Dust Remains' by Backxwash
   Predicted: Social Commentary (confidence: 0.731)
3. 'CODE NOIR' by Quinton Barnes
   Predicted: Identity & Heritage (confidence: 0.618)
4. 'Feu de garde' by Bibi Club
   Predicted: Love & Relationships (confidence: 0.350)
5. 'Basia's Palace' by Basia Bulat
   Predicted: Experimental & Abstract (confidence: 0.639)

Saved p