In [1]:
import pandas as pd
import numpy as np
import random
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import f1_score

In [2]:
# Read the Pokedex table from disk
df = pd.read_csv('../Datasets/pokedex_final.csv')  # Update this path if needed

# Pair up type1/type2 per row, then keep only the entries that are actually
# present (drops NaN and empty strings, so mono-type rows end up with one item)
raw_pairs = df[['type1', 'type2']].to_numpy().tolist()
df['type_list'] = pd.Series(
    [[name for name in pair if pd.notna(name) and name != ''] for pair in raw_pairs],
    index=df.index,
    dtype=object,
)

# Alphabetical vocabulary of every type that occurs in the dataset
# (expected to be the 18 types; the count is not checked here)
all_types = sorted(set().union(*df['type_list']))

# Fit the binarizer on that fixed vocabulary, then encode the true labels as
# one indicator column per type
mlb = MultiLabelBinarizer(classes=all_types)
mlb.fit(df['type_list'])
y_true = mlb.transform(df['type_list'])


In [3]:
# Function to randomly assign 1 or 2 types
def random_types(types=None, rng=None):
    """Draw a random mono- or dual-type guess.

    Parameters
    ----------
    types : iterable, optional
        Pool of type names to draw from. When omitted, the notebook-level
        ``all_types`` list is used.
    rng : random.Random, optional
        Source of randomness. When omitted, the global ``random`` module is
        used, so ``random.seed(...)`` controls the outcome.

    Returns
    -------
    list
        One or two distinct entries taken from the pool.

    Raises
    ------
    ValueError
        Propagated from ``sample`` when the pool holds fewer entries than the
        number of types drawn.
    """
    # Look up the global pool lazily so the default follows the current value
    # of ``all_types``; copy caller-supplied pools into a list because
    # ``sample`` needs a sequence.
    pool = all_types if types is None else list(types)
    source = random if rng is None else rng
    num_types = source.choice([1, 2])  # mono or dual type
    return source.sample(pool, num_types)

# Generate random predictions
# Seed the global generator first so that Restart & Run All reproduces the
# same baseline predictions, and therefore the same metric values.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
df['random_pred'] = [random_types() for _ in range(len(df))]

# Binarize the random predictions
y_pred = mlb.transform(df['random_pred'])

In [4]:
# Exact Match Accuracy
# A row is a hit only when the predicted set of types is identical to the true
# set, i.e. the two indicator rows agree in every column.
rows_identical = ((y_pred == 1) == (y_true == 1)).all(axis=1)
correct = int(rows_identical.sum())
accuracy = correct / len(y_true)
print(f"Exact Match Accuracy: {accuracy:.4f}")

# Macro F1 Score
# Unweighted mean of the per-type F1 scores.
f1 = f1_score(y_true, y_pred, average='macro')
print(f"Macro F1 Score: {f1:.4f}")

# Top-2 Accuracy
# The random baseline has no probabilities to rank, so its 1-2 sampled types
# stand in for the "Top-2" guesses. A row is a hit when at least one of its
# true types is among the predicted types.
rows_overlapping = ((y_true == 1) & (y_pred == 1)).any(axis=1)
correct_top2 = int(rows_overlapping.sum())

top2_accuracy = correct_top2 / len(y_true)
print(f"Top-2 Accuracy: {top2_accuracy:.4f}")

Exact Match Accuracy: 0.0146
Macro F1 Score: 0.0819
Top-2 Accuracy: 0.1268
