In [3]:
import pandas as pd
import glob
import os
import json
import zipfile
from sentence_transformers import CrossEncoder, SentenceTransformer
from huggingface_hub import snapshot_download # This helps us show download bars
import torch

# --- 1. HARDWARE SETUP ---
if torch.backends.mps.is_available():
    device = 'mps'
    print("üöÄ POWER UNLEASHED: Using Apple M4 (MPS) Acceleration")
else:
    device = 'cpu'
    print("‚ö†Ô∏è WARNING: Running on CPU.")

# --- 2. FIND FILES ---
print("\nüîç Scanning for Test Data...")
jsonl_files = glob.glob("*.jsonl") + glob.glob("data/*.jsonl") + glob.glob("SemEval_Task4/*.jsonl")
input_a, input_b = None, None

for f in jsonl_files:
    try:
        count = sum(1 for line in open(f))
        if count == 400: input_a = f
        elif count == 849: input_b = f
    except: pass

if not input_a or not input_b:
    input_a, input_b = 'test_track_a.jsonl', 'test_track_b.jsonl'
    print("‚ö†Ô∏è Using manual filenames (Auto-detection failed)")
else:
    print(f"   ‚úÖ Track A File: {input_a}")
    print(f"   ‚úÖ Track B File: {input_b}")

# --- 3. DOWNLOAD MODELS WITH PROGRESS BARS ---
print("\n‚¨áÔ∏è STARTING DOWNLOADS (This ensures you see progress)...")

# Model 1: DeBERTa v3 Large
print("   1. Downloading DeBERTa-v3-Large (~800MB)...")
model_a_id = 'cross-encoder/nli-deberta-v3-large'
snapshot_download(repo_id=model_a_id) # This triggers the bar

# Model 2: GTE Large
print("   2. Downloading GTE-Large-v1.5 (~1.5GB)...")
model_b_id = 'Alibaba-NLP/gte-large-en-v1.5'
snapshot_download(repo_id=model_b_id) # This triggers the bar

print("\n‚úÖ Downloads Complete. Loading into Memory...")
model_a = CrossEncoder(model_a_id, device=device)
model_b = SentenceTransformer(model_b_id, trust_remote_code=True, device=device)

# --- 4. EXECUTE TRACK A ---
print(f"\nüß† SCORING TRACK A ({input_a})...")
df_a = pd.read_json(input_a, lines=True)

# Column detection
anc_col = next((c for c in ['anchor_text', 'anchor'] if c in df_a.columns), 'anchor')
a_col = next((c for c in ['text_a', 'a'] if c in df_a.columns), 'a')
b_col = next((c for c in ['text_b', 'b'] if c in df_a.columns), 'b')

# Create pairs
pairs_a = df_a[[anc_col, a_col]].values.tolist()
pairs_b = df_a[[anc_col, b_col]].values.tolist()

# INFERENCE (Progress bar included)
scores_a = model_a.predict(pairs_a, batch_size=4, show_progress_bar=True)
scores_b = model_a.predict(pairs_b, batch_size=4, show_progress_bar=True)
preds_a = scores_a > scores_b

# --- 5. EXECUTE TRACK B ---
print(f"\nüß† EMBEDDING TRACK B ({input_b})...")
df_b = pd.read_json(input_b, lines=True)
text_col = next((c for c in ['text', 'story', 'anchor', 'anchor_text'] if c in df_b.columns), None)

if text_col:
    # INFERENCE (Progress bar included)
    embeddings = model_b.encode(
        df_b[text_col].tolist(), 
        batch_size=4, 
        show_progress_bar=True, 
        device=device,
        convert_to_numpy=True
    )
    embeddings_list = embeddings.tolist()
else:
    embeddings_list = []

# --- 6. ZIP IT UP ---
print("\nüì¶ Zipping Final Submission...")
os.makedirs('outputs', exist_ok=True)

with open('outputs/track_a.jsonl', 'w') as f:
    for val in preds_a:
        json.dump({"text_a_is_closer": bool(val)}, f)
        f.write('\n')

with open('outputs/track_b.jsonl', 'w') as f:
    for emb in embeddings_list:
        json.dump({"embedding": emb}, f)
        f.write('\n')

zip_name = 'submission_SOTA_FINAL.zip'
with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipf.write('outputs/track_a.jsonl', arcname='track_a.jsonl')
    zipf.write('outputs/track_b.jsonl', arcname='track_b.jsonl')

print(f"\nüèÜ READY! Upload '{zip_name}' to CodaBench Testing Phase.")

üöÄ POWER UNLEASHED: Using Apple M4 (MPS) Acceleration

üîç Scanning for Test Data...
   ‚úÖ Track A File: test_track_a.jsonl
   ‚úÖ Track B File: test_track_b.jsonl

‚¨áÔ∏è STARTING DOWNLOADS (This ensures you see progress)...
   1. Downloading DeBERTa-v3-Large (~800MB)...


Fetching 17 files:  29%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç               | 5/17 [26:16<1:03:03, 315.32s/it]


KeyboardInterrupt: 

In [4]:
import pandas as pd
import glob
import os
import json
import zipfile
from sentence_transformers import CrossEncoder, SentenceTransformer
import torch

# --- 1. HARDWARE SETUP ---
if torch.backends.mps.is_available():
    device = 'mps'
    print("üöÄ Using Apple M4 (MPS) - Ready for SOTA Models")
else:
    device = 'cpu'
    print("‚ö†Ô∏è Using CPU (Slow)")

# --- 2. FIND TEST FILES ---
print("üîç Scanning for Test Data...")
# We look for files with the correct line counts (400 and 849)
jsonl_files = glob.glob("*.jsonl") + glob.glob("data/*.jsonl") + glob.glob("SemEval_Task4/*.jsonl")
input_a, input_b = None, None

for f in jsonl_files:
    try:
        count = sum(1 for line in open(f))
        if count == 400: input_a = f
        elif count == 849: input_b = f
    except: pass

# Fallback if auto-detection fails
if not input_a or not input_b: 
    input_a, input_b = 'test_track_a.jsonl', 'test_track_b.jsonl'

print(f"   Track A File: {input_a}\n   Track B File: {input_b}")

# --- 3. LOAD MODELS FROM LOCAL FOLDERS ---
print("\nüìÇ Loading Models from your local download...")

# These match the folder names from the terminal command
path_a = './nli-deberta-v3-large'
path_b = './gte-large-en-v1.5'

# Load Model A (DeBERTa)
if os.path.exists(path_a):
    print("   ‚úÖ Found local DeBERTa model! Loading...")
    model_a = CrossEncoder(path_a, device=device)
else:
    print(f"   ‚ùå Error: Could not find folder '{path_a}'. Check where you ran the terminal command.")

# Load Model B (GTE)
if os.path.exists(path_b):
    print("   ‚úÖ Found local GTE model! Loading...")
    model_b = SentenceTransformer(path_b, trust_remote_code=True, device=device)
else:
    print(f"   ‚ùå Error: Could not find folder '{path_b}'.")

# --- 4. RUN TRACK A (SCORING) ---
print(f"\nüß† Scoring Track A (DeBERTa)...")
df_a = pd.read_json(input_a, lines=True)

# Column detection
anc_col = next((c for c in ['anchor_text', 'anchor'] if c in df_a.columns), 'anchor')
a_col = next((c for c in ['text_a', 'a'] if c in df_a.columns), 'a')
b_col = next((c for c in ['text_b', 'b'] if c in df_a.columns), 'b')

pairs_a = df_a[[anc_col, a_col]].values.tolist()
pairs_b = df_a[[anc_col, b_col]].values.tolist()

# Batch size 8 is safe for M4 with these local models
scores_a = model_a.predict(pairs_a, batch_size=8, show_progress_bar=True)
scores_b = model_a.predict(pairs_b, batch_size=8, show_progress_bar=True)
preds_a = scores_a > scores_b

# --- 5. RUN TRACK B (EMBEDDING) ---
print(f"\nüß† Embedding Track B (GTE)...")
df_b = pd.read_json(input_b, lines=True)
text_col = next((c for c in ['text', 'story', 'anchor', 'anchor_text'] if c in df_b.columns), None)

if text_col:
    embeddings = model_b.encode(
        df_b[text_col].tolist(), 
        batch_size=4, # GTE Large is big, keep batch small
        show_progress_bar=True, 
        device=device,
        convert_to_numpy=True
    )
    embeddings_list = embeddings.tolist()
else:
    embeddings_list = []

# --- 6. ZIP AND FINISH ---
print("\nüì¶ Zipping Submission...")
os.makedirs('outputs', exist_ok=True)

with open('outputs/track_a.jsonl', 'w') as f:
    for val in preds_a:
        json.dump({"text_a_is_closer": bool(val)}, f)
        f.write('\n')

with open('outputs/track_b.jsonl', 'w') as f:
    for emb in embeddings_list:
        json.dump({"embedding": emb}, f)
        f.write('\n')

zip_name = 'submission_SOTA_LOCAL.zip'
with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipf.write('outputs/track_a.jsonl', arcname='track_a.jsonl')
    zipf.write('outputs/track_b.jsonl', arcname='track_b.jsonl')

print(f"\nüèÜ READY! Upload '{zip_name}' to CodaBench Testing Phase.")

üöÄ Using Apple M4 (MPS) - Ready for SOTA Models
üîç Scanning for Test Data...
   Track A File: test_track_a.jsonl
   Track B File: test_track_b.jsonl

üìÇ Loading Models from your local download...
   ‚úÖ Found local DeBERTa model! Loading...
   ‚úÖ Found local GTE model! Loading...


A new version of the following files was downloaded from https://huggingface.co/Alibaba-NLP/new-impl:
- configuration.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/Alibaba-NLP/new-impl:
- modeling.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.



üß† Scoring Track A (DeBERTa)...


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [03:17<00:00,  3.94s/it]
Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [03:59<00:00,  4.79s/it]



üß† Embedding Track B (GTE)...


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 213/213 [01:24<00:00,  2.53it/s]



üì¶ Zipping Submission...


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [5]:
import numpy as np
import json
import zipfile
import os

print("üõ†Ô∏è Applying Fix for NLI Model Output...")

# 1. FIX THE SCORES
# The model output shape is likely (400, 3). We want column 1 (Entailment).
# We check the shape to be safe.
if len(scores_a.shape) > 1 and scores_a.shape[1] >= 2:
    print(f"   - Detected multi-column scores {scores_a.shape}. Extracting 'Entailment' (Index 1)...")
    final_scores_a = scores_a[:, 1]
    final_scores_b = scores_b[:, 1]
else:
    # Fallback if it was already 1D
    final_scores_a = scores_a
    final_scores_b = scores_b

# 2. RE-CALCULATE PREDICTIONS
# Now we compare single numbers, so we get a clean True/False list
preds_a = final_scores_a > final_scores_b
print(f"   - Re-calculated {len(preds_a)} predictions.")

# 3. SAVE & ZIP (Standard Routine)
print("üì¶ Zipping Final Submission...")
os.makedirs('outputs', exist_ok=True)

with open('outputs/track_a.jsonl', 'w') as f:
    for val in preds_a:
        # This will now work because 'val' is a simple Python boolean
        json.dump({"text_a_is_closer": bool(val)}, f)
        f.write('\n')

# We re-save Track B just to be sure (it was already fine, but good to keep in sync)
with open('outputs/track_b.jsonl', 'w') as f:
    for emb in embeddings_list:
        json.dump({"embedding": emb}, f)
        f.write('\n')

zip_name = 'submission_SOTA_FIXED.zip'
with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipf.write('outputs/track_a.jsonl', arcname='track_a.jsonl')
    zipf.write('outputs/track_b.jsonl', arcname='track_b.jsonl')

print(f"\nüèÜ SUCCESS! Upload '{zip_name}' to CodaBench Testing Phase.")

üõ†Ô∏è Applying Fix for NLI Model Output...
   - Detected multi-column scores (400, 3). Extracting 'Entailment' (Index 1)...
   - Re-calculated 400 predictions.
üì¶ Zipping Final Submission...

üèÜ SUCCESS! Upload 'submission_SOTA_FIXED.zip' to CodaBench Testing Phase.


In [8]:
import json
import zipfile
import os

print("üîç FINAL VERIFICATION PROTOCOL INITIATED...\n")

# 1. Verify Track A
print("Checking Track A (outputs/track_a.jsonl)...")
try:
    with open('outputs/track_a.jsonl', 'r') as f:
        lines = f.readlines()
        count_a = len(lines)
        first_line = json.loads(lines[0])
        
        # Check 1: Key Name
        if "text_a_is_closer" in first_line:
            print(f"   ‚úÖ Key 'text_a_is_closer' found.")
        else:
            print(f"   ‚ùå CRITICAL: Wrong key in Track A. Found: {first_line.keys()}")
            
        # Check 2: Value Type
        val = first_line["text_a_is_closer"]
        if isinstance(val, bool):
            print(f"   ‚úÖ Value type is BOOLEAN ({val}).")
        else:
            print(f"   ‚ùå CRITICAL: Wrong type. Expected bool, got {type(val)}.")
            
        # Check 3: Count
        if count_a == 400:
            print(f"   ‚úÖ Line count is exactly 400.")
        else:
            print(f"   ‚ö†Ô∏è WARNING: Line count is {count_a} (Expected 400).")

except Exception as e:
    print(f"   ‚ùå Error reading Track A: {e}")

# 2. Verify Track B
print("\nChecking Track B (outputs/track_b.jsonl)...")
try:
    with open('outputs/track_b.jsonl', 'r') as f:
        lines = f.readlines()
        count_b = len(lines)
        first_line = json.loads(lines[0])
        
        # Check 1: Key Name
        if "embedding" in first_line:
            print(f"   ‚úÖ Key 'embedding' (singular) found.")
        else:
            print(f"   ‚ùå CRITICAL: Wrong key. Found: {first_line.keys()}")
            
        # Check 2: Value Type & Shape
        emb = first_line["embedding"]
        if isinstance(emb, list) and len(emb) > 10:
            print(f"   ‚úÖ Value is a LIST of floats (Length: {len(emb)}).")
        else:
            print(f"   ‚ùå CRITICAL: Invalid embedding format.")
            
        # Check 3: Count
        if count_b == 849:
            print(f"   ‚úÖ Line count is exactly 849.")
        else:
            print(f"   ‚ö†Ô∏è WARNING: Line count is {count_b} (Expected ~849).")

except Exception as e:
    print(f"   ‚ùå Error reading Track B: {e}")

# 3. Verify Zip File
print("\nChecking Zip Archive (submission_SOTA_FIXED.zip)...")
try:
    with zipfile.ZipFile('submission_BGE.zip', 'r') as z:
        files = z.namelist()
        if 'track_a.jsonl' in files and 'track_b.jsonl' in files:
            print(f"   ‚úÖ Zip contains correct files: {files}")
        else:
            print(f"   ‚ùå CRITICAL: Zip is missing files. Found: {files}")
except Exception as e:
    print(f"   ‚ùå Error checking Zip: {e}")

print("\nüöÄ VERIFICATION COMPLETE. If all ticks are Green, you are safe to upload.")

üîç FINAL VERIFICATION PROTOCOL INITIATED...

Checking Track A (outputs/track_a.jsonl)...
   ‚úÖ Key 'text_a_is_closer' found.
   ‚úÖ Value type is BOOLEAN (False).
   ‚úÖ Line count is exactly 400.

Checking Track B (outputs/track_b.jsonl)...
   ‚úÖ Key 'embedding' (singular) found.
   ‚úÖ Value is a LIST of floats (Length: 1024).
   ‚úÖ Line count is exactly 849.

Checking Zip Archive (submission_SOTA_FIXED.zip)...
   ‚úÖ Zip contains correct files: ['track_a.jsonl', 'track_b.jsonl']

üöÄ VERIFICATION COMPLETE. If all ticks are Green, you are safe to upload.


In [7]:
import pandas as pd
import glob
import os
import json
import zipfile
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# --- 1. HARDWARE ---
if torch.backends.mps.is_available():
    device = 'mps'
    print("üöÄ Using Apple M4 (MPS) - BGE Edition")
else:
    device = 'cpu'

# --- 2. FIND TEST FILES ---
print("üîç Finding Test Data...")
jsonl_files = glob.glob("*.jsonl") + glob.glob("data/*.jsonl") + glob.glob("SemEval_Task4/*.jsonl")
input_a, input_b = None, None
for f in jsonl_files:
    try:
        count = sum(1 for line in open(f))
        if count == 400: input_a = f
        elif count == 849: input_b = f
    except: pass
if not input_a or not input_b: input_a, input_b = 'test_track_a.jsonl', 'test_track_b.jsonl'
print(f"   Track A: {input_a}\n   Track B: {input_b}")

# --- 3. LOAD LOCAL BGE MODELS ---
print("\nüìÇ Loading BGE Models...")

# Path A: Reranker
path_a = './bge-reranker-large'
if os.path.exists(path_a):
    print("   ‚úÖ Loading Reranker (Track A)...")
    # Rerankers are loaded slightly differently than CrossEncoders
    tokenizer_a = AutoTokenizer.from_pretrained(path_a)
    model_a = AutoModelForSequenceClassification.from_pretrained(path_a).to(device)
    model_a.eval()
else:
    print(f"   ‚ùå Error: '{path_a}' not found. Did you run the terminal command?")

# Path B: Embedding
path_b = './bge-large-en-v1.5'
if os.path.exists(path_b):
    print("   ‚úÖ Loading Embedder (Track B)...")
    model_b = SentenceTransformer(path_b, device=device)
else:
    print(f"   ‚ùå Error: '{path_b}' not found.")

# --- 4. RUN TRACK A (RERANKING) ---
print(f"\nüß† Scoring Track A (BGE Reranker)...")
df_a = pd.read_json(input_a, lines=True)

anc_col = next((c for c in ['anchor_text', 'anchor'] if c in df_a.columns), 'anchor')
a_col = next((c for c in ['text_a', 'a'] if c in df_a.columns), 'a')
b_col = next((c for c in ['text_b', 'b'] if c in df_a.columns), 'b')

# Reranker expects simple pairs: [Anchor, A] and [Anchor, B]
pairs_a = df_a[[anc_col, a_col]].values.tolist()
pairs_b = df_a[[anc_col, b_col]].values.tolist()

# Helper function for Reranker Inference
def predict_reranker(pairs, model, tokenizer, batch_size=8):
    scores = []
    # Process in chunks
    for i in range(0, len(pairs), batch_size):
        batch = pairs[i:i+batch_size]
        # Tokenize
        inputs = tokenizer(batch, padding=True, truncation=True, return_tensors='pt', max_length=512).to(device)
        with torch.no_grad():
            # Get logits (score)
            output = model(**inputs).logits.view(-1).float()
            scores.extend(output.cpu().numpy())
    return scores

print("   - Calculating scores...")
scores_a = predict_reranker(pairs_a, model_a, tokenizer_a)
scores_b = predict_reranker(pairs_b, model_a, tokenizer_a)

# Logic: Higher score = Better match
preds_a = [s_a > s_b for s_a, s_b in zip(scores_a, scores_b)]

# --- 5. RUN TRACK B (EMBEDDING) ---
print(f"\nüß† Embedding Track B (BGE Large)...")
df_b = pd.read_json(input_b, lines=True)
text_col = next((c for c in ['text', 'story', 'anchor', 'anchor_text'] if c in df_b.columns), None)

if text_col:
    # BGE works best with a prompt for asymmetric tasks, but for symmetric story similarity
    # we usually keep it raw. However, adding "Represent this story:" can sometimes help.
    # Let's stick to raw for safety unless specified.
    embeddings = model_b.encode(
        df_b[text_col].tolist(), 
        batch_size=8, 
        show_progress_bar=True, 
        device=device,
        normalize_embeddings=True # BGE requires normalized embeddings
    )
    embeddings_list = embeddings.tolist()
else:
    embeddings_list = []

# --- 6. SAVE & ZIP ---
print("\nüì¶ Zipping BGE Submission...")
os.makedirs('outputs', exist_ok=True)

with open('outputs/track_a.jsonl', 'w') as f:
    for val in preds_a:
        json.dump({"text_a_is_closer": bool(val)}, f)
        f.write('\n')

with open('outputs/track_b.jsonl', 'w') as f:
    for emb in embeddings_list:
        json.dump({"embedding": emb}, f)
        f.write('\n')

zip_name = 'submission_BGE.zip'
with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipf.write('outputs/track_a.jsonl', arcname='track_a.jsonl')
    zipf.write('outputs/track_b.jsonl', arcname='track_b.jsonl')

print(f"\nüèÜ READY! Upload '{zip_name}' to CodaBench.")

üöÄ Using Apple M4 (MPS) - BGE Edition
üîç Finding Test Data...
   Track A: test_track_a.jsonl
   Track B: test_track_b.jsonl

üìÇ Loading BGE Models...
   ‚úÖ Loading Reranker (Track A)...
   ‚úÖ Loading Embedder (Track B)...

üß† Scoring Track A (BGE Reranker)...
   - Calculating scores...

üß† Embedding Track B (BGE Large)...


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 107/107 [01:04<00:00,  1.65it/s]



üì¶ Zipping BGE Submission...

üèÜ READY! Upload 'submission_BGE.zip' to CodaBench.


In [9]:
import json
import zipfile
import os

zip_filename = 'submission_BGE.zip'

print(f"üîç VERIFYING: {zip_filename} ...\n")

if not os.path.exists(zip_filename):
    print(f"‚ùå CRITICAL ERROR: File '{zip_filename}' not found!")
else:
    try:
        with zipfile.ZipFile(zip_filename, 'r') as z:
            files = z.namelist()
            
            # --- CHECK 1: FILE STRUCTURE ---
            if 'track_a.jsonl' in files and 'track_b.jsonl' in files:
                print(f"   ‚úÖ ZIP Structure: OK (Found both jsonl files)")
            else:
                print(f"   ‚ùå ZIP ERROR: Missing files. Found: {files}")

            # --- CHECK 2: TRACK A CONTENT ---
            with z.open('track_a.jsonl') as f:
                lines = f.readlines()
                count = len(lines)
                first = json.loads(lines[0])
                
                # Check Count
                if count == 400:
                    print(f"   ‚úÖ Track A Count: OK (400 items)")
                else:
                    print(f"   ‚ö†Ô∏è Track A Count: WARNING ({count} items - Expected 400)")

                # Check Key & Type
                if "text_a_is_closer" in first and isinstance(first["text_a_is_closer"], bool):
                    print(f"   ‚úÖ Track A Format: OK (Key 'text_a_is_closer' is Boolean)")
                else:
                    print(f"   ‚ùå Track A ERROR: Invalid JSON format: {first}")

            # --- CHECK 3: TRACK B CONTENT ---
            with z.open('track_b.jsonl') as f:
                lines = f.readlines()
                count = len(lines)
                first = json.loads(lines[0])
                
                # Check Count
                if count == 849:
                    print(f"   ‚úÖ Track B Count: OK (849 items)")
                else:
                    print(f"   ‚ö†Ô∏è Track B Count: WARNING ({count} items - Expected 849)")

                # Check Key (The most common error)
                if "embedding" in first:
                    print(f"   ‚úÖ Track B Key: OK (Found singular 'embedding')")
                else:
                    print(f"   ‚ùå Track B ERROR: Key mismatch! Found: {list(first.keys())} (Expected 'embedding')")

                # Check Vector Size (BGE-Large should be 1024)
                vec_len = len(first["embedding"])
                if vec_len == 1024:
                    print(f"   ‚úÖ Track B Dimensions: OK (1024 for BGE-Large)")
                else:
                    print(f"   ‚ÑπÔ∏è Track B Dimensions: {vec_len} (Just FYI)")

        print("\nüöÄ STATUS: READY TO UPLOAD.")

    except Exception as e:
        print(f"\n‚ùå SCRIPT CRASHED: {e}")

üîç VERIFYING: submission_BGE.zip ...

   ‚úÖ ZIP Structure: OK (Found both jsonl files)
   ‚úÖ Track A Count: OK (400 items)
   ‚úÖ Track A Format: OK (Key 'text_a_is_closer' is Boolean)
   ‚úÖ Track B Count: OK (849 items)
   ‚úÖ Track B Key: OK (Found singular 'embedding')
   ‚úÖ Track B Dimensions: OK (1024 for BGE-Large)

üöÄ STATUS: READY TO UPLOAD.
