File này giúp thí sinh có cái nhìn tổng quan về cách tính điểm số trên leaderboard của BTC

In [24]:
import os
import numpy as np
import pandas as pd
import math
from sklearn.metrics import mean_squared_error, f1_score
from typing import Dict, List, Tuple, Optional, Any

# Folder named 'sample_data' under the current working directory.
ROOT = os.path.join(os.getcwd(), 'sample_data')

# ALGO Task: dataset description and the submission file to score.
ALGO_TASK_DIR = os.path.join(ROOT, 'ALGO_Task')
ALGO_DATASET, ALGO_PRED = (
    os.path.join(ALGO_TASK_DIR, name) for name in ('dataset.txt', 'pred.csv')
)

# CV Task: gold labels, predictions and the test listing.
CV_TASK_DIR = os.path.join(ROOT, 'CV_Task')
CV_GOLD, CV_PRED, CV_TEST = (
    os.path.join(CV_TASK_DIR, name) for name in ('gold.csv', 'pred.csv', 'test.csv')
)

# NLP Task: gold labels, predictions and the test listing.
NLP_TASK_DIR = os.path.join(ROOT, 'NLP_Task')
NLP_GOLD, NLP_PRED, NLP_TEST = (
    os.path.join(NLP_TASK_DIR, name) for name in ('gold.csv', 'pred.csv', 'test.csv')
)

# CV Task

In [25]:

def cv_task_score(gold_file, pred_file):
    """Return the RMSE between the gold and predicted ``Age`` columns.

    The two CSV files are joined on ``ID`` with pandas' default (inner)
    merge, so only IDs present in both files take part in the score.
    NOTE(review): IDs missing from the prediction file are therefore dropped
    rather than penalised - confirm this is the intended leaderboard rule.

    Args:
        gold_file: Path to the CSV with the reference ``ID`` and ``Age`` columns.
        pred_file: Path to the CSV with the predicted ``ID`` and ``Age`` columns.

    Returns:
        Square root of the mean squared error over the joined rows.
    """
    # Shared columns other than the join key get the _gold / _pred suffixes.
    paired = pd.read_csv(gold_file).merge(
        pd.read_csv(pred_file), on='ID', suffixes=('_gold', '_pred')
    )
    return np.sqrt(mean_squared_error(paired['Age_gold'], paired['Age_pred']))

In [26]:
# Score the CV prediction file against the gold file and print the RMSE.
cv_score = cv_task_score(CV_GOLD, CV_PRED)
print(f"Điểm RMSE: {cv_score}")

Điểm RMSE: 2.0


# NLP Task

In [27]:
def nlp_task_score(gold_file, pred_file):
    """Return the macro-averaged F1 of predicted ``Emotion`` labels.

    Both CSV files are read with ``ID`` forced to ``str`` and joined on ``ID``
    with pandas' default (inner) merge, so only IDs present in both files are
    scored.
    NOTE(review): IDs missing from the prediction file are dropped rather
    than counted as errors - confirm this is the intended leaderboard rule.

    Args:
        gold_file: Path to the CSV with the reference ``ID`` and ``Emotion`` columns.
        pred_file: Path to the CSV with the predicted ``ID`` and ``Emotion`` columns.

    Returns:
        Macro F1 over the seven fixed emotion labels; a label with an
        undefined F1 contributes 0 (``zero_division=0``).
    """
    emotion_labels = ["anger", "disgust", "enjoyment", "fear", "sadness", "surprise", "other"]

    # Keep IDs as strings so both files compare them textually.
    id_as_text = {'ID': str}
    paired = pd.read_csv(gold_file, dtype=id_as_text).merge(
        pd.read_csv(pred_file, dtype=id_as_text),
        on='ID',
        suffixes=('_gold', '_pred'),
    )

    return f1_score(
        y_true=paired['Emotion_gold'],
        y_pred=paired['Emotion_pred'],
        labels=emotion_labels,
        average='macro',
        zero_division=0,
    )

In [28]:
# Score the NLP prediction file against the gold file and print the macro F1
# with 5 significant digits.
nlp_score = nlp_task_score(NLP_GOLD, NLP_PRED)
print(f"NLP Score: {nlp_score:.5}")

NLP Score: 0.20952


# ALGO Task

In [29]:
def normalize_id(val: Any) -> str:
    """Return a canonical string form of an identifier.

    The value is converted to ``str`` and stripped. Numeric text is reduced
    to its integer form so that ``1``, ``"1"``, ``" 1 "`` and ``"1.0"`` all
    map to ``"1"``. Non-integral numbers are truncated toward zero by
    ``int()`` (``"1.9"`` becomes ``"1"``). Anything that cannot be turned
    into an integer is returned as the stripped text.

    Args:
        val: Identifier of any type (string, int, float, ...).

    Returns:
        The normalised identifier string.
    """
    v_str = str(val).strip()
    try:
        # Plain integer text is converted directly so that long IDs are not
        # rounded by a float round-trip (floats are exact only up to 2**53).
        return str(int(v_str))
    except ValueError:
        pass
    try:
        return str(int(float(v_str)))
    except (ValueError, OverflowError):
        # ValueError: not numeric, or NaN. OverflowError: infinity, which
        # int() cannot represent and which previously escaped uncaught.
        return v_str

class ProblemInstance:
    """Vehicles and orders belonging to one test of the ALGO task."""

    def __init__(self, test_id: int):
        """Create an empty instance identified by ``test_id``."""
        self.test_id = test_id
        # Normalised vehicle ID -> {"capacity", "price", "start"}.
        self.vehicles = {}
        # Normalised order ID -> {"weight", "pickup", "delivery"}.
        self.orders = {}
        # Smallest price passed to add_vehicle so far; +inf until the first call.
        self.min_vehicle_price = float('inf')

    def add_vehicle(self, v_id, capacity, price, start_x, start_y):
        """Store a vehicle under its normalised ID and track the minimum price."""
        key = normalize_id(v_id)
        self.vehicles[key] = {
            "capacity": capacity,
            "price": price,
            "start": (start_x, start_y),
        }
        self.min_vehicle_price = min(self.min_vehicle_price, price)

    def add_order(self, o_id, weight, p_x, p_y, d_x, d_y):
        """Store an order under its normalised ID with pickup and delivery points."""
        key = normalize_id(o_id)
        self.orders[key] = {
            "weight": weight,
            "pickup": (p_x, p_y),
            "delivery": (d_x, d_y),
        }

    @staticmethod
    def _leg_length(order) -> float:
        """Return the straight-line distance from an order's pickup to its delivery."""
        pickup, delivery = order["pickup"], order["delivery"]
        return math.sqrt((pickup[0] - delivery[0]) ** 2 + (pickup[1] - delivery[1]) ** 2)

    def get_lower_bound(self) -> float:
        """Return the reference cost used to normalise a submission's cost.

        It is the sum of every order's pickup-to-delivery distance multiplied
        by ``min_vehicle_price``, or ``0.0`` when the instance has no vehicles
        or no orders.
        """
        if self.vehicles and self.orders:
            total_length = sum(self._leg_length(order) for order in self.orders.values())
            return total_length * self.min_vehicle_price
        return 0.0

class InputParser:
    """Reader for the plain-text ALGO dataset file."""

    @staticmethod
    def parse(filepath: str) -> Dict[int, "ProblemInstance"]:
        """Parse a dataset file into one ``ProblemInstance`` per test.

        Recognised lines, after stripping surrounding whitespace:

        * ``Test <n>`` starts a new test with integer ID ``n``.
        * ``Vehicle`` / ``Order`` select which kind of row follows.
        * Lines starting with ``ID,`` are column headers and are skipped.
        * Any other line is split on commas. A ``Vehicle`` row needs at least
          5 fields (ID, capacity, price, start x, start y) and an ``Order``
          row at least 6 (ID, weight, pickup x, pickup y, delivery x,
          delivery y). Shorter rows, rows outside a section and rows before
          the first valid test header are ignored.

        Args:
            filepath: Path to the dataset text file.

        Returns:
            Mapping from test ID to its populated ``ProblemInstance``.

        Raises:
            FileNotFoundError: If ``filepath`` does not exist.
            ValueError: If a numeric field of a data row cannot be converted
                by ``float()``.
        """
        if not os.path.exists(filepath):
            raise FileNotFoundError(f"Dataset not found: {filepath}")

        tests = {}
        current_test = None
        section = None
        # utf-8-sig reads plain UTF-8/ASCII and also drops a leading BOM,
        # which would otherwise hide the first "Test" header.
        with open(filepath, "r", encoding="utf-8-sig") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue

                if line.startswith("Test "):
                    section = None
                    try:
                        # split() without arguments tolerates repeated spaces.
                        t_id = int(line.split()[1])
                    except ValueError:
                        # Unreadable header: drop this block's rows instead of
                        # silently appending them to the previous test.
                        current_test = None
                    else:
                        current_test = ProblemInstance(t_id)
                        tests[t_id] = current_test
                    continue

                if line in ("Vehicle", "Order"):
                    section = line
                    continue
                if line.startswith("ID,"):
                    continue
                if current_test is None:
                    continue

                p = line.split(",")
                if section == "Vehicle" and len(p) >= 5:
                    current_test.add_vehicle(
                        p[0], float(p[1]), float(p[2]), float(p[3]), float(p[4])
                    )
                elif section == "Order" and len(p) >= 6:
                    current_test.add_order(
                        p[0], float(p[1]), float(p[2]), float(p[3]), float(p[4]), float(p[5])
                    )
        return tests

class RouteValidator:
    """Checks a submitted routing plan for one test and prices it."""

    @staticmethod
    def dist(p1, p2):
        """Return the Euclidean distance between 2-D points ``p1`` and ``p2``."""
        dx = p1[0] - p2[0]
        dy = p1[1] - p2[1]
        return math.sqrt(dx**2 + dy**2)

    @staticmethod
    def evaluate(instance, sub_df):
        """Return the total cost of the submission rows for ``instance``.

        Rows of ``sub_df`` whose ``Test`` equals ``instance.test_id`` are
        grouped by ``Vehicle_ID`` and replayed in ``Stop_Order``. Each vehicle
        starts at its ``start`` point with an empty load; every stop adds the
        travelled distance times the vehicle's ``price``.

        ``float('inf')`` is returned when the plan is rejected: no rows for
        the test, a non-numeric ``Stop_Order``, an unknown vehicle or order,
        a ``Type`` other than ``Pickup``/``Delivery``, a pickup of an order
        that is not pending, a delivery of an order not picked up by the same
        vehicle, a load above capacity (with a 1e-9 tolerance), or any order
        left undelivered.

        Args:
            instance: Object exposing ``test_id``, ``vehicles`` and ``orders``.
            sub_df: DataFrame with ``Test``, ``Vehicle_ID``, ``Order_ID``,
                ``Type`` and ``Stop_Order`` columns.

        Returns:
            The accumulated cost, or ``float('inf')`` for a rejected plan.
        """
        rejected = float('inf')

        rows = sub_df[sub_df['Test'] == instance.test_id].copy()
        if rows.empty:
            return rejected

        # Stops are replayed per vehicle in ascending Stop_Order.
        rows['Stop_Order'] = pd.to_numeric(rows['Stop_Order'], errors='coerce')
        if rows['Stop_Order'].isnull().any():
            return rejected
        rows = rows.sort_values(['Vehicle_ID', 'Stop_Order'])

        order_state = dict.fromkeys(instance.orders, "PENDING")
        carrier = {}  # order ID -> vehicle that picked it up
        cost = 0.0

        for vehicle_id, route in rows.groupby('Vehicle_ID'):
            if vehicle_id not in instance.vehicles:
                return rejected
            vehicle = instance.vehicles[vehicle_id]
            load = 0.0
            position = vehicle['start']

            for order_id, raw_action in zip(route['Order_ID'], route['Type']):
                action = str(raw_action).strip()
                if order_id not in instance.orders:
                    return rejected
                order = instance.orders[order_id]

                if action == 'Pickup':
                    if order_state[order_id] != "PENDING":
                        return rejected
                    destination = order['pickup']
                    load = load + order['weight']
                    order_state[order_id] = "PICKED"
                    carrier[order_id] = vehicle_id
                elif action == 'Delivery':
                    if order_state[order_id] != "PICKED":
                        return rejected
                    # Only the vehicle that picked the order up may deliver it.
                    if carrier.get(order_id) != vehicle_id:
                        return rejected
                    destination = order['delivery']
                    load = load - order['weight']
                    order_state[order_id] = "DELIVERED"
                else:
                    return rejected

                if load > vehicle['capacity'] + 1e-9:
                    return rejected
                cost += RouteValidator.dist(position, destination) * vehicle['price']
                position = destination

        if not all(state == "DELIVERED" for state in order_state.values()):
            return rejected
        return cost
        
def algo_task_score(dataset_path, pred_file):
    """Return the mean cost / reference-cost ratio of a routing submission.

    Every test in the dataset whose reference cost (``get_lower_bound``)
    exceeds 1e-9 is evaluated with ``RouteValidator.evaluate``. Tests whose
    plan is rejected (cost of ``inf``) are left out of the average.

    NOTE(review): the three failure paths return different values - ``None``
    for a missing dataset, ``inf`` when no test is valid, ``0.0`` after any
    exception. Confirm that callers treat them as intended, in particular
    that ``0.0`` is not ranked as a score.

    Args:
        dataset_path: Path to the dataset text file.
        pred_file: Path to the submission CSV.

    Returns:
        The average ratio over the valid tests, ``float('inf')`` if there is
        none, ``None`` if the dataset file does not exist, or ``0.0`` if an
        exception was raised while scoring (the message is printed).
    """
    if not os.path.exists(dataset_path):
        print(f"[ALGO-Error] Dataset not found: {dataset_path}")
        return None

    try:
        instances = InputParser.parse(dataset_path)
        submission = pd.read_csv(pred_file)

        # Unparseable test numbers become -1.
        if 'Test' in submission.columns:
            test_numbers = pd.to_numeric(submission['Test'], errors='coerce')
            submission['Test'] = test_numbers.fillna(-1).astype(int)
        # Bring IDs to the same canonical form the dataset parser uses.
        for id_column in ('Vehicle_ID', 'Order_ID'):
            if id_column in submission.columns:
                submission[id_column] = submission[id_column].apply(normalize_id)

        ratio_sum = 0.0
        scored_tests = 0
        for t_id in set(instances.keys()):
            instance = instances[t_id]
            reference = instance.get_lower_bound()
            if reference <= 1e-9:
                continue

            cost = RouteValidator.evaluate(instance, submission)
            if cost == float('inf'):
                continue
            ratio_sum += (cost / reference)
            scored_tests += 1

        if scored_tests > 0:
            return ratio_sum / scored_tests
        return float('inf')
    except Exception as e:
        print(f"[ALGO-Error] {str(e)}")
        return 0.0

In [30]:
algo_score = algo_task_score(ALGO_DATASET, ALGO_PRED)
# algo_task_score returns None when the dataset file is missing, and None
# rejects the float format spec used below with a TypeError.
if algo_score is None:
    print("ALGO Score: N/A")
else:
    print(f"ALGO Score: {algo_score:.5}")

ALGO Score: 0.93823
