# In [None]:  (notebook cell 1 marker, commented out so the file parses as Python)
# ================================================================
# 🎯 COMPREHENSIVE GRADING SYSTEM (UPDATED METRICS)
# ================================================================
# METRICS: MAE, MSE, RMSE, R2 Score, QWK (Quadratic Weighted Kappa)
# FEATURES: Auto-search of the model path, Hybrid Scoring, LLM Integration
# ================================================================

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# --- TAMBAHAN IMPORT UNTUK METRICS ---
from sklearn.metrics import mean_squared_error, r2_score, cohen_kappa_score, mean_absolute_error
import openai
import json
import joblib
import os
from xgboost import XGBRegressor

# Startup banner for the evaluation cell (one call, one line per argument).
print(
    "=" * 80,
    "üéØ INITIALIZING COMPREHENSIVE GRADING COMPARISON SYSTEM",
    "=" * 80,
    sep="\n",
)

# =====================================================
# üìä STEP 1: INPUT DATA (Manual Dictionary)
# =====================================================

def input_manual():
    """Return the hard-coded evaluation fixture.

    Returns:
        dict with keys "soal" (question), "kunci_jawaban" (reference text),
        "max_score" (top of the human scoring scale) and "data_siswa"
        (list of {"nama", "jawaban", "skor_asli"} records).
    """
    question = """Write a response that explains how the features of the setting affect the cyclist. In your response, include examples from the essay that support your conclusion."""

    reference = """ROUGH ROAD AHEAD: Do Not Exceed Posted Speed Limit by Joe Kurmaskie... (Truncated for brevity) ...And I promised myself right then that I'd always stick to it in the future."""

    # (answer text, human score) pairs; students are numbered in this order.
    graded_answers = [
        ("The features of the setting affect the cyclist in many ways...", 1),
        ("The features of the setting affected the cyclist in a negative way...", 2),
        ("Everyone travels to unfamiliar places...", 1),
        ("I believe the features of the cyclist affected him because he was impatient...", 1),
        ("The setting effects the cyclist because of the setting were diffrent...", 2),
        ("There were many features of the setting that affected the cyclist...", 1),
        ("The cyclist was riding through a tower when he stopped for directions...", 1),
        ("The affects of the cyclist is if it does not change...", 0),
        ("The essay 'Rough Road Ahead' describes a man's bicycle ride...", 2),
        ("In the story, 'Rough Road Ahead' written by Joe Kurmaskie...", 3),
    ]

    students = [
        {"nama": f"Siswa {number}", "jawaban": text, "skor_asli": score}
        for number, (text, score) in enumerate(graded_answers, start=1)
    ]

    return {
        "soal": question,
        "kunci_jawaban": reference,
        "max_score": 3,
        "data_siswa": students,
    }

# =====================================================
# üîß STEP 2: LOAD MODELS (SMART SEARCH)
# =====================================================

print("\nüîÑ Loading SBERT model...")
try:
    encoder = SentenceTransformer("paraphrase-MiniLM-L6-v2")
    print("‚úÖ SBERT loaded!")
except Exception as e:
    print(f"‚ùå SBERT Error: {e}")
    encoder = None

print("\nüîÑ Loading XGBoost Model...")

class MyModel(torch.nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    The attribute names fc1 / relu / fc2 are kept as-is because they become
    the parameter keys of the module's state_dict.
    """

    def __init__(self, input_dim=384, hidden_dim=128, output_dim=1):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Apply fc1, the ReLU activation, then fc2 to ``x``."""
        hidden = self.fc1(x)
        activated = self.relu(hidden)
        return self.fc2(activated)

def find_model_path(filename):
    """Locate ``filename`` and return a usable path, or None if not found.

    The name is first tried as given (relative to the working directory or
    absolute). Otherwise a fixed list of directories is walked recursively
    and the first directory containing an entry with that exact file name
    wins.
    """
    if os.path.exists(filename):
        return filename

    search_roots = ('/kaggle/input', '/kaggle/working', '/content', 'models')
    for base in search_roots:
        if not os.path.exists(base):
            continue
        for current_dir, _subdirs, names in os.walk(base):
            if filename in names:
                return os.path.join(current_dir, filename)

    return None

def load_best_model():
    """Load the pickled scoring model, or fall back to a dummy regressor.

    Returns:
        (model, model_type, model_max_score) where model_type is "dl" for a
        torch module and "ml" otherwise, and model_max_score is the upper
        bound used to normalise raw predictions (12.0 here).
    """
    target_filename = "eXtreme_Gradient_Boosting_(XGBoost).pkl"
    print(f"üîé Mencari file: {target_filename} ...")
    model_path = find_model_path(target_filename)

    if model_path:
        print(f"‚úÖ Model ditemukan di: {model_path}")
        try:
            # SECURITY: joblib.load unpickles arbitrary objects, and the path
            # comes from a directory walk. Only load files you trust.
            data = joblib.load(model_path)
            # The artifact may be the estimator itself or a dict wrapping it.
            model = data.get('model', data) if isinstance(data, dict) else data
            # MyModel subclasses torch.nn.Module, so one isinstance check is enough.
            m_type = "dl" if isinstance(model, torch.nn.Module) else "ml"
            return model, m_type, 12.0
        except Exception as e:
            print(f"‚ùå Error loading pickle: {e}")
    else:
        print(f"‚ö†Ô∏è Model '{target_filename}' TIDAK DITEMUKAN.")

    print("‚ö†Ô∏è Menggunakan DUMMY Model (Random XGBoost).")
    # Seed the synthetic training data as well as the regressor; otherwise the
    # placeholder model (and every score derived from it) changes on each run.
    rng = np.random.default_rng(42)
    dummy = XGBRegressor(random_state=42)
    dummy.fit(rng.random((10, 384)), rng.random(10) * 12)
    return dummy, "ml", 12.0

# Resolve the scoring model once; grade_with_model_only reads these three
# module-level names (model object, "ml"/"dl" kind, and raw-score upper bound).
best_model, model_type, model_max_score = load_best_model()

# =====================================================
# üß† STEP 3: GRADING FUNCTIONS
# =====================================================

def grade_with_model_only(jawaban, kunci_jawaban):
    """Score an answer on a 0-100 scale without calling the LLM.

    The result is the mean of (a) the regression model's prediction on the
    answer embedding, normalised by ``model_max_score`` and clamped to
    0-100, and (b) the cosine similarity between answer and reference
    embeddings, scaled to 0-100.

    Returns:
        float rounded to 2 decimals; 0.0 when the encoder is unavailable or
        scoring raises.
    """
    if encoder is None:
        return 0.0
    try:
        emb_jawaban = encoder.encode([jawaban])
        emb_kunci = encoder.encode([kunci_jawaban])
        # Cosine similarity can be negative; clamp so the blended score
        # cannot drop below 0 (the Gradio grader applies the same clamp).
        similarity = max(0.0, float(cosine_similarity(emb_jawaban, emb_kunci)[0][0]))

        if model_type == "ml":
            raw_pred = best_model.predict(emb_jawaban)[0]
        else:
            t = torch.tensor(emb_jawaban, dtype=torch.float32)
            with torch.no_grad():
                raw_pred = best_model(t).item()

        score_model = (float(raw_pred) / model_max_score) * 100
        score_model = max(0.0, min(100.0, score_model))
        final_score = (score_model * 0.5) + ((similarity * 100) * 0.5)
        return round(float(final_score), 2)
    except Exception as e:
        # Best-effort scoring: keep the 0.0 fallback but report the failure
        # instead of hiding it.
        print(f"grade_with_model_only failed: {e}")
        return 0.0

def grade_with_llm_only(api_key, soal, kunci_jawaban, jawaban):
    """Ask the OpenRouter-hosted LLM for a 0-100 score.

    Args:
        api_key: OpenRouter key; anything that is not a string containing
            "sk-" disables the call.
        soal: question text.
        kunci_jawaban: reference answer text.
        jawaban: student answer text.

    Returns:
        float in [0, 100]; 0.0 when the key is missing/invalid or the
        request or response parsing fails.
    """
    if not isinstance(api_key, str) or "sk-" not in api_key:
        return 0.0
    try:
        client = openai.OpenAI(base_url="https://openrouter.ai/api/v1", api_key=api_key)
        prompt = f"""Kamu adalah Penilai Esai Otomatis.
SOAL: {soal}
KUNCI: {kunci_jawaban}
JAWABAN SISWA: {jawaban}
Output HARUS JSON: {{"skor": <0-100>}}"""
        response = client.chat.completions.create(
            model="mistralai/mistral-7b-instruct:free",
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}, temperature=0.1
        )
        payload = json.loads(response.choices[0].message.content.strip())
        # The model is only asked for 0-100; clamp so an out-of-range reply
        # cannot distort the downstream metrics.
        return max(0.0, min(100.0, float(payload.get('skor', 0))))
    except Exception as e:
        # `except Exception` (not a bare except) so Ctrl-C / SystemExit still
        # propagate out of a long grading loop.
        print(f"grade_with_llm_only failed: {e}")
        return 0.0

def grade_hybrid_50_50(api_key, soal, kunci_jawaban, jawaban):
    """Blend the model-only and LLM-only scores with equal weight.

    When the LLM score is not positive, the model-only score is returned
    unchanged.
    """
    model_part = grade_with_model_only(jawaban, kunci_jawaban)
    llm_part = grade_with_llm_only(api_key, soal, kunci_jawaban, jawaban)
    if llm_part > 0:
        return round((model_part * 0.5) + (llm_part * 0.5), 2)
    return model_part

# =====================================================
# üìä STEP 4: ADVANCED EVALUATION (MAE, MSE, RMSE, R2, QWK)
# =====================================================

def calculate_detailed_metrics(y_true_100, y_pred_100, max_score_original):
    """Compute MAE, MSE, RMSE, R2 and QWK for one grading scheme.

    Args:
        y_true_100: human scores rescaled to 0-100.
        y_pred_100: predicted scores on the 0-100 scale.
        max_score_original: top of the original integer scale (e.g. 3);
            used to map both series back to integer grades for QWK.

    Returns:
        Tuple ``(mae, mse, rmse, r2, qwk)``. The first four are computed on
        the 0-100 scale, QWK on the integer scale.
    """
    y_true = np.asarray(y_true_100, dtype=float)
    y_pred = np.asarray(y_pred_100, dtype=float)

    # Continuous metrics (0-100 scale)
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    # Ordinal metric: map 0-100 back to integer grades 0..max and clip both
    # series so every value is one of the declared labels.
    top_grade = int(round(max_score_original))
    y_true_int = np.clip(np.round((y_true / 100) * max_score_original).astype(int), 0, top_grade)
    y_pred_int = np.clip(np.round((y_pred / 100) * max_score_original).astype(int), 0, top_grade)

    # Pass the full label set explicitly. Without it the weight matrix is
    # built from the labels that happen to occur, so e.g. grades {0, 3} would
    # be weighted as if they were adjacent rather than three steps apart.
    all_grades = np.arange(top_grade + 1)
    qwk = cohen_kappa_score(y_true_int, y_pred_int, labels=all_grades, weights='quadratic')

    return mae, mse, rmse, r2, qwk

def evaluate_all_schemes(data, api_key):
    """Grade every student with three schemes and report comparison metrics.

    Schemes: "Model Only", "LLM Only" and "Hybrid" (50/50 blend). Per-student
    scores and the metric table are printed; the scheme with the highest QWK
    is announced as the winner.

    Args:
        data: dict shaped like the return value of ``input_manual``.
        api_key: OpenRouter API key passed to the LLM grader.

    Returns:
        pandas.DataFrame with one row of metrics per scheme.

    Raises:
        ValueError: if ``data["max_score"]`` is not positive.
    """
    soal = data["soal"]
    kunci = data["kunci_jawaban"]
    max_score = data["max_score"]
    siswa_data = data["data_siswa"]

    if max_score <= 0:
        raise ValueError(f"max_score must be positive, got {max_score!r}")

    # Parallel lists of true vs predicted scores, all on the 0-100 scale.
    y_true_list = []
    y_pred_model = []
    y_pred_llm = []
    y_pred_hybrid = []

    results_table = []

    print("\n" + "="*80)
    print("üöÄ STARTING EVALUATION LOOP...")
    print("="*80)

    total_students = len(siswa_data)
    for i, siswa in enumerate(siswa_data, 1):
        nama = siswa["nama"]
        jawaban = siswa["jawaban"]
        skor_asli_100 = (siswa["skor_asli"] / max_score) * 100

        print(f"[{i}/{total_students}] Grading {nama}...")

        val_model = grade_with_model_only(jawaban, kunci)
        val_llm = grade_with_llm_only(api_key, soal, kunci, jawaban)
        # Derive the hybrid score from the two values already in hand, using
        # the same rule as grade_hybrid_50_50. Calling that function here
        # would hit the LLM a second time per student, and the hybrid column
        # could then be based on a different response than the LLM column.
        if val_llm > 0:
            val_hybrid = round((val_model * 0.5) + (val_llm * 0.5), 2)
        else:
            val_hybrid = val_model

        y_true_list.append(skor_asli_100)
        y_pred_model.append(val_model)
        y_pred_llm.append(val_llm)
        y_pred_hybrid.append(val_hybrid)

        results_table.append({
            "Nama": nama,
            "Real (0-100)": round(skor_asli_100, 1),
            "Model": val_model,
            "LLM": val_llm,
            "Hybrid": val_hybrid
        })

    # --- Global metrics per scheme ---
    schemes = {
        "Model Only": y_pred_model,
        "LLM Only": y_pred_llm,
        "Hybrid": y_pred_hybrid
    }

    metrics_data = []
    for name, preds in schemes.items():
        mae, mse, rmse, r2, qwk = calculate_detailed_metrics(y_true_list, preds, max_score)
        metrics_data.append({
            "Scheme": name,
            "MAE (üìâ)": round(mae, 2),
            "MSE (üìâ)": round(mse, 2),
            "RMSE (üìâ)": round(rmse, 2),
            "R2 Score (üìà)": round(r2, 3),
            "QWK (üìà)": round(qwk, 3)
        })

    df_results = pd.DataFrame(results_table)
    df_metrics = pd.DataFrame(metrics_data)

    print("\n" + "="*80)
    print("üìä DETAIL NILAI PER SISWA")
    print("="*80)
    print(df_results)

    print("\n" + "="*80)
    print("üèÜ FINAL PERFORMANCE METRICS COMPARSION")
    print("="*80)
    print("Keterangan:")
    print("üìâ : Semakin RENDAH semakin baik (Error)")
    print("üìà : Semakin TINGGI semakin baik (Akurasi/Korelasi)")
    print("-" * 80)
    print(df_metrics.to_string(index=False))
    print("-" * 80)

    # The winner is picked by QWK.
    best_scheme = df_metrics.loc[df_metrics['QWK (üìà)'].idxmax()]
    print(f"\nüéâ WINNER (Based on QWK): {best_scheme['Scheme']}")
    print(f"   QWK Score: {best_scheme['QWK (üìà)']}")

    return df_metrics

# =====================================================
# üöÄ MAIN EXECUTION
# =====================================================

# SECURITY: an OpenRouter key used to be hard-coded on this line. A secret
# committed to source must be treated as leaked: revoke that key and supply
# a fresh one through the OPENROUTER_API_KEY environment variable.
# With no key set, grade_with_llm_only returns 0.0 and the LLM is skipped.
MY_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

if __name__ == "__main__":
    data_input = input_manual()
    df_final = evaluate_all_schemes(data_input, MY_API_KEY)

# In [None]:  (notebook cell 2 marker, commented out so the file parses as Python)
# =======================================================
# 🎯 HYBRID AI GRADING SYSTEM (50-50 VERSION)
# =======================================================

import gradio as gr
import joblib
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import openai
import json
import os
import torch
import torch.nn as nn
from sentence_transformers import SentenceTransformer
from xgboost import XGBRegressor
from typing import Tuple, Dict, Any

# Startup banner for the Gradio grading app.
print(
    "=" * 60,
    "üöÄ INITIALIZING HYBRID 50-50 GRADING SYSTEM",
    "=" * 60,
    sep="\n",
)

# =====================================================
# üîß 1. LOAD SBERT MODEL
# =====================================================

print("\nüîÑ Loading SBERT model...")
try:
    # Menggunakan model yang relatif ringan dan cepat
    encoder = SentenceTransformer("paraphrase-MiniLM-L6-v2")
    print("‚úÖ SBERT model loaded successfully!")
except Exception as e:
    print(f"‚ùå Error loading SBERT: {e}")
    # Fatal jika encoder gagal dimuat karena diperlukan untuk Technical Score
    encoder = None
    exit()

# =====================================================
# üîß 2. PLACEHOLDER DL MODEL CLASS
# =====================================================

class MyModel(torch.nn.Module):
    """Placeholder PyTorch network: Linear -> ReLU -> Linear.

    load_custom_model instantiates it with default sizes before loading a
    saved state_dict, so the attribute names fc1 / relu / fc2 must not change.
    """

    def __init__(self, input_dim=384, hidden_dim=128, output_dim=1):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run ``x`` through both linear layers with a ReLU in between."""
        return self.fc2(self.relu(self.fc1(x)))

# =====================================================
# üîß 3. LOAD TRAINED MODEL (ML/DL)
# =====================================================

def get_dummy_model() -> Tuple[Any, str, float]:
    """Build the fallback regressor used when no trained model is available.

    Returns:
        (model, "ml", 12.0): an XGBRegressor fitted on synthetic data with
        384 features (matching the MyModel default input size) and targets
        in [0, 12), plus the score ceiling used for normalisation.
    """
    print("‚ö†Ô∏è Model asli tidak ditemukan, menggunakan DUMMY Model")
    # Seed the synthetic data too; seeding only the regressor still leaves the
    # placeholder's predictions different on every start-up.
    rng = np.random.default_rng(42)
    dummy = XGBRegressor(random_state=42)
    dummy.fit(rng.random((10, 384)), rng.random(10) * 12)
    return dummy, "ml", 12.0

def load_custom_model() -> Tuple[Any, str, float]:
    """Load the best trained model from ``models/`` (.pkl = ML, .pth = DL).

    A file qualifies when its name contains "BEST_MODEL". A .pkl match is
    preferred over a .pth match. Falls back to get_dummy_model() when the
    directory or a matching file is missing, or when loading fails.

    Returns:
        (model, model_type, model_max_score) with model_type "ml" or "dl".
        Loaded models are assumed to predict on a 0-100 scale; adjust the
        100.0 below if the model was trained differently.
    """
    model_dir = "models"

    if not os.path.exists(model_dir):
        os.makedirs(model_dir, exist_ok=True)
        return get_dummy_model()

    try:
        # os.listdir() order is arbitrary; sort so that the same file is
        # chosen on every run when several candidates exist.
        files = sorted(os.listdir(model_dir))

        def _first_best(extension):
            """Return the first BEST_MODEL file with this extension, or None."""
            return next(
                (os.path.join(model_dir, f) for f in files
                 if "BEST_MODEL" in f and f.endswith(extension)),
                None,
            )

        best_pkl = _first_best(".pkl")
        best_pth = _first_best(".pth")

        if best_pkl:
            print(f"üöÄ Loading ML Model: {best_pkl}")
            # SECURITY: joblib.load unpickles arbitrary objects. Only place
            # trusted files in the models directory.
            data = joblib.load(best_pkl)
            # The artifact may be a dict wrapper, e.g. {'model': model}.
            model = data.get('model', data.get('estimator', data)) if isinstance(data, dict) else data
            if model is None:
                print("‚ùå Model tidak ditemukan dalam dict")
                return get_dummy_model()
            print("‚úÖ ML Model loaded successfully!")
            return model, "ml", 100.0

        if best_pth:
            print(f"üöÄ Loading DL Model: {best_pth}")
            model = MyModel()
            # SECURITY: torch.load also relies on pickle; trusted files only.
            checkpoint = torch.load(best_pth, map_location="cpu")
            # The checkpoint may be a dict wrapper, e.g. {'model_state_dict': state}.
            state_dict = checkpoint.get('model_state_dict', checkpoint) if isinstance(checkpoint, dict) else checkpoint
            model.load_state_dict(state_dict)
            model.eval()
            print("‚úÖ DL Model loaded successfully!")
            return model, "dl", 100.0

        return get_dummy_model()

    except Exception as e:
        print(f"‚ùå Error loading model: {e}")
        return get_dummy_model()

# Load model global
# Module-level model handle read by grade_essay_hybrid, plus a short summary.
best_model, model_type, model_max_score = load_custom_model()
print(
    f"\nüìä Model Type: {model_type.upper()}",
    f"üìä Max Score Range: 0-{model_max_score}",
    sep="\n",
)

# =====================================================
# üß† 4. OPENROUTER LLM CHECKER
# =====================================================

def cek_konteks_llm(api_key: str, soal: str, kunci: str, jawaban_mhs: str) -> Tuple[float, str]:
    """Ask the OpenRouter LLM for a contextual score and a short justification.

    Args:
        api_key: OpenRouter API key; empty/blank/None disables the call.
        soal: question text.
        kunci: reference answer.
        jawaban_mhs: student answer; empty/blank/None disables the call.

    Returns:
        (score, feedback). ``score`` is clamped to 0-100. On any failure the
        score is 0.0 and ``feedback`` is a message that contains either
        "Mode fallback aktif", "Offline/Fallback aktif" or
        "Gagal parse response".
    """
    import re  # local import: only the malformed-JSON fallback needs it

    # Guard None as well as blank input so this check itself cannot raise.
    if not api_key or not api_key.strip() or not jawaban_mhs or not jawaban_mhs.strip():
        return 0.0, "‚ö†Ô∏è API Key/Jawaban kosong. Mode Offline/Fallback aktif."

    # Defined before the try so the JSONDecodeError handler can always read it.
    text_res = ""
    try:
        client = openai.OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=api_key,
        )

        # Free Mistral model on OpenRouter.
        OPENROUTER_MODEL = "mistralai/mistral-7b-instruct:free"

        prompt = f"""
Kamu adalah Dosen Penilai yang Objektif dan Teliti.

**SOAL:**
{soal}

**KUNCI JAWABAN (Reference):**
{kunci}

**JAWABAN MAHASISWA:**
{jawaban_mhs}

**TUGAS:**
Berikan penilaian objektif berdasarkan kriteria: Relevansi (40%), Kebenaran (40%), Kelengkapan (20%).
Hanya beri skor rendah jika benar-benar salah/tidak relevan.

**OUTPUT (WAJIB FORMAT JSON):**
{{"skor": <integer 0-100>, "alasan": "<penjelasan singkat 1-2 kalimat>"}}
"""

        response = client.chat.completions.create(
            model=OPENROUTER_MODEL,
            messages=[
                {"role": "system", "content": "You are an objective essay grader. Your output must be a single JSON object. Only output the JSON."},
                {"role": "user", "content": prompt}
            ],
            response_format={"type": "json_object"},
            temperature=0.3
        )

        text_res = response.choices[0].message.content.strip()
        data = json.loads(text_res)
        skor = max(0.0, min(100.0, float(data.get('skor', 0))))
        # Callers run substring checks on the feedback, so always hand back a str.
        alasan = str(data.get('alasan', 'Tidak ada feedback'))
        return skor, alasan

    except json.JSONDecodeError:
        # Fallback when the reply is not valid JSON: pull the score out with a
        # regex (decimals accepted).
        match = re.search(r'"skor"\s*:\s*(\d+(?:\.\d+)?)', text_res)
        if match:
            skor = max(0.0, min(100.0, float(match.group(1))))
            return skor, "Feedback parsing error, skor diekstrak manual."
        else:
            return 0.0, f"‚ö†Ô∏è Gagal parse response OpenRouter: {text_res[:100]}... | Error: JSONDecodeError"

    except openai.APIError as e:
        error_msg = str(e)
        if "rate limit" in error_msg.lower() or "429" in error_msg:
             return 0.0, "‚ùå OpenRouter Error: Quota/Rate Limit habis. Mode fallback aktif."
        elif "authentication" in error_msg.lower() or "401" in error_msg:
             return 0.0, "‚ùå OpenRouter Error: API Key tidak valid. Mode fallback aktif."
        print(f"‚ùå OpenRouter API Error: {error_msg}")
        return 0.0, f"‚ùå OpenRouter Error: {error_msg[:50]}... | Mode fallback aktif."

    except Exception as e:
        error_msg = str(e)
        print(f"‚ùå Unexpected OpenRouter Error: {error_msg}")
        return 0.0, f"‚ùå Error tak terduga: {error_msg[:50]}... | Mode fallback aktif."


# =====================================================
# üéØ 5. HYBRID 50-50 GRADING FUNCTION
# =====================================================

def grade_essay_hybrid(api_key: str, jawaban_mahasiswa: str, current_state: Dict[str, Any]) -> Tuple[float, float, float, str, str]:
    """Grade one answer with the 50/50 hybrid scheme.

    Args:
        api_key: OpenRouter key; without a usable key the offline mode is used.
        jawaban_mahasiswa: student answer text.
        current_state: dict providing "soal", "jawaban_benar" and "max_score".

    Returns:
        (final_score, technical_score, llm_score, llm_feedback, mode_label).
        ``final_score`` is scaled to the instructor's max score; the
        technical and LLM scores are on a 0-100 scale.
    """
    if not jawaban_mahasiswa or not jawaban_mahasiswa.strip():
        return 0.0, 0.0, 0.0, "Jawaban kosong", "N/A"

    soal = current_state.get("soal", "")
    kunci_jawaban = current_state.get("jawaban_benar", "")
    max_score_dosen = current_state.get("max_score", 100.0)
    has_key_answer = bool(kunci_jawaban and kunci_jawaban.strip())

    score_technical = 0.0
    similarity = 0.0

    # STEP 1: technical score (embedding model + regressor)
    if not has_key_answer:
        # Without a reference answer both values stay at 0.
        print("‚ö†Ô∏è Kunci Jawaban kosong. Technical Score/Similarity diatur ke 0.")
    else:
        try:
            emb_mhs = encoder.encode([jawaban_mahasiswa])
            emb_kunci = encoder.encode([kunci_jawaban])

            # Cosine similarity, floored at 0.
            similarity = max(0.0, float(cosine_similarity(emb_mhs, emb_kunci)[0][0]))

            if model_type == "ml":
                raw_pred = best_model.predict(emb_mhs)[0]
            else:
                t_mhs = torch.tensor(emb_mhs, dtype=torch.float32)
                with torch.no_grad():
                    raw_pred = best_model(t_mhs).item()

            # Normalise the raw prediction to 0-100.
            score_technical = (float(raw_pred) / model_max_score) * 100
            score_technical = max(0.0, min(100.0, score_technical))

            # A near-identical answer gets the full technical score.
            if similarity > 0.99:
                score_technical = 100.0

        except Exception as e:
            print(f"‚ùå Technical scoring error: {e}")
            score_technical = 0.0
            similarity = 0.0

    # STEP 2: logical score (LLM)
    score_llm, feedback_llm = cek_konteks_llm(api_key, soal, kunci_jawaban, jawaban_mahasiswa)

    # STEP 3: final score.
    # Every failure message produced by cek_konteks_llm contains one of these
    # phrases. Matching on them, instead of on the bare word "Error", keeps a
    # genuine LLM explanation such as "Error in the student's reasoning" from
    # being mistaken for a failed call and silently dropping the LLM score.
    failure_markers = ("Mode fallback aktif", "Offline/Fallback aktif", "Gagal parse response")
    feedback_text = str(feedback_llm)
    has_api_key = bool(api_key and api_key.strip())
    is_llm_success = has_api_key and not any(marker in feedback_text for marker in failure_markers)

    if is_llm_success:
        # Hybrid mode: 50% model + 50% LLM.
        final_score_raw = (score_technical * 0.5) + (score_llm * 0.5)
        mode_used = "üü¢ Hybrid 50-50 (Model+AI)"
    elif not has_key_answer:
        # Offline with no reference answer: technical score only.
        final_score_raw = score_technical
        mode_used = "üü° Full Offline (Tech Model Only)"
    else:
        # Offline: 50% technical score + 50% similarity (scaled to 0-100).
        final_score_raw = (score_technical * 0.5) + ((similarity * 100) * 0.5)
        mode_used = "üü° Offline (Model+Sim)"

    # Scale 0-100 to the instructor's max score and clamp.
    final_score = (final_score_raw / 100.0) * max_score_dosen
    final_score = max(0.0, min(max_score_dosen, final_score))

    return (
        round(final_score, 2),
        round(score_technical, 2),  # technical score (0-100)
        round(score_llm, 2),        # LLM score (0-100)
        feedback_llm,
        mode_used
    )

# =====================================================
# üóÇÔ∏è 6. GRADIO UI LOGIC FUNCTIONS
# =====================================================

def simpan_soal(soal: str, jawaban_benar: str, max_score: float, state: Dict[str, Any]) -> Tuple[Dict[str, Any], str, pd.DataFrame]:
    """Handle the "save question" button.

    Args:
        soal: question text.
        jawaban_benar: reference answer.
        max_score: maximum score set by the instructor. Values that are not
            a finite positive number fall back to 100.0.
        state: current app state; returned unchanged when validation fails.

    Returns:
        (state, status_message, leaderboard). On success ``state`` is a new
        dict with an empty leaderboard.
    """
    columns = ["Nama", "Total", "Teknis", "Logika(AI)", "Mode", "Feedback"]

    if not soal or not soal.strip() or not jawaban_benar or not jawaban_benar.strip():
        return state, "‚ùå Soal/Kunci Jawaban tidak boleh kosong!", pd.DataFrame(columns=columns)

    try:
        max_score_float = float(max_score)
    except (ValueError, TypeError):
        max_score_float = 100.0
    # Zero, negative, NaN or infinite values cannot serve as a score ceiling:
    # downstream the final score is clamped to [0, max_score].
    if not np.isfinite(max_score_float) or max_score_float <= 0:
        max_score_float = 100.0

    new_state = {
        "soal": soal,
        "jawaban_benar": jawaban_benar,
        "max_score": max_score_float,
        "leaderboard": pd.DataFrame(columns=columns)
    }

    status_msg = f"‚úÖ Soal tersimpan!\nüìù Soal: {soal[:50]}...\nüéØ Max Score: {new_state['max_score']}"

    return new_state, status_msg, new_state["leaderboard"]


def submit_jawaban_hybrid(api_key: str, nama: str, jawaban: str, state: Dict[str, Any]) -> Tuple[Dict[str, Any], pd.DataFrame, str]:
    """Handle the "submit answer" button.

    Validates the input, grades the answer with grade_essay_hybrid, inserts
    or updates the student's leaderboard row (sorted by total, descending)
    and builds the markdown result message.

    Returns:
        (state, student_leaderboard, message). The student leaderboard never
        includes the "Feedback" column, on error paths as well.
    """
    columns = ["Nama", "Total", "Teknis", "Logika(AI)", "Mode", "Feedback"]

    def _public_board(current_state):
        """Leaderboard as shown to students: the stored frame minus Feedback."""
        board = current_state.get("leaderboard") if current_state else None
        if board is None:
            board = pd.DataFrame()
        return board.drop(columns=["Feedback"], errors='ignore')

    # 1. A question must exist. `state` may be None or empty here, so it must
    #    not be dereferenced directly.
    if not state or "soal" not in state or not state["soal"]:
        return state, _public_board(state), "‚ö†Ô∏è **Error:** Dosen belum membuat soal!"

    # 2. Validate the student input.
    if not nama or not nama.strip():
        return state, _public_board(state), "‚ö†Ô∏è **Error:** Nama harus diisi!"

    if not jawaban or not jawaban.strip():
        return state, _public_board(state), "‚ö†Ô∏è **Error:** Jawaban harus diisi!"

    # 3. Grade.
    total, teknis, logika, feedback, mode = grade_essay_hybrid(api_key, jawaban, state)

    # 4. Update the leaderboard.
    df = state.get("leaderboard", pd.DataFrame(columns=columns))

    new_row = {
        "Nama": nama, "Total": total, "Teknis": teknis,
        "Logika(AI)": logika, "Mode": mode, "Feedback": feedback
    }

    if not df.empty and nama in df["Nama"].values:
        # Resubmission: overwrite the existing row.
        df.loc[df["Nama"] == nama, list(new_row.keys())] = list(new_row.values())
    elif df.empty:
        # Start from the new row directly rather than concatenating onto an
        # empty frame.
        df = pd.DataFrame([new_row])
    else:
        df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

    df = df.sort_values(by="Total", ascending=False).reset_index(drop=True)
    state["leaderboard"] = df

    # 5. Build the feedback message.
    max_score = state.get("max_score", 100.0)
    emoji = "üìö"
    grade = "Perlu Belajar"
    if total >= max_score * 0.9:
        emoji = "üåü"
        grade = "Sempurna!"
    elif total >= max_score * 0.7:
        emoji = "‚úÖ"
        grade = "Bagus!"
    elif total >= max_score * 0.5:
        emoji = "üëç"
        grade = "Cukup"

    msg = f"""
### {emoji} Hasil Penilaian: **{nama}**
---
**üèÜ SKOR AKHIR: {total} / {max_score}** ({grade})
üìä **Rincian Penilaian (Skala 0-100):**
- ü§ñ **Teknis (Model):** {teknis} (Bobot 50%)
- üß† **Logika (AI):** {logika} (Bobot 50%)
---
üí° **Feedback AI:** > {feedback}
üîß **Mode:** {mode}
---
"""
    # The Feedback column is hidden from the student leaderboard.
    return state, df.drop(columns=["Feedback"], errors='ignore'), msg


def tampilkan_soal(state: Dict[str, Any]) -> str:
    """Render the current question as markdown for the student tab.

    Returns a placeholder notice when the state is empty or holds no question.
    """
    question = state.get("soal") if state else None
    if not question:
        return "‚ö†Ô∏è _(Dosen belum membuat soal)_"
    return f"üìã **Soal:**\n\n{question}"

# =====================================================
# üé® 7. GRADIO UI
# =====================================================

# Gradio layout: an API-key accordion plus two tabs (instructor / student).
# Components are created in display order, so statement order matters here.
with gr.Blocks(title="Hybrid 50-50 Grader") as demo:
    # Application state handed to every callback: question, reference answer,
    # max score, and the leaderboard DataFrame.
    # NOTE(review): gr.State is presumably per browser session — confirm that
    # instructor and students are expected to share one session, otherwise a
    # saved question will not be visible to students.
    initial_state = {
        "soal": None, "jawaban_benar": None, "max_score": 100.0,
        "leaderboard": pd.DataFrame(columns=["Nama", "Total", "Teknis", "Logika(AI)", "Mode", "Feedback"])
    }
    state = gr.State(initial_state)
    
    gr.Markdown("""
    # üöÄ Hybrid 50-50 Essay Grading System
    ### Sistem Penilaian Esai dengan Bobot Seimbang: 50% Model + 50% LLM
    ---
    """)
    
    # API key setup (collapsed by default); the value is passed to the submit handler.
    with gr.Accordion("üîë Setup API Key OpenRouter (Required for Hybrid Mode)", open=False):
        gr.Markdown("""
        **Cara Mendapatkan API Key OpenRouter:** [OpenRouter Dashboard](https://openrouter.ai/keys)
        ‚ö†Ô∏è **Tanpa API Key:** Sistem akan fallback ke Model + Similarity.
        """)
        api_key_input = gr.Textbox(
            label="OpenRouter API Key", type="password", placeholder="sk-or-v1-...", 
            info="Key ini TIDAK akan disimpan."
        )
    
    with gr.Tabs():
        # TAB 1: instructor portal — create the question and reference answer
        with gr.Tab("üë®‚Äçüè´ Portal Dosen"):
            gr.Markdown("### Buat Soal & Kunci Jawaban")
            
            with gr.Row():
                with gr.Column(scale=2):
                    soal_in = gr.Textbox(label="üìù Soal Essay", lines=3, placeholder="Jelaskan dampak AI pada pendidikan modern...")
                    kunci_in = gr.Textbox(label="‚úÖ Kunci Jawaban (Referensi Ideal)", lines=8, placeholder="Tuliskan jawaban ideal...")
                
                with gr.Column(scale=1):
                    max_score = gr.Number(value=100, label="üéØ Skor Maksimal")
                    btn_save = gr.Button("üíæ Simpan Soal", variant="primary", size="lg")
                    status_dosen = gr.Textbox(label="üìä Status", interactive=False, lines=3)
            
            gr.Markdown("---")
            gr.Markdown("### üìä Preview Leaderboard (Dosen)")
            leaderboard_dosen = gr.DataFrame(label="Leaderboard (Live)", interactive=False)
            
            # Saving replaces the state and resets the instructor leaderboard view.
            btn_save.click(
                fn=simpan_soal,
                inputs=[soal_in, kunci_in, max_score, state],
                outputs=[state, status_dosen, leaderboard_dosen]
            )
        
        # TAB 2: student portal — view the question and submit an answer
        with gr.Tab("üßë‚Äçüéì Portal Mahasiswa") as mahasiswa_tab:
            gr.Markdown("### Lihat Soal & Submit Jawaban")
            
            soal_display = gr.Markdown(value="‚ö†Ô∏è _(Dosen belum membuat soal)_")
            
            with gr.Row():
                with gr.Column(scale=2):
                    nama_in = gr.Textbox(label="üë§ Nama Lengkap", placeholder="Contoh: Budi Santoso")
                    jawab_in = gr.Textbox(label="‚úçÔ∏è Jawaban Kamu", lines=10, placeholder="Tuliskan jawabanmu di sini...")
                    btn_submit = gr.Button("üöÄ Kirim Jawaban", variant="primary", size="lg")
                
                with gr.Column(scale=1):
                    gr.Markdown("### üèÜ Live Leaderboard")
                    # The Feedback column is hidden in the student view.
                    # NOTE(review): only four headers are declared here, while
                    # submit_jawaban_hybrid returns the leaderboard minus
                    # "Feedback", which still contains "Mode" — confirm the
                    # intended columns.
                    lb_mhs = gr.DataFrame(label="Top Scores", interactive=False, headers=["Nama", "Total", "Teknis", "Logika(AI)"])
            
            gr.Markdown("---")
            status_mhs = gr.Markdown(value="_Isi form di atas dan klik 'Kirim Jawaban'_")
            
            # Submitting grades the answer and updates the state, the student
            # leaderboard and the result message.
            btn_submit.click(
                fn=submit_jawaban_hybrid,
                inputs=[api_key_input, nama_in, jawab_in, state],
                outputs=[state, lb_mhs, status_mhs]
            )
            
            # Refresh the displayed question whenever the student tab is selected.
            mahasiswa_tab.select(
                fn=tampilkan_soal,
                inputs=[state],
                outputs=[soal_display]
            )

# =====================================================
# üöÄ 8. LAUNCH APPLICATION
# =====================================================

# Print a one-shot readiness summary, then start the Gradio server.
print(
    "\n" + "=" * 60,
    "‚úÖ SYSTEM READY!",
    "=" * 60,
    f"üìä Model Type: {model_type.upper()}",
    f"ü§ñ SBERT: {'‚úÖ Loaded' if encoder else '‚ùå Failed'}",
    "üß† LLM: OpenRouter (via Mistral 7B)",
    "‚öñÔ∏è Scoring Weight: 50% Model + 50% LLM",
    "=" * 60,
    "\nüöÄ Launching Gradio interface...\n",
    sep="\n",
)

demo.launch(share=True, debug=True)