In [2]:
import ollama
import time
import pandas as pd
from IPython.display import display 


In [3]:

# Ollama model tags to benchmark, in the order they are run for each case.
MODELS = [
    "yi-coder:1.5b",
    "qwen2.5-coder:1.5b",
    "llama3.2:1b",
]

# Buggy snippets handed to each model. Every case carries:
#   id   - short unique identifier
#   lang - language of the snippet
#   type - bug category
#   name - human-readable bug name
#   code - the snippet itself, with a "BUG:" comment marking the defect
# Each snippet is written as one adjacent string literal per source line.
TEST_SUITE = [
    # --- PYTHON CASES ---
    {
        "id": "PY_01",
        "lang": "Python",
        "type": "Logic",
        "name": "Off-by-one Error",
        "code": (
            "def get_items(arr):\n"
            "    # BUG: Index out of range\n"
            "    for i in range(len(arr) + 1):\n"
            "        print(arr[i])"
        ),
    },
    {
        "id": "PY_02",
        "lang": "Python",
        "type": "Security",
        "name": "SQL Injection",
        "code": (
            "def login(u, p):\n"
            "    # BUG: SQL Injection vulnerability\n"
            "    q = f\"SELECT * FROM users WHERE u='{u}' AND p='{p}'\"\n"
            "    db.execute(q)"
        ),
    },
    {
        "id": "PY_03",
        "lang": "Python",
        "type": "Runtime",
        "name": "Zero Division",
        "code": (
            "def average(nums):\n"
            "    total = sum(nums)\n"
            "    # BUG: Crash if nums is empty\n"
            "    return total / len(nums)"
        ),
    },
    {
        "id": "PY_04",
        "lang": "Python",
        "type": "Logic",
        "name": "Mutable Default Arg",
        "code": (
            "def append_to(num, target=[]):\n"
            "    # BUG: target retains value between calls\n"
            "    target.append(num)\n"
            "    return target"
        ),
    },

    # --- JAVA CASES ---
    {
        "id": "JAVA_01",
        "lang": "Java",
        "type": "Logic",
        "name": "String Comparison",
        "code": (
            "public boolean check(String s) {\n"
            "    // BUG: Using == instead of .equals\n"
            "    if (s == \"password\") return true;\n"
            "    return false;\n"
            "}"
        ),
    },
    {
        "id": "JAVA_02",
        "lang": "Java",
        "type": "Runtime",
        "name": "Null Pointer",
        "code": (
            "public int getLen(String s) {\n"
            "    // BUG: No null check\n"
            "    return s.length();\n"
            "}"
        ),
    },
    {
        "id": "JAVA_03",
        "lang": "Java",
        "type": "Logic",
        "name": "Infinite Loop",
        "code": (
            "public void loop() {\n"
            "    // BUG: Decrementing instead of incrementing\n"
            "    for (int i = 0; i < 10; i--) {\n"
            "        System.out.println(i);\n"
            "    }\n"
            "}"
        ),
    },
    {
        "id": "JAVA_04",
        "lang": "Java",
        "type": "Concurrency",
        "name": "Race Condition",
        "code": (
            "public class Counter {\n"
            "    private int count = 0;\n"
            "    // BUG: Not synchronized\n"
            "    public void increment() { count++; }\n"
            "}"
        ),
    },

    # --- C/C++ CASES ---
    {
        "id": "C_01",
        "lang": "C",
        "type": "Memory",
        "name": "Buffer Overflow",
        "code": (
            "void copy(char *s) {\n"
            "    char buf[5];\n"
            "    // BUG: Unsafe copy\n"
            "    strcpy(buf, s);\n"
            "}"
        ),
    },
    {
        "id": "C_02",
        "lang": "C",
        "type": "Memory",
        "name": "Memory Leak",
        "code": (
            "void process() {\n"
            "    int *p = malloc(sizeof(int)*10);\n"
            "    p[0] = 1;\n"
            "    // BUG: No free(p)\n"
            "}"
        ),
    },
    {
        "id": "C_03",
        "lang": "C",
        "type": "Logic",
        "name": "Integer Overflow",
        "code": (
            "int add(int a, int b) {\n"
            "    // BUG: Can overflow if a+b > MAX_INT\n"
            "    return a + b;\n"
            "}"
        ),
    },
    {
        "id": "C_04",
        "lang": "C",
        "type": "Pointer",
        "name": "Use After Free",
        "code": (
            "void func() {\n"
            "    char *p = malloc(10);\n"
            "    free(p);\n"
            "    // BUG: Accessing freed memory\n"
            "    *p = 'a';\n"
            "}"
        ),
    },
]



In [None]:
# --- 3. BENCHMARK RUNNER FUNCTION ---
def run_benchmark():
    """Run every TEST_SUITE case against every model in MODELS and report timings.

    For each (case, model) pair a "fix this code" prompt is sent through
    ``ollama.chat`` and the elapsed time and the model's reply are recorded.
    Afterwards three things are shown:

    1. the full per-request result table,
    2. a Model x language table of mean response time in seconds,
    3. a bar chart of that summary (needs matplotlib).

    A request that raises is kept in the detail table with the error text in
    the reply column and NaN as its duration, so failures do not count as
    "0 seconds" and pull the averages down. Only successful requests feed the
    summary table and the chart.

    Returns:
        None. All results are printed or displayed.
    """
    # Column labels are defined once so the result rows and the pivot below
    # cannot drift apart.
    col_model = "Model"
    col_lang = "Ng√¥n ng·ªØ"
    col_type = "Lo·∫°i l·ªói"
    col_name = "B√†i to√°n"
    col_time = "Th·ªùi gian (s)"
    col_code = "Code S·ª≠a"
    columns = [col_model, col_lang, col_type, col_name, col_time, col_code]

    results = []
    total_tasks = len(TEST_SUITE) * len(MODELS)
    current_task = 0

    print(f" BENCHMARK: {len(TEST_SUITE)} b√†i test x {len(MODELS)} model")
    print("-" * 60)

    for case in TEST_SUITE:
        print(f" [{case['lang']}] {case['name']}...")

        # The prompt depends only on the case, so build it once per case and
        # keep string formatting out of the timed region.
        prompt = f"Fix this {case['lang']} code. Return ONLY the fixed code block:\n```\n{case['code']}\n```"

        for model in MODELS:
            current_task += 1
            print(f"   [{current_task}/{total_tasks}] Running {model}...", end=" ", flush=True)

            try:
                # perf_counter is monotonic, so system clock adjustments
                # during a long run cannot distort the measured interval.
                start_time = time.perf_counter()

                # NOTE(review): keep_alive=0 presumably makes Ollama unload the
                # model right after replying, so each timing would include
                # model load time -- confirm against the Ollama docs.
                response = ollama.chat(model=model, keep_alive=0, messages=[
                    {'role': 'user', 'content': prompt}
                ])

                duration = round(time.perf_counter() - start_time, 2)
                output = response['message']['content']

                print(f"Xong ({duration}s)")

            except Exception as e:
                # Best effort: one failing request must not abort the whole
                # run. NaN (not 0) keeps the failure out of the averages.
                print(f"‚ùå L·ªói: {e}")
                duration = float("nan")
                output = str(e)

            results.append({
                col_model: model,
                col_lang: case["lang"],
                col_type: case["type"],
                col_name: case["name"],
                col_time: duration,
                col_code: output,
            })

    print("\n" + "="*60)
    print(" K·∫æT QU·∫¢ CHI TI·∫æT")
    # Explicit columns keep the frame well-formed even when no task ran.
    df = pd.DataFrame(results, columns=columns)

    display(df)

    # Only successful requests carry a duration. With none of them (nothing
    # ran, or every request failed) there is nothing to average or plot.
    timed_df = df.dropna(subset=[col_time])
    if timed_df.empty:
        return

    print("\n" + "="*60)
    print(" T√ìM T·∫ÆT T·ªêC ƒê·ªò TRUNG B√åNH (Gi√¢y)")

    summary_df = timed_df.pivot_table(index=col_model, columns=col_lang, values=col_time, aggfunc="mean")

    display(summary_df)

    print("\n" + "="*60)
    print(" BI·ªÇU ƒê·ªí SO S√ÅNH")
    try:
        summary_df.plot(kind='bar', figsize=(10, 6), title="T·ªëc ƒë·ªô x·ª≠ l√Ω trung b√¨nh theo Ng√¥n ng·ªØ (Th·∫•p h∆°n l√† t·ªët h∆°n)")
    except ImportError:
        # pandas needs matplotlib for plotting; tell the user how to get it.
        print("C·∫ßn c√†i matplotlib ƒë·ªÉ v·∫Ω bi·ªÉu ƒë·ªì: pip install matplotlib")



In [None]:
# Run the benchmark: prints progress, then displays the result tables and chart.
run_benchmark()

üî• B·∫ÆT ƒê·∫¶U BENCHMARK: 12 b√†i test x 3 model
------------------------------------------------------------
üìÇ ƒêang x·ª≠ l√Ω: [Python] Off-by-one Error...
   [1/36] Running yi-coder:1.5b... ‚úÖ Xong (16.57s)
   [2/36] Running qwen2.5-coder:1.5b... ‚úÖ Xong (9.09s)
   [3/36] Running llama3.2:1b... ‚úÖ Xong (6.76s)
üìÇ ƒêang x·ª≠ l√Ω: [Python] SQL Injection...
   [4/36] Running yi-coder:1.5b... ‚úÖ Xong (35.31s)
   [5/36] Running qwen2.5-coder:1.5b... ‚úÖ Xong (9.06s)
   [6/36] Running llama3.2:1b... ‚úÖ Xong (14.75s)
üìÇ ƒêang x·ª≠ l√Ω: [Python] Zero Division...
   [7/36] Running yi-coder:1.5b... ‚úÖ Xong (11.31s)
   [8/36] Running qwen2.5-coder:1.5b... ‚úÖ Xong (16.09s)
   [9/36] Running llama3.2:1b... ‚úÖ Xong (14.53s)
üìÇ ƒêang x·ª≠ l√Ω: [Python] Mutable Default Arg...
   [10/36] Running yi-coder:1.5b... ‚úÖ Xong (16.87s)
   [11/36] Running qwen2.5-coder:1.5b... ‚úÖ Xong (6.27s)
   [12/36] Running llama3.2:1b... ‚úÖ Xong (7.33s)
üìÇ ƒêang x·ª≠ l√Ω: [Java] String Compariso