# Full Pipeline Evaluation

Evaluates the end-to-end detection capability using the Orchestrator.
**Metrics**: Total Latency, Throughput, Detection Accuracy by Type.

In [None]:
import sys
import os
import time
import asyncio
import pandas as pd

# Add the directory two levels above the current working directory to the
# import path (presumably the repository root, so that the `apps.cipas...`
# imports below resolve -- TODO confirm against the notebook's location).
sys.path.append(os.path.abspath("../.."))
# Set before the project imports below; presumably the app reads REDIS_URL
# when it is imported/initialised -- TODO confirm. Note that this assignment
# overwrites any REDIS_URL already present in the environment.
os.environ["REDIS_URL"] = "redis://localhost:6379/0"

from apps.cipas.app.pipeline.orchestrator import orchestrator
from apps.cipas.app.models.submission import Submission

## 1. Setup Data
Create a small dataset: one base method plus one Type-1, Type-2, Type-3, and Type-4 clone of it.

In [None]:
base_code = "public int add(int a, int b) { return a + b; }"
type1 = "public int add(int a, int b) { return a + b; }"
type2 = "public int sum(int x, int y) { return x + y; }"
type3 = "public int add(int a, int b) { int res = a + b; return res; }"
type4 = "public int add(int a, int b) { if(b==0) return a; return add(a+1, b-1); }" # recursive, dif algo but checks

dataset = [
    ("sub_base", base_code),
    ("sub_t1", type1),
    ("sub_t2", type2),
    ("sub_t3", type3),
    ("sub_t4", type4),
]

# Pre-index the 'base' submission so others can find it
base_sub = Submission(id="sub_base", student_id="s0", assignment_id="a1", code=base_code)
await orchestrator.processed_submission(base_sub)
print("Base submission indexed.")

## 2. Run Pipeline & Measure Latency

In [None]:
results = []
detailed_logs = []

for sid, code in dataset[1:]: # Skip base
    start_t = time.time()
    
    sub = Submission(id=sid, student_id=sid, assignment_id="a1", code=code)
    res = await orchestrator.processed_submission(sub)
    
    end_t = time.time()
    latency = end_t - start_t
    
    # Get top match regarding base
    top_match = next((m for m in res['matches'] if m['submission_id'] == 'sub_base'), None)
    
    results.append({
        "submission_id": sid,
        "latency_ms": latency * 1000,
        "found_match": top_match is not None,
        "clone_type": top_match['clone_type'] if top_match else "None",
        "score": top_match['final_score'] if top_match else 0.0
    })

df_res = pd.DataFrame(results)
print(df_res)

## 3. Evaluation Metrics

In [None]:
# Latency statistics over all evaluated submissions (values in milliseconds).
avg_latency = df_res['latency_ms'].mean()
max_latency = df_res['latency_ms'].max()
total_latency = df_res['latency_ms'].sum()

# Throughput for the sequential run: submissions processed per second of
# measured pipeline time. Guard against a zero total to avoid dividing by 0.
total_seconds = total_latency / 1000.0
throughput = len(df_res) / total_seconds if total_seconds > 0 else float("nan")

print(f"Average Latency: {avg_latency:.2f} ms")
print(f"Max Latency: {max_latency:.2f} ms")
print(f"Total Latency: {total_latency:.2f} ms")
print(f"Throughput: {throughput:.2f} submissions/s")

# Check correctness (Expectations)
# sub_t1 -> Type-1/2
# sub_t2 -> Type-1/2 or Type-2
# sub_t3 -> Type-3
# sub_t4 -> Type-4
# NOTE(review): these expectations are not asserted anywhere; they must be
# compared by eye against the table printed below. The exact clone_type label
# strings returned by the orchestrator are not visible in this notebook.

print("\nPerformance Summary:")
print(df_res[['submission_id', 'clone_type', 'score']])