# Tier 2: Syntactic Detection Evaluation

Evaluates local AST feature extraction and comparison, designed for Type-3 (Near-miss) clones.

In [None]:
import sys
import os
import json
import asyncio

# Add the directory two levels above the working directory to the import path
# so the `apps.cipas...` imports below can resolve (presumably the repository
# root -- confirm against the notebook's location).
sys.path.append(os.path.abspath("../.."))
# Point REDIS_URL at a local Redis instance before the project modules are
# imported. NOTE(review): presumably the pipeline reads this variable at
# import time -- confirm before reordering these lines.
os.environ["REDIS_URL"] = "redis://localhost:6379/0"

from apps.cipas.app.pipeline.tier2_syntactic import tier2
from apps.cipas.app.features.ast_analysis import extract_ast_features

## 1. Feature Extraction Analysis
Extract and print the AST features of two code snippets that compute the same sum with different loop constructs.

In [None]:
# Code 1: baseline Java-like method that sums 0..n-1 with a for loop.
code_1 = """
public int sum(int n) {
    int s = 0;
    for(int i=0; i<n; i++) {
        s += i;
    }
    return s;
}
"""

# Code 2: same computation rewritten with a while loop, i.e. a structural
# change rather than a textual one.
code_2 = """
public int sum(int n) {
    int s = 0;
    int i = 0;
    while(i < n) {
        s += i;
        i++;
    }
    return s;
}
"""

# Extract the AST features of both snippets, in order, in a single pass.
feat_1, feat_2 = [extract_ast_features(snippet) for snippet in (code_1, code_2)]

# Print each feature set under its label for side-by-side inspection.
for label, features in (("Features 1:", feat_1), ("Features 2:", feat_2)):
    print(label, features)

## 2. Similarity Calculation
Compute the cosine similarity between the two feature sets and check it against the 0.7 candidate threshold.

In [None]:
# Score how close the two extracted feature sets are using Tier 2's cosine
# similarity helper, and report it to four decimal places.
sim = tier2.cosine_similarity(feat_1, feat_2)
print(f"Syntactic Similarity: {sim:.4f}")

# The pair counts as a candidate when the score reaches the cut-off used in
# this notebook.
threshold = 0.7
is_candidate = sim >= threshold
print(f"Is Valid Candidate? {is_candidate}")

## 3. Tier 2 Pipeline Integration Test
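Simulate the Tier 2 refinement step: mock a Tier 1 candidate, seed its AST features into Redis, and run `tier2.process` on the query snippet.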

In [None]:
# Mock Candidates from Tier 1
candidates = [{"submission_id": "s_target", "similarity": 0.5, "tier": 1}]

# We need to manually populate Redis with the target's AST features first
# because Tier 2 .process() expects candidates to have features in DB
import redis
r = redis.Redis.from_url("redis://localhost:6379/0")
r.set("ast:s_target", json.dumps(feat_2))

refined = await tier2.process("s_query", code_1, candidates)
print("Refined Results:", refined)