In [2]:
import glob
import json
import os

import pandas as pd
import tqdm
# Directory that is globbed for "*architectural_understanding*" scenario JSON files.
SCENARIOS_DIR = "/shared_workspace_mfs/kirill/code_architect/LoCoBench/data/output/scenarios"

In [3]:
# Gather every architectural_understanding scenario into a single DataFrame.
# sorted() makes the row order reproducible: glob returns paths in arbitrary order.
arch_q_fnames = sorted(glob.glob(f"{SCENARIOS_DIR}/*architectural_understanding*"))

# Collect one flattened frame per file and concatenate once at the end;
# calling pd.concat inside the loop re-copies every accumulated row each time.
scenario_frames = []
for fname in tqdm.tqdm(arch_q_fnames):
    with open(fname, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Nested keys are flattened into '_'-joined column names.
    raw = pd.json_normalize(data, sep='_')
    raw['fpath'] = fname  # remember which file each row came from
    scenario_frames.append(raw)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when no scenario file matched the pattern.
df = pd.concat(scenario_frames, ignore_index=True, axis=0) if scenario_frames else pd.DataFrame()

 24%|██▍       | 244/1000 [00:00<00:01, 631.77it/s]

100%|██████████| 1000/1000 [00:05<00:00, 185.77it/s]


In [4]:
# Keep only the columns needed for the analysis; everything else is dropped.
relevant_columns = [
    'id',
    'task_category',
    'difficulty',
    'title',
    'description',
    'context_files',
    'context_length',
    'task_prompt',
    'expected_approach',
    'ground_truth',
    'evaluation_criteria',
    'metadata_context_length',
    'metadata_files_count',
    'metadata_information_coverage',
    'metadata_coverage_range',
    'metadata_generation_timestamp',
]
df = df[relevant_columns]

In [None]:
import requests
import json

def llm_request(sys_prompt, usr_prompt, model="deepseek/deepseek-v3.2", timeout=600.0):
    """
    Send a system + user prompt to the OpenRouter chat-completions endpoint.

    The API key is read from the ``OPENROUTER_API_KEY`` environment variable
    so that no credential is stored in the notebook.

    Args:
        sys_prompt: Content of the "system" message.
        usr_prompt: Content of the "user" message.
        model: OpenRouter model identifier.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``message`` dict of the first choice in the JSON response.

    Raises:
        RuntimeError: If ``OPENROUTER_API_KEY`` is not set, or the response
            body contains no choices.
        requests.HTTPError: If the server answers with an error status code.
    """
    api_key = os.environ.get("OPENROUTER_API_KEY")
    if not api_key:
        # Fail before any network traffic with an actionable message.
        raise RuntimeError(
            "OPENROUTER_API_KEY environment variable is not set; "
            "export it before calling llm_request()."
        )

    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": usr_prompt},
        ],
        # Ask the model to run with reasoning enabled.
        "reasoning": {"enabled": True},
    }

    response = requests.post(
        url="https://openrouter.ai/api/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        json=payload,  # requests serialises the payload to JSON itself
        timeout=timeout,
    )
    # Surface 4xx/5xx responses instead of failing later on a missing key.
    response.raise_for_status()

    body = response.json()
    choices = body.get("choices")
    if not choices:
        # Show the 'error' field when present, otherwise the whole body.
        raise RuntimeError(
            f"OpenRouter response contained no choices: {body.get('error', body)}"
        )

    # Return the assistant message of the first choice.
    return choices[0]["message"]

In [50]:
# System message sent with the taxonomy request.
sys_prompt = "You are a software engineer who is an expert in software architecture and design."

# Instruction part of the user message (a plain literal: it has no placeholders).
usr_prompt = "I have a set of software architecture questions, and I would like you to analyze them and generate a taxonomy. The goal is to categorize these questions into relevant themes or topics that reflect different aspects of software architecture. The taxonomy should help identify common categories and subcategories of questions, showing relationships between them."

# Join the titles outside the f-string: a backslash inside an f-string
# replacement field is a SyntaxError on Python versions before 3.12.
titles_block = ',\n'.join(df['title'].to_list())
usr_prompt += f"\nHere are the questions: \n\n{titles_block}"

In [58]:
# Ask the LLM for a taxonomy of the question titles (model left at its default).
resp = llm_request(sys_prompt=sys_prompt, usr_prompt=usr_prompt)

In [60]:
# Show the text content of the returned assistant message.
taxonomy_text = resp["content"]
print(taxonomy_text)

After analyzing the extensive list of software architecture questions, I've developed a comprehensive taxonomy that organizes them into meaningful categories and subcategories. This taxonomy reveals patterns in architectural concerns and relationships between different aspects of system design.

## Software Architecture Question Taxonomy

### 1. **Architectural Analysis & Understanding**
*1.1 Flow Tracing & Documentation*
- Data Flow Tracing (telemetry, transactions, requests)
- Event Flow Analysis (assessment completion, user actions)
- Workflow Documentation (order creation, medication orders)
- Request Lifecycle Tracing (authentication, GraphQL queries)

*1.2 Pattern Identification & Explanation*
- Architectural Pattern Analysis (CQRS, Hexagonal, Repository)
- Design Pattern Recognition (Observer, Factory, Strategy)
- System Communication Patterns (RPC vs Event Bus, REST vs GraphQL)

*1.3 System Mapping & Component Analysis*
- Component Responsibility Mapping
- Feature-to-Module Map

In [None]:
# Write df to disk as JSON Lines: one record per line.
questions_jsonl_path = "/shared_workspace_mfs/kirill/code_architect/LoCoBench/data_analysis/arch_understanding_questions.jsonl"
df.to_json(questions_jsonl_path, orient='records', lines=True)