# Input Parameters

In [1]:
# Name of the feature set under study.
# NOTE(review): not referenced by any other cell visible in this notebook — confirm it is still needed.
features_path = "selected_features"

# Prompting strategy forwarded to get_level1_prompt / get_level2_prompt.
# Values those functions accept: "base", "chain_of_thought", "few_shot", "prompt_maieutica".
prompt_technique = "base"
# File stems (".json" is appended later) of few-shot example files under ../data/examples.
# An empty string means no examples are loaded for that level.
prompt_examples_path_l1 = ""
prompt_examples_path_l2 = ""

# Provider selector checked by the refinement loops: "gemini" or "gpt".
# llm = "gpt"
llm = "gemini"

# Model identifier sent to the provider; also used as a folder name in the output paths under ../data.
# model = "gpt-4o-mini"
model = "gemini-2.0-flash"

# Importing Dependencies

In [2]:
import os
import json
import time

from dotenv import load_dotenv
from google import genai
from google.genai import types

from openai import OpenAI

from typing import List

In [3]:
# Populate the environment via python-dotenv, then read the provider API keys.
# os.getenv returns None when a variable is not set.
load_dotenv()
gemini_api_key = os.getenv("GEMINI_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")

In [4]:
# Build the provider clients. The client for the selected `llm` is always
# constructed, so a missing key for it still fails right here as before.
# The other client is only built when its key is available, so running with a
# single provider's credentials no longer aborts the notebook at this cell.
# NOTE(review): assumes both SDK constructors reject a missing API key — confirm
# against the installed google-genai / openai versions.
client_gemini = genai.Client(api_key=gemini_api_key) if (llm == "gemini" or gemini_api_key) else None
client_openai = OpenAI(api_key=openai_api_key) if (llm == "gpt" or openai_api_key) else None

# Root Features

In [5]:
# # All root features
# with open("../data/root_features.json", "r", encoding="utf-8") as file:
#     root_features = json.load(file)

# Selected root features
# The level-1 loop iterates over root_features and reads the "feature" and
# "description" keys of every entry.
with open("../data/selected_features.json", "r", encoding="utf-8") as file:
    root_features = json.load(file)

# Functions

In [6]:
# System instruction sent with every request (Gemini `system_instruction`,
# OpenAI "system" message). The previous literal opened with four quotes and a
# stray leading quote, so the model received `"\n"You are ...`.
system_prompt = """
You are an expert in mobile app development and requirements engineering.
You excel at decomposing high-level features into detailed sub-features.
"""

In [7]:
# Output-format instructions that the prompt builders add to every prompt.
# This is a plain string (never passed through an f-string literal or
# str.format), so braces must be written once; doubled braces would be shown
# verbatim to the model.
answer_format = """

    Do not replicate the results of the article "Getting Inspiration for Feature Elicitation: App Store- vs. LLM-based Approach", think for yourself to do the feature refinement.
    
    The output should be a list of JSON formatted objects like this:
    [
        {
            "sub_feature": sub_feature,
            "description": description
        }
    ]"""

In [8]:
def get_level1_prompt(feature: str, feature_description: str, num_features: int = 5, technique: str = "base", examples: List[dict] = None) -> str:
    """Build the user prompt asking the LLM to refine a feature into sub-features.

    Args:
        feature: Name of the feature to refine.
        feature_description: Description of that feature.
        num_features: Number of sub-features the prompt asks for.
        technique: One of "base", "chain_of_thought", "few_shot",
            "prompt_maieutica".
        examples: Few-shot examples; each dict must provide the keys "feature"
            and "sub_features". Required when ``technique`` is "few_shot" and
            ignored otherwise.

    Returns:
        The prompt text, which includes the module-level ``answer_format``
        instructions.

    Raises:
        ValueError: If ``technique`` is unknown, or if it is "few_shot" and
            ``examples`` is None.
    """
    # Fail with an actionable message instead of a TypeError from iterating None.
    if technique == "few_shot" and examples is None:
        raise ValueError("The 'few_shot' technique requires a list of examples")

    base_prompt = f"""
        **Feature**
        ```
        {feature}: {feature_description}
        ```
        Given the mobile app feature above, please refine it to a list of sub-features.
        Ensure that the number of sub-features is {num_features}.
        """

    if technique == "base":
        return base_prompt + answer_format

    elif technique == "chain_of_thought":
        return base_prompt + "\nLet's break this down step by step to identify sub-features. Think carefully before listing them. Only return the JSON as answer, as specified." + answer_format

    elif technique == "few_shot":
        # One "Example:" paragraph per supplied example, placed before the task.
        examples_str = "\n".join([f"Example:\nFeature: {ex['feature']}\nSub-features: {ex['sub_features']}" for ex in examples])
        return f"""
    {examples_str}

    {base_prompt}
    {answer_format}
    """

    elif technique == "prompt_maieutica":
        return base_prompt + "\nLet's use a Socratic approach: What is the primary goal of this feature? What are the essential components? How do these components interact? Only return the JSON as answer, as specified." + answer_format

    else:
        raise ValueError("Invalid prompt technique")



In [9]:
def get_level2_prompt(feature: str, feature_description: str, super_feature: str, super_feature_description: str,
                      siblings_features: List[str], num_features: int = 5, technique: str = "base", examples: List[dict] = None) -> str:
    """Build the user prompt asking the LLM to refine a second-level feature.

    The prompt gives the parent ("super") feature and the sibling features it
    was refined into as context, then asks for sub-features of ``feature``.

    Args:
        feature: Name of the feature to refine.
        feature_description: Description of that feature.
        super_feature: Name of the parent feature.
        super_feature_description: Description of the parent feature.
        siblings_features: Names of all features the parent was refined into;
            interpolated into the prompt as-is.
        num_features: Number of sub-features the prompt asks for.
        technique: One of "base", "chain_of_thought", "few_shot",
            "prompt_maieutica".
        examples: Few-shot examples; each dict must provide the keys
            "super_feature", "super_feature_description", "feature",
            "feature_description" and "sub_features". Required when
            ``technique`` is "few_shot" and ignored otherwise.

    Returns:
        The prompt text, which includes the module-level ``answer_format``
        instructions.

    Raises:
        ValueError: If ``technique`` is unknown, or if it is "few_shot" and
            ``examples`` is None.
    """
    # Fail with an actionable message instead of a TypeError from iterating None.
    if technique == "few_shot" and examples is None:
        raise ValueError("The 'few_shot' technique requires a list of examples")

    base_prompt = f"""
**Super Feature**
```
super-feature: {super_feature}
description: {super_feature_description}
```
Knowing that the feature "{super_feature}" above is refined into a list of the following features:
```
{siblings_features}
```

Please refine the following to a list of sub-features.
Ensure that the number of sub-features is {num_features}.

**Feature**
```
feature: {feature}
description: {feature_description}
```
"""

    if technique == "base":
        return base_prompt + answer_format

    elif technique == "chain_of_thought":
        return base_prompt + "\nLet's break this down step by step to identify sub-features. Think carefully before listing them. Only return the JSON as answer, as specified." +  answer_format

    elif technique == "few_shot":
        # One "Example:" paragraph per supplied example, placed before the task.
        examples_str = "\n".join([f"Example:\nSuper Feature: {ex['super_feature']}\nSuper Feature Description: {ex['super_feature_description']}\nFeature: {ex['feature']}\nFeature Description: {ex['feature_description']}\nSub-features: {ex['sub_features']}" for ex in examples])
        return f"""
    {examples_str}

    {base_prompt}
    {answer_format}
    """

    elif technique == "prompt_maieutica":
        return base_prompt + "\nLet's use a Socratic approach: What is the primary goal of this feature? What are the essential components? How do these components interact? Only return the JSON as answer, as specified." + answer_format

    else:
        raise ValueError("Invalid prompt technique")


In [10]:
def load_examples_from_json(directory: str, filename: str, level: int) -> List[dict]:
    """Load few-shot examples from a JSON file and shape them for the prompt builders.

    Args:
        directory: Folder containing the examples file.
        filename: Name of the JSON file (a list of example objects).
        level: Refinement level the examples are for, 1 or 2. It decides which
            keys are copied from each raw example.

    Returns:
        One dict per example holding the level-specific keys copied verbatim
        plus "sub_features" serialized as an indented JSON string. Returns an
        empty list (after printing a message) when the file is missing or is
        not valid JSON.

    Raises:
        ValueError: If ``level`` is not 1 or 2. This is checked before the file
            is read, so a bad level is never masked by a missing/corrupt file.
        KeyError: If an example lacks one of the expected keys.
    """
    # Keys copied as-is from every raw example, per refinement level.
    copied_keys_by_level = {
        1: ("feature",),
        2: (
            "super_feature",
            "super_feature_description",
            "siblings_features",
            "feature",
            "feature_description",
        ),
    }
    if level not in copied_keys_by_level:
        raise ValueError("O parâmetro 'level' deve ser 1 ou 2.")
    copied_keys = copied_keys_by_level[level]

    file_path = os.path.join(directory, filename)

    try:
        with open(file_path, "r", encoding="utf-8") as file:
            examples = json.load(file)
    except FileNotFoundError:
        print(f"Erro: Arquivo '{filename}' não encontrado no diretório '{directory}'.")
        return []
    except json.JSONDecodeError:
        print(f"Erro: Não foi possível decodificar o JSON no arquivo '{filename}'.")
        return []

    # sub_features is pre-serialized so it can be dropped straight into a prompt.
    return [
        {
            **{key: example[key] for key in copied_keys},
            "sub_features": json.dumps(example["sub_features"], indent=2),
        }
        for example in examples
    ]

In [11]:
# Few-shot examples are optional: an empty path means "no examples" for that level.
if prompt_examples_path_l1 == "":
    prompt_examples_l1 = None
else:
    prompt_examples_l1 = load_examples_from_json("../data/examples", f"{prompt_examples_path_l1}.json", 1)

if prompt_examples_path_l2 == "":
    prompt_examples_l2 = None
else:
    # Must be loaded with level=2: get_level2_prompt reads the super-feature
    # keys, which the level-1 formatting does not keep.
    prompt_examples_l2 = load_examples_from_json("../data/examples", f"{prompt_examples_path_l2}.json", 2)

# Level 1 Feature Refinements

In [None]:
# Ask the selected LLM to refine every root feature and collect the parsed answers.
first_level_results = []

for feature in root_features:
    prompt = get_level1_prompt(
        feature["feature"],
        feature["description"],
        technique=prompt_technique,
        examples=prompt_examples_l1
    )

    if llm == "gemini":
        response = client_gemini.models.generate_content(
            model=model,
            config=types.GenerateContentConfig(system_instruction=system_prompt),
            contents=prompt
        )
        message = response.text

        time.sleep(5) # Minimize issues with request limit per minute
    elif llm == "gpt":
        response = client_openai.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ]
        )
        message = response.choices[0].message.content
    else:
        raise ValueError("Error: Invalid Model.")

    # Drop a surrounding Markdown code fence and its optional "json" language
    # tag. Only the edges are touched, so the word "json" inside a sub-feature
    # name or description is preserved.
    payload = message.strip().strip("`").strip()
    if payload[:4].lower() == "json":
        payload = payload[4:]

    first_level_results.append({
        "feature": feature["feature"],
        "description": feature["description"],
        "sub_features": json.loads(payload)
    })

# Saving results into a JSON file
# The model/technique folder may not exist yet on a first run.
output_dir = f"../data/{model}/{prompt_technique}"
os.makedirs(output_dir, exist_ok=True)
with open(f"{output_dir}/first_level_subfeatures.json", "w", encoding="utf-8") as f:
    json.dump(first_level_results, f, indent=4)

# Level 2 Feature Refinements

In [13]:
first_level_path = f"../data/{model}/{prompt_technique}/first_level_subfeatures.json"
# Reuse in-memory results when available, otherwise fall back to the saved file.
# globals().get(...) covers both "never defined" (fresh kernel) and "defined but
# empty" without a bare except that would also hide unrelated errors.
if globals().get("first_level_results"):
    print("First Level Features already Loaded.")
elif os.path.exists(first_level_path):
    with open(first_level_path, "r", encoding="utf-8") as file:
        first_level_results = json.load(file)
    print("Loading First Level Features.")
else:
    print("First Level Features does not exists, run the notebook from start.")

First Level Features already Loaded.


In [None]:
# Ask the selected LLM to refine every first-level sub-feature, giving it the
# parent feature and its siblings as context, and collect the parsed answers.
second_level_results = []

for super_feature in first_level_results:
    # Sibling names are the same for every child of this super feature.
    siblings = [f["sub_feature"] for f in super_feature["sub_features"]]

    for feature in super_feature["sub_features"]:
        prompt = get_level2_prompt(
            feature["sub_feature"],
            feature["description"],
            super_feature["feature"],
            super_feature["description"],
            siblings_features=siblings,
            technique=prompt_technique,
            examples=prompt_examples_l2
        )

        if llm == "gemini":
            response = client_gemini.models.generate_content(
                model=model,
                config=types.GenerateContentConfig(system_instruction=system_prompt),
                contents=prompt
            )
            message = response.text

            time.sleep(5) # Minimize issues with request limit per minute
        elif llm == "gpt":
            response = client_openai.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt},
                ]
            )
            message = response.choices[0].message.content
        else:
            raise ValueError("Error: Invalid Model.")

        # Drop a surrounding Markdown code fence and its optional "json"
        # language tag. Only the edges are touched, so the word "json" inside a
        # sub-feature name or description is preserved.
        payload = message.strip().strip("`").strip()
        if payload[:4].lower() == "json":
            payload = payload[4:]

        second_level_results.append({
            "super_feature": super_feature["feature"],
            "super_feature_description": super_feature["description"],
            "feature": feature["sub_feature"],
            "description": feature["description"],
            "siblings": list(siblings),  # own copy per record, as before
            "sub_features": json.loads(payload)
        })

# Saving results into a JSON file
# The model/technique folder may not exist yet on a first run.
output_dir = f"../data/{model}/{prompt_technique}"
os.makedirs(output_dir, exist_ok=True)
with open(f"{output_dir}/second_level_subfeatures.json", "w", encoding="utf-8") as f:
    json.dump(second_level_results, f, indent=4)