In this notebook, we make API requests to OpenRouter, to generate Gherkins from user stories using various models and various prompting approaches.

In [1]:
import requests
import json
from dotenv import load_dotenv
import pandas as pd
import chardet # To detect file encodings
import asyncio
import httpx

import os
from pathlib import Path
import shutil

from datetime import datetime

# Load variables from a local .env file (if one exists) into the process environment
load_dotenv()

# OpenRouter API key, read from the "openrouter_token" environment variable (None if it is unset)
or_token = os.getenv("openrouter_token")

In [2]:
# Input directory holding the user story .txt files (relative to the notebook's working directory)
us_data_dir = Path("../data/user_stories/sample_data")
# Output directory for this experiment; the multi-turn chat logs are written beneath it
experiment_dir = Path("../data/gherkins/sample_data/test")

#### Load user story data.

In [3]:
# Function to parse user stories from a .txt file or all .txt files in a folder
def parse_user_stories(path):
    """Parse user stories from one .txt file or from every .txt file in a folder.

    Each non-empty line of a file is treated as one user story. Stories are keyed by
    "<filename without extension>_<1-based line number>"; blank lines are skipped but
    still count towards the line number.

    Args:
        path: Path (str or os.PathLike) to a single file or to a folder of .txt files.

    Returns:
        dict mapping user story ID to the stripped user story text.

    Raises:
        ValueError: If `path` is neither an existing file nor an existing folder.
    """
    if os.path.isfile(path):
        files = [path] # Single .txt file
    elif os.path.isdir(path):
        # os.listdir returns entries in arbitrary order; sort so that the resulting dict
        # (and anything sliced from it) is reproducible across machines and runs
        files = sorted(os.path.join(path, f) for f in os.listdir(path) if f.endswith('.txt'))
    else:
        raise ValueError(f"Path '{path}' is not a valid file or folder.")

    user_stories = {}

    for filepath in files:
        filename = os.path.splitext(os.path.basename(filepath))[0] # Remove extension

        # 'utf-8-sig' decodes plain UTF-8 identically but also drops a leading BOM, which
        # str.strip() would otherwise leave attached to the first story of the file
        with open(filepath, 'r', encoding='utf-8-sig') as f:
            for i, line in enumerate(f, start=1):
                us = line.strip()
                if us:  # Skip empty lines
                    user_stories[f"{filename}_{i}"] = us

    return user_stories

In [4]:
# Create user story dictionary for the g04-recycling file; keys are "<filename>_<line number>" IDs, values are the story text
us_dict = parse_user_stories(us_data_dir / "g04-recycling.txt")

In [5]:
len(us_dict)

51

In [6]:
# Check for duplicate user stories: evaluates to True when no two IDs share identical story text
us_list = list(us_dict.values())

len(us_list) == len(set(us_list))

True

In [7]:
# Create sample subset of user stories for testing: the first three entries of us_dict, in insertion order
sample_us_dict = dict(list(us_dict.items())[:3])

sample_us_dict

{'g04-recycling_1': 'As a user, I want to click on the address, so that it takes me to a new tab with Google Maps.',
 'g04-recycling_2': 'As a user, I want to be able to anonymously view public information, so that I know about recycling centers near me before creating an account.',
 'g04-recycling_3': 'As a user, I want to be able to enter my zip code and get a list of nearby recycling facilities, so that I can determine which ones I should consider.'}

#### Create functions to make single-turn API requests.

Simulates creating a new chat for each user story, so each user story is processed by the LLM in isolation.

In [8]:
# Function to format model name
def format_model_name(model):
    """Return the model identifier with every '/' swapped for '-'."""
    return "-".join(model.split("/"))

In [12]:
# Function to set up request parameters
def build_openrouter_request_data(model, or_token, us_text, system_prompt=None, temperature=0.8):
    """Assemble the URL, headers and JSON payload for one single-turn chat completion.

    Args:
        model: OpenRouter model identifier placed in the payload.
        or_token: API token used for the Bearer Authorization header.
        us_text: User story text; sent as the user message prefixed with "User story: ".
        system_prompt: Optional system message placed before the user message.
        temperature: Sampling temperature placed in the payload.

    Returns:
        Tuple of (url, headers, data).
    """
    # Build the message list first: optional system message, then the user story
    chat_messages = []
    if system_prompt is not None:
        chat_messages.append({"role": "system", "content": system_prompt})
    chat_messages.append({"role": "user", "content": f"User story: {us_text}"})

    payload = {
        "model": model,
        "messages": chat_messages,
        "temperature": temperature,
        "provider": {"data_collection": "deny"},
    }

    auth_headers = {
        "Authorization": f"Bearer {or_token}",
        "Content-Type": "application/json",
    }

    return "https://openrouter.ai/api/v1/chat/completions", auth_headers, payload

In [13]:
# Function to make OpenRouter API request 
# NOTE: 1) this function uses the standard Requests library, so is blocking/synchronous, i.e., each request waits for previous to complete
# NOTE: 2) this function simulates creating a new chat for each user story
def openrouter_request(model, or_token, us_id, us_text, system_prompt=None, temperature=0.8, timeout=120):
    """Send one blocking single-turn chat completion request to OpenRouter.

    Args:
        model: OpenRouter model identifier.
        or_token: OpenRouter API token.
        us_id: ID of the user story; copied into the result.
        us_text: User story text sent to the model.
        system_prompt: Optional system prompt.
        temperature: Sampling temperature.
        timeout: Seconds to wait for the server before giving up. Without a timeout,
            `requests` waits indefinitely on a stalled connection.

    Returns:
        dict with keys "model", "created", "us_id", "user_story", "raw_response",
        "prompt_tokens" and "completion_tokens", or None if the request failed
        (the error is printed).
    """
    url, headers, data = build_openrouter_request_data(model, or_token, us_text, system_prompt, temperature)

    try:
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
        response.raise_for_status()

        # Decode the body once instead of re-parsing it for every field
        json_data = response.json()

        choices = json_data.get("choices")
        if not choices:
            # A successful HTTP status can still carry a body without completions;
            # report that payload rather than failing on None[0]
            raise ValueError(f"Response contains no choices: {json_data}")

        usage = json_data.get("usage", {})

        return {
            "model": format_model_name(model),
            "created": json_data.get("created", ""),
            "us_id": us_id,
            "user_story": us_text,
            "raw_response": choices[0]["message"]["content"],
            "prompt_tokens": usage.get("prompt_tokens", 0),
            "completion_tokens": usage.get("completion_tokens", 0),
        }

    except Exception as e:
        # Best-effort: a failed story must not abort the whole batch
        print(f"Sync error: {e}")
        return None


In [14]:
# Function to make asynchronous OpenRouter API request
# NOTE: 1) this function uses the httpx library for async requests, i.e., it does not block while waiting for a response
# NOTE: 2) as above, this function simulates creating a new chat for each user story
async def openrouter_request_async(model, or_token, us_id, us_text, system_prompt=None, temperature=0.8, timeout=120.0):
    """Send one non-blocking single-turn chat completion request to OpenRouter.

    Args:
        model: OpenRouter model identifier.
        or_token: OpenRouter API token.
        us_id: ID of the user story; copied into the result.
        us_text: User story text sent to the model.
        system_prompt: Optional system prompt.
        temperature: Sampling temperature.
        timeout: Seconds allowed for the request. Set explicitly because httpx's
            built-in default is short for LLM completions.

    Returns:
        dict with keys "model", "created", "us_id", "user_story", "raw_response",
        "prompt_tokens" and "completion_tokens", or None if the request failed
        (the error is printed).
    """
    url, headers, data = build_openrouter_request_data(model, or_token, us_text, system_prompt, temperature)

    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.post(url, headers=headers, json=data)
            response.raise_for_status()

            json_data = response.json()

            choices = json_data.get("choices")
            if not choices:
                # A successful HTTP status can still carry a body without completions;
                # report that payload rather than failing on None[0]
                raise ValueError(f"Response contains no choices: {json_data}")

            usage = json_data.get("usage", {})

            return {
                "model": format_model_name(model),
                "created": json_data.get("created", ""),
                "us_id": us_id,
                "user_story": us_text,
                "raw_response": choices[0]["message"]["content"],
                "prompt_tokens": usage.get("prompt_tokens", 0),
                "completion_tokens": usage.get("completion_tokens", 0)
            }

    except Exception as e:
        # Best-effort: a failed story must not abort the whole batch.
        # repr() is used because timeout exceptions can have an empty message, and the
        # story/model are included because concurrent requests fail in arbitrary order.
        print(f"Async error for user story {us_id} ({model}): {e!r}")
        return None

In [15]:
# Shared gate: at most 5 requests may be in flight at once, to avoid rate limiting
semaphore = asyncio.Semaphore(5)

async def limited_openrouter_request(model, or_token, us_id, us_text, system_prompt=None, temperature=0.8):
    """Run `openrouter_request_async` once a slot of the shared semaphore is free.

    Takes the same arguments as `openrouter_request_async` and returns its result unchanged.
    """
    await semaphore.acquire()
    try:
        result = await openrouter_request_async(model, or_token, us_id, us_text, system_prompt, temperature)
        return result
    finally:
        # Always hand the slot back, even if the request raised or was cancelled
        semaphore.release()


In [16]:
# System prompt for the single-turn requests: sent as the "system" message together with each individual user story
system_prompt = "You are a QA Engineer. Please generate a complete Gherkin feature file with 3-5 realistic, testable scenarios for the user story below. Please return the Gherkin only, without comments or explanations."

In [17]:
# OpenRouter model identifiers to compare; every user story is sent to each model in this list
# llms = ["openai/gpt-4o-mini", "meta-llama/llama-3.1-70b-instruct"]
llms = ["openai/gpt-4o-mini", "google/gemini-2.0-flash-001"]

# Main function to orchestrate asynchronous OpenRouter API requests
async def main():
    """Fan out one rate-limited single-turn request per (user story, model) pair.

    Reads the notebook-level `sample_us_dict`, `llms`, `or_token` and `system_prompt`.
    Returns the list of per-request results in submission order: stories in dict
    order, and for each story the models in `llms` order.
    """
    pending = []
    for us_id, user_story in sample_us_dict.items():
        for model in llms:
            pending.append(
                limited_openrouter_request(model, or_token, us_id, user_story, system_prompt=system_prompt)
            )

    return await asyncio.gather(*pending)

# Top-level `await` works in a notebook cell (IPython runs it on the kernel's event loop); a plain script would need asyncio.run(main())
results = await main()

In [18]:
# The request functions return None for a failed request; drop those entries so that
# only real result records reach the DataFrame constructor
df = pd.DataFrame.from_records([record for record in results if record is not None])

In [19]:
df.head()

Unnamed: 0,model,created,us_id,user_story,raw_response,prompt_tokens,completion_tokens
0,openai-gpt-4o-mini,1762354426,g04-recycling_1,"As a user, I want to click on the address, so ...",```gherkin\nFeature: Open address in Google Ma...,83,262
1,google-gemini-2.0-flash-001,1762354426,g04-recycling_1,"As a user, I want to click on the address, so ...",```gherkin\nFeature: Address Link Opens Google...,70,310
2,openai-gpt-4o-mini,1762354426,g04-recycling_2,"As a user, I want to be able to anonymously vi...",```gherkin\nFeature: Anonymous Viewing of Publ...,87,307
3,google-gemini-2.0-flash-001,1762354426,g04-recycling_2,"As a user, I want to be able to anonymously vi...",```gherkin\nFeature: Anonymous User Views Publ...,74,333
4,openai-gpt-4o-mini,1762354426,g04-recycling_3,"As a user, I want to be able to enter my zip c...",```gherkin\nFeature: Nearby Recycling Faciliti...,92,374


In [20]:
# Persist the single-turn results (plain string: the path has no placeholders, so no f-string is needed)
df.to_csv("../single_turn_test_5-11.csv", index=False)

#### Create functions to make multi-turn API requests.

Simulating an ongoing LLM chat interaction, where the chat history is prepended to each new user chat turn &mdash; so multiple user stories can be processed in the same chat interaction.

In [21]:
# Directory for the multi-turn chat logs of this experiment; created (with parents) if missing
chat_log_dir = experiment_dir.joinpath("chat_logs")
chat_log_dir.mkdir(parents=True, exist_ok=True)

In [22]:
# # Get path for backup chat log file with timestamp
# def backup_file_path(chat_log_dir, model):
#     timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
#     file_path = chat_log_dir / model / f"chat_log_{timestamp}.json"

#     return file_path

In [23]:
# Helper function to save multi-turn chat to active log file
def save_conversation(log_dir, conversation):
    """Write the conversation log to "active.json" inside `log_dir`.

    The log is first written to a temporary sibling file and then moved into place,
    so an interruption or serialization error mid-write cannot leave a truncated
    "active.json" behind (the previous version stays intact).

    Args:
        log_dir: Directory for the log file (str or Path); created if missing.
        conversation: JSON-serializable conversation log.

    Returns:
        Path of the written "active.json" file.
    """
    log_dir = Path(log_dir)
    # exist_ok=True already covers the "directory exists" case; no separate check needed
    log_dir.mkdir(parents=True, exist_ok=True)

    filepath = log_dir / "active.json"
    tmp_path = log_dir / "active.json.tmp"

    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(conversation, f, indent=2, ensure_ascii=False)

    # Swap the fully written file into place in a single step
    os.replace(tmp_path, filepath)

    return filepath

In [24]:
# Helper function to back up active chat log file
def backup_active_file(log_dir):
    """Copy "active.json" in `log_dir` to a timestamped "chat_log_<YYYYmmdd_HHMMSS>.json".

    Args:
        log_dir: Directory (Path) containing the active log file.

    Returns:
        Path of the backup copy, or None when there is no active file to back up.
    """
    source = log_dir / "active.json"

    # Nothing to back up
    if not os.path.exists(source):
        return None

    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    destination = log_dir / f"chat_log_{stamp}.json"
    shutil.copy(source, destination)

    return destination

In [25]:
# prompt = "You are a QA Engineer. For each user story I give you, please generate a complete Gherkin feature file with at least three realistic, testable scenarios. Try to cover: 1. The happy path (expected successful flow), 2. At least one edge case, 3. At least one error or failure condition. Please return the Gherkin only, without comments or explanation."
# reminder = "Reminder: your task is to generate a complete Gherkin feature file with at least three realistic, testable scenarios for the user story I give you, returning only the Gherkin."

In [28]:
# System prompt for the multi-turn chats. NOTE: this rebinds the `system_prompt` name that the single-turn cells above also use
system_prompt = "You are a QA Engineer. For each user story I give you, please generate a complete Gherkin feature file with 3-5 realistic, testable scenarios. Please return the Gherkin only, without comments or explanation."
# Optional user message sent before each user story in a multi-turn chat; None disables it
reminder = None

In [52]:
# Function to conduct multi-turn chat with model over multiple user stories and log the conversation
async def chat_with_model(model, or_token, log_dir, user_stories, system_prompt=None, reminder=None, temperature=0.8, timeout=120.0):
    """Run one multi-turn chat with `model` over all user stories and log it.

    The full message history is re-sent on every turn, so later stories are generated
    with earlier stories and answers as context. After each successful turn the log is
    saved to "active.json" in a model-specific subdirectory of `log_dir`; at the end a
    timestamped backup of that file is made.

    Args:
        model: OpenRouter model identifier.
        or_token: OpenRouter API token.
        log_dir: Root log directory (Path); a subdirectory per model is created in it.
        user_stories: dict mapping user story ID to user story text.
        system_prompt: Optional system prompt, sent once at the start of the chat.
        reminder: Optional user message inserted before every user story.
        temperature: Sampling temperature.
        timeout: Seconds allowed per request. Set explicitly because httpx's built-in
            default is short for LLM completions, whose prompts grow every turn.

    Returns:
        dict with keys "model", "system_prompt" and "conversation_turns"; each turn is
        {"user": {...}, "assistant": {...}}. Failed turns are not included.
    """
    # Create model-specific log directory, if it doesn't exist
    model_log_dir = log_dir / format_model_name(model)
    os.makedirs(model_log_dir, exist_ok=True)

    headers = {
        "Authorization": f"Bearer {or_token}",
        "Content-Type": "application/json"
    }

    print("Starting chat for model:", model)

    # Messages list will hold the full conversation history, updated after each turn
    messages = []

    # Conversation log dict will hold the structured log to be saved to file
    conversation_log = {
        "model": model,
        "system_prompt": system_prompt,
        "conversation_turns": []
    }

    # IDs of already processed user stories
    completed_stories = set()

    # Add system prompt to messages - prompt is sent once at start of conversation
    if system_prompt is not None:
        messages.append({"role": "system", "content": system_prompt})

    async with httpx.AsyncClient(timeout=timeout) as client:
        for us_id, us_text in user_stories.items():
            if us_id in completed_stories: # Redundant for now; intended for resuming from file (functionality to be added)
                print(f"Skipping already completed user story: {us_id}")
                continue

            turn = {"user": {}, "assistant": {}}

            # Remember where this turn starts so it can be rolled back if the request fails
            history_length = len(messages)

            if reminder is not None:
                # Add reminder message before each user story
                messages.append({"role": "user", "content": reminder})
                turn["user"]["reminder"] = reminder

            user_message = f"User story: {us_text}"
            messages.append({"role": "user", "content": user_message})

            turn["user"]["content"] = user_message
            turn["user"]["us_id"] = us_id

            try:
                response = await client.post(
                    url="https://openrouter.ai/api/v1/chat/completions",
                    headers=headers,
                    json={
                        "model": model,
                        "messages": messages,
                        "temperature": temperature,
                        "provider": {"data_collection": "deny"},
                    },
                )
                response.raise_for_status()

                data = response.json()

                choices = data.get("choices")
                if not choices:
                    # A successful HTTP status can still carry a body without completions
                    raise ValueError(f"Response contains no choices: {data}")

                assistant_response = choices[0]["message"]["content"]

            except Exception as e:
                # Drop this turn's unanswered user message(s); otherwise every later
                # request would carry orphaned user turns in its history
                del messages[history_length:]
                # repr() because timeout exceptions can have an empty message
                print(f"Error during API request for user story {us_id}: {e!r}")
                continue

            usage = data.get("usage", {})

            messages.append({"role": "assistant", "content": assistant_response})

            turn["assistant"]["content"] = assistant_response
            turn["assistant"]["created"] = data.get("created", "")
            turn["assistant"]["prompt_tokens"] = usage.get("prompt_tokens", 0)
            turn["assistant"]["completion_tokens"] = usage.get("completion_tokens", 0)

            completed_stories.add(us_id)
            conversation_log["conversation_turns"].append(turn)

            # Save updated conversation log to active file after each turn. A failed save
            # is reported as such and does not discard the completed turn.
            try:
                save_conversation(model_log_dir, conversation_log)
            except Exception as e:
                print(f"Error saving chat log after user story {us_id}: {e!r}")

    # Save time-stamped backup of active chat log file. Skipped when no turn succeeded:
    # any active file present would then stem from an earlier run, not from this chat.
    if conversation_log["conversation_turns"]:
        backup_active_file(model_log_dir)

    return conversation_log

In [53]:
# Root directory for the multi-turn chat logs (same path as `chat_log_dir`); chat_with_model creates one subdirectory per model in it
experiment_log_dir = experiment_dir / "chat_logs"

In [54]:
async def main():
    """Run one multi-turn chat per model in `llms`, concurrently.

    Reads the notebook-level `llms`, `or_token`, `experiment_log_dir`, `sample_us_dict`
    and `system_prompt`. Returns the conversation logs in `llms` order.
    NOTE: this rebinds the name `main` used earlier for the single-turn run.
    """
    chats = []
    for model in llms:
        chats.append(
            chat_with_model(model, or_token, experiment_log_dir, sample_us_dict, system_prompt=system_prompt)
        )

    return await asyncio.gather(*chats)

# Run the multi-turn chats. NOTE: this rebinds `results`, replacing the single-turn results held under the same name
results = await main()

Starting chat for model: openai/gpt-4o-mini
Starting chat for model: google/gemini-2.0-flash-001


In [51]:
results

[{'model': 'openai/gpt-4o-mini',
  'system_prompt': 'You are a QA Engineer. For each user story I give you, please generate a complete Gherkin feature file with 3-5 realistic, testable scenarios. Please return the Gherkin only, without comments or explanation.',
  'conversation_turns': [{'role': 'user',
    'content': 'User story: As a user, I want to click on the address, so that it takes me to a new tab with Google Maps.',
    'us_id': 'g04-recycling_1'},
   {'user': {'content': 'User story: As a user, I want to click on the address, so that it takes me to a new tab with Google Maps.',
     'us_id': 'g04-recycling_1'},
    'assistant': {'content': '```gherkin\nFeature: Address Link Navigation\n\n  Scenario: User clicks on the address link\n    Given the user is on the homepage\n    When the user clicks on the address link\n    Then a new tab should open with Google Maps\n    And the address should be correctly displayed in the URL of the new tab\n\n  Scenario: User clicks on the addr

In [None]:
# Flatten the multi-turn conversation logs into one row per completed turn
rows = []

USER_STORY_PREFIX = "User story: "  # Prefix chat_with_model puts in front of each story

for model_result in results:
    model = model_result.get("model")

    # chat_with_model stores its turns under "conversation_turns"; each turn is
    # {"user": {"content", "us_id", ...},
    #  "assistant": {"content", "created", "prompt_tokens", "completion_tokens"}}
    for turn in model_result.get("conversation_turns", []):
        # Skip entries that are not in the user/assistant turn format
        if "user" not in turn or "assistant" not in turn:
            continue

        user_part = turn["user"]
        assistant_part = turn["assistant"]

        # Recover the bare user story from the logged user message
        user_story = user_part.get("content", "")
        if user_story.startswith(USER_STORY_PREFIX):
            user_story = user_story[len(USER_STORY_PREFIX):]

        rows.append({
            "model": model,
            "us_id": user_part.get("us_id"),
            "user_story": user_story,
            "assistant_response": assistant_part.get("content"),
            # Token counts (if available)
            "prompt_tokens": assistant_part.get("prompt_tokens"),
            "completion_tokens": assistant_part.get("completion_tokens"),
            "created": assistant_part.get("created")
        })

# Create DataFrame
df = pd.DataFrame(rows)

# # Optional: order columns
# df = df[
#     [
#         "model",
#         "timestamp",
#         "us_id",
#         "user_story",
#         "assistant_response",
#         "prompt_tokens",
#         "completion_tokens",
#         "total_tokens"
#     ]
# ]

# Preview
df.head()


In [None]:
# # Identify user stories that only appear once in the dataset i.e. those for which a model request failed
# story_counts = df["user_story"].value_counts()
# missing_stories = story_counts[story_counts == 1].index.tolist()

# missing_stories_df = df[df["user_story"].isin(missing_stories)]
# missing_stories_df.head(10)


In [None]:
# # Request missing Gherkin for user stories that only appear once in the dataset
# results = []

# for index, row in missing_stories_df.iterrows():
#     user_story = row['user_story']
#     model = row['model']
#     missing_model = (set(models) - {model}).pop()  # Get the other model

#     print(f"Requesting missing Gherkin for user story: {row['user_story']} using model: {missing_model}")

#     result = openrouter_request(user_story, missing_model, or_token)
#     results.append(result)
