# In [None]:
import json
import os
import requests
import random
import re
import tqdm
from transformers import AutoTokenizer

# Text-generation endpoint on the local server (127.0.0.1:8080);
# get_completion() POSTs its request payload here.
gen_url = "http://127.0.0.1:8080/v1/completions"

# Token-count endpoint on the same local server;
# token_count() POSTs {"prompt": ...} here.
token_count_url = "http://127.0.0.1:8080/api/extra/tokencount"

def token_count(text, send_ids=False):
    """Ask the token-count endpoint how many tokens ``text`` encodes to.

    Posts ``{"prompt": text}`` to ``token_count_url`` and reads the
    ``"value"`` (and optionally ``"ids"``) fields of the JSON reply.

    Args:
        text: The string to tokenize.
        send_ids: When true, also return the ``"ids"`` list from the reply.

    Returns:
        The ``"value"`` field of the reply, or a ``(value, ids)`` tuple when
        ``send_ids`` is true.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the reply lacks the expected field(s).
    """
    headers = {
        "Content-Type": "application/json"
    }

    # Named ``payload`` rather than ``json`` so the imported json module is
    # not shadowed inside this function.
    payload = {
        "prompt": text
    }

    response = requests.post(token_count_url, headers=headers, json=payload)

    # Surface HTTP failures directly instead of as a confusing KeyError or
    # JSON decoding error further down.
    response.raise_for_status()

    # Example reply recorded for this endpoint:
    #   {"value": 9,
    #    "ids": [1, 22557, 28725, 586, 1141, 349, 11952, 28709, 28723]}
    # Decode the body once and reuse it for both fields.
    body = response.json()

    if send_ids:
        return body["value"], body["ids"]
    return body["value"]


def get_completion(prompt, max_tokens=200, temperature=1.0, min_p=0.2, stop_sequence=None, grammar=""):
    """POST a generation request to ``gen_url`` and return the decoded reply.

    Args:
        prompt: Prompt text, sent as ``"prompt"``.
        max_tokens: Sent as ``"max_length"``.
        temperature: Sent as ``"temperature"``.
        min_p: Sent as ``"min_p"``.
        stop_sequence: List of stop strings, sent as ``"stop_sequence"``.
            ``None`` (the default) sends an empty list.
        grammar: Grammar text, sent as ``"grammar"``; empty string by default.

    Returns:
        The parsed JSON body of the server's response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # A ``None`` default avoids sharing one list object between calls.
    if stop_sequence is None:
        stop_sequence = []

    headers = {
        "Content-Type": "application/json"
    }

    # Named ``payload`` rather than ``json`` so the imported json module is
    # not shadowed inside this function.
    payload = {
        "prompt": prompt,
        "max_context_length": 8192,
        "max_length": max_tokens,
        "rep_pen": 1.0,
        "rep_pen_range": 600,
        "rep_pen_slope": 0,
        "temperature": temperature,
        "min_p": min_p,
        "sampler_order": [6, 0, 1, 2, 3, 4, 5],
        "grammar": grammar,
        "stop_sequence": stop_sequence,
    }

    # Example reply recorded by the original author:
    #   {'results': [{'text': '\n1. JavaScript: This language is the most
    #   popular among developers and continues to be in high demand'}]}
    response = requests.post(gen_url, headers=headers, json=payload)

    # Fail loudly on HTTP errors rather than handing an error body to callers.
    response.raise_for_status()

    return response.json()

def get_completion_text(prompt, max_tokens=200, temperature=1.0, min_p=0.2, stop_sequence=None, grammar=""):
    """Run ``get_completion`` and return only the generated text.

    The arguments are forwarded unchanged to ``get_completion`` (a ``None``
    ``stop_sequence`` is forwarded as an empty list).

    The text is looked up in this order:
      1. ``response["content"]`` (the key this function has always read);
      2. ``response["results"][0]["text"]`` (the shape shown in the example
         reply documented in ``get_completion``);
      3. ``response["choices"][0]["text"]`` (NOTE(review): presumed shape for
         an OpenAI-style ``/v1/completions`` endpoint; confirm against the
         server in use).

    Raises:
        KeyError: If none of the known shapes is present.
    """
    # A ``None`` default avoids a shared mutable default; always hand a real
    # list to get_completion so the request payload is unchanged.
    if stop_sequence is None:
        stop_sequence = []

    response = get_completion(prompt, max_tokens, temperature, min_p, stop_sequence, grammar)

    # Only fall back to the alternative shapes when "content" is absent, so
    # existing behaviour is untouched whenever that key exists.
    if isinstance(response, dict) and "content" not in response:
        for key in ("results", "choices"):
            entries = response.get(key)
            if entries:
                return entries[0]["text"]

    return response["content"]



# Grammar text passed as the `grammar` argument of the generation request
# further down in this file (presumably GBNF-style, used by the server to
# constrain the output format; confirm against the server documentation).
#
# Structure, as written in the rules below:
#   * root: one bare `question` first (no label literal precedes it), then the
#     remaining labelled questions, followed by a "Category: Detailed" and a
#     "Category: Not directly related" section, each a `questionsubgroup`.
#   * question: a space, one `line`, and a newline.
#   * questionsubgroup: the full set of labelled questions, including the
#     "Open-ended question:" label.
#   * line: one or more characters excluding the listed line-break characters
#     and "|".
#
# NOTE: this is not a raw string, so Python interprets the backslash escapes
# (\n, \r, \x0b, \u2028, ...) before the text is sent to the server.
grammar = """root ::= question "Closed-ended question:" question "Semi-Structured question:" question "Leading question:" question "\n\nInstructions (Imperatives)\n\nShort instruction:" question "Scenario-based instruction:" question "Problem-based instruction:" question "\n\nPrompts\n\nShort prompt (Interrogative/Imperative):" question "Scenario-based prompt (Interrogative/Imperative):" question "Problem-based prompt (Interrogative/Imperative):" question "\n\nRequests (Modal Constructions)\n\nFormal request (using Modals):" question "Informal request (using Modals):" question "Polite request (using Modals):" question "Direct request (using Modals):" question "\n\nCategory: Detailed\n\n\n" questionsubgroup "\n\nCategory: Not directly related\n\n\n" questionsubgroup

# Question
question ::= " " line "\n"

# formatted question subgroup
questionsubgroup ::= "Questions (Interrogatives)\n\nOpen-ended question:" question "Closed-ended question:" question "Semi-Structured question:" question "Leading question:" question "\n\nInstructions (Imperatives)\n\nShort instruction:" question "Scenario-based instruction:" question "Problem-based instruction:" question "\n\nPrompts\n\nShort prompt (Interrogative/Imperative):" question "Scenario-based prompt (Interrogative/Imperative):" question "Problem-based prompt (Interrogative/Imperative):" question "\n\nRequests (Modal Constructions)\n\nFormal request (using Modals):" question "Informal request (using Modals):" question "Polite request (using Modals):" question "Direct request (using Modals):" question

# String
line ::= [^\r\n\x0b\x0c\x85\u2028\u2029|]+
"""

# ---------------------------------------------------------------------------
# Build the prompt: load the template, fill in a random sample of seed lines
# and the source document, then request a grammar-constrained completion and
# pull the individual questions out of the reply.
# ---------------------------------------------------------------------------
prompt_file = "./generation_prompts/q-gen-mistral.txt"

with open(prompt_file, "r", encoding="utf-8") as f:
    prompt = f.read()

question_seed_file = "./prompt_resources/question_seed.txt"

with open(question_seed_file, "r", encoding="utf-8") as f:
    question_seed = f.read()

# Separate the seed into lines, dropping blank ones so that an empty line
# (e.g. from a trailing newline in the file) is never sampled into the prompt.
question_seed_lines = [
    seed_line for seed_line in question_seed.split("\n") if seed_line.strip()
]

# Separate the lines ending with a question mark from the lines that do not
question_seed_question_lines = []
question_seed_non_question_lines = []

for line in question_seed_lines:
    if line.endswith("?"):
        question_seed_question_lines.append(line)
    else:
        question_seed_non_question_lines.append(line)

# Shuffle both groups, then keep at most 10 lines from each
random.shuffle(question_seed_question_lines)
random.shuffle(question_seed_non_question_lines)

question_seed_question_lines = question_seed_question_lines[:10]
question_seed_non_question_lines = question_seed_non_question_lines[:10]

# Combine the lines (questions first) and join them back into one string
question_seed_lines = question_seed_question_lines + question_seed_non_question_lines
question_seed = "\n".join(question_seed_lines)

# Replace {{QUESTION_SEED}} with the sampled seed text.
# str.replace is used instead of re.sub because re.sub interprets backslashes
# in the replacement string (e.g. "\d" or "\1"), which would raise or corrupt
# arbitrary file content.
prompt = prompt.replace("{{QUESTION_SEED}}", question_seed)

text_file = "./q-gen-test-doc.txt"

with open(text_file, "r", encoding="utf-8") as f:
    document = f.read()

# Replace {{DOCUMENT}} with the document text (literal replacement, see above)
prompt = prompt.replace("{{DOCUMENT}}", document)

output = get_completion_text(prompt, max_tokens=2000, grammar=grammar, temperature=2, min_p=0.15)

print(output)

# Split into lines
lines = output.split("\n")

# Collect the first line of the output plus the text of every line that
# begins with one of the known labels below.
# The first line carries no label in the output (the grammar's root rule
# starts with a bare question), so it is taken as-is, minus whitespace.
questions = [lines[0].strip()]

# List of prefixes to look for
prefixes = ["Open-ended question:", "Closed-ended question:", "Semi-Structured question:", "Leading question:", "Short instruction:", "Scenario-based instruction:", "Problem-based instruction:", "Short prompt (Interrogative/Imperative):", "Scenario-based prompt (Interrogative/Imperative):", "Problem-based prompt (Interrogative/Imperative):", "Formal request (using Modals):", "Informal request (using Modals):", "Polite request (using Modals):", "Direct request (using Modals):"]

for line in lines:
    for prefix in prefixes:
        if line.startswith(prefix):
            # Drop only the matched label; splitting on ":" would cut off
            # any question that itself contains a colon.
            questions.append(line[len(prefix):].strip())
            break

# Print the questions
for question in questions:
    print(question)
