# Query and Message Generation Function

In [79]:
import os
import json
import re
from dotenv import load_dotenv
from pinecone import Pinecone
from mistralai import Mistral
from openai import OpenAI

## Load environment variables

In [None]:
# Load settings from the local ".env" file and report the outcome.
# A falsy return value from load_dotenv is treated as a failure to load.
print(
    "Loading successful"
    if load_dotenv(".env")
    else "An error has occured. Make sure the file exists and is readable"
)

Loading successful


## Initialize Pinecone

In [81]:
def initialize_Pinecone():
    """Create a Pinecone client and open the index named by the environment.

    Reads ``PINECONE_API_KEY`` and ``PINECONE_INDEX_NAME`` from the environment.
    If no index with that name is listed for the account, one is created with
    ``create_index_for_model`` (AWS, ``us-east-1``) before it is opened.

    Returns:
        A ``(pc, index)`` tuple on success, or ``(None, None)`` when a required
        environment variable is missing or any step raises.
    """
    try:
        pinecone_api_key = os.getenv("PINECONE_API_KEY")
        if not pinecone_api_key:
            print("An error has occured. Pinecone API key not found")
            return None, None

        pc = Pinecone(api_key=pinecone_api_key)

        index_name = os.getenv("PINECONE_INDEX_NAME")
        if not index_name:
            print("An error has occured. PINECONE_INDEX_NAME not found in environmental variables")
            return None, None

        print("Verifying Pinecone Index. Create one if needed.")
        existing_names = {idx.name for idx in pc.list_indexes()}
        if index_name not in existing_names:
            print(f"Pinecone Index {index_name} not found. Creating one")
            pc.create_index_for_model(
                name=index_name,
                cloud="aws",
                region="us-east-1",
                embed={
                    # Must be the same model query_passage embeds queries with,
                    # otherwise stored and query vectors are not comparable.
                    "model": "multilingual-e5-large",
                    "field_map": {"text": "text"},
                },
            )
            print(f"Index {index_name} created in Pinecone")
        else:
            print(f"Index {index_name} already exists in Pinecone")

        index = pc.Index(index_name)
        # Interpolate the real name; the message used to print the literal
        # text 'index_name'.
        print(f"Connecting to Pinecone Index object '{index_name}'.")

    except Exception as e:
        print(f"An error has occured during Pinecone initialization process: {e}")
        return None, None

    return pc, index

## Message generating function

### Initialize LLM clients - Mistral and OpenAI

In [82]:
def initialize_llm_clients():
    """Build the Mistral and OpenAI API clients from environment credentials.

    Reads ``MISTRAL_API_KEY`` and ``OPENAI_API_KEY``. Each provider is handled
    independently, so a failure with one does not prevent the other from
    being initialized.

    Returns:
        A ``(mistral_client, openai_client)`` tuple. An entry is ``None`` when
        that provider's SDK class is unavailable, its API key is missing, or
        constructing the client raised.
    """
    # Bind both results up front: previously a missing key or a constructor
    # error left the name unassigned and the final return raised
    # UnboundLocalError.
    mistral_client = None
    openai_client = None

    # Resolve the SDK classes by name so that an absent (or None) binding takes
    # the "skipping" branch instead of raising NameError.
    mistral_cls = globals().get("Mistral")
    if mistral_cls:
        try:
            mistral_api_key = os.getenv("MISTRAL_API_KEY")
            if not mistral_api_key:
                print("An error has occured. Mistral API key not found.")
            else:
                mistral_client = mistral_cls(api_key=mistral_api_key)
                print("Mistral client successfully initialized.")
        except Exception as e:
            print(f"An error has occured during Mistral initialization process: {e}")
    else:
        print("Skipping Mistral client initialization - client library not found")

    openai_cls = globals().get("OpenAI")
    if openai_cls:
        try:
            openai_api_key = os.getenv("OPENAI_API_KEY")
            if not openai_api_key:
                print("An error has occured. OpenAI API key not found.")
            else:
                openai_client = openai_cls(api_key=openai_api_key)
                print("OpenAI client succesfully initialized.")
        except Exception as e:
            print(f"An error has occured during OpenAi initialization process: {e}")
    else:
        print("Skipping OpenAI client initialization - client library not found")

    return mistral_client, openai_client

### Query relevant passages from Pinecone

In [83]:
def query_passage(query, top_k = 3):
    """Retrieve the stored passages most similar to ``query`` from Pinecone.

    The query is embedded with the ``multilingual-e5-large`` model through the
    module-level ``pc`` client, then the module-level ``index`` is searched in
    the namespace given by the ``PINECONE_NAMESPACE`` environment variable.

    Args:
        query: Text to search for.
        top_k: Maximum number of matches to request.

    Returns:
        A list with the ``metadata["text"]`` value of each match, or ``None``
        when the client/index is not set up or any step fails.
    """
    # Look the globals up by name so an uninitialised notebook session reports
    # the problem below instead of raising NameError.
    module_scope = globals()
    pinecone_client = module_scope.get("pc")
    pinecone_index = module_scope.get("index")
    if not pinecone_client or not pinecone_index:
        print("An error has occured. Pinecone client or index cannot be initialize globally")
        return None

    # Bound before the try block so the AttributeError handler can always
    # print it, even when the embed call itself is what failed.
    embedding_responses = None
    try:
        embedding_responses = pinecone_client.inference.embed(
            model="multilingual-e5-large",
            inputs=[query],
            parameters={"input_type": "query"},
        )

        if not embedding_responses or not getattr(embedding_responses, 'data', None) or not embedding_responses.data[0]['values']:
            print("An error has occured. Failure in generating query embedding or unexpected response structure")
            return None

        query_embedding = embedding_responses.data[0]['values']

        query_responses = pinecone_index.query(
            vector=query_embedding,
            top_k=top_k,
            include_metadata=True,
            namespace=os.getenv("PINECONE_NAMESPACE"),
        )

        return [match["metadata"]["text"] for match in query_responses["matches"]]

    except AttributeError:
        print("An error has occured when trying to access Pinecone embedding response.")
        print("Embedding response object: ", embedding_responses)
        return None

    except Exception as e:
        # The f-prefix was missing here, so the literal text "{e}" was printed.
        print(f"An error has occured during Pinecone query or embedding process: {e}")
        return None

### Generate reading passages using selected models

In [84]:
def generate_reading_passages(model_choice, query, passages):
    """Ask the chosen LLM to write an IELTS-style reading passage.

    The retrieved ``passages`` are joined into a single context string that is
    embedded in the prompt. When no context is available, a one-line
    description built from ``query`` is used instead.

    Args:
        model_choice: ``"Mistral"`` or ``"GPT 4.1"``.
        query: The user's topic; only used for the fallback context.
        passages: Iterable of context strings; ``None`` or empty is accepted.

    Returns:
        The generated passage text, or ``None`` when ``model_choice`` is not
        recognised or the API call fails. Uses the module-level
        ``mistral_client`` / ``openai_client``.
    """
    # `passages or []` keeps a None result from retrieval from crashing the
    # join before the fallback below gets a chance to run.
    context = " ".join(passages or []).strip()

    # Fallback if Pinecone cannot find the suitable passages
    if not context:
        context = f"A passage about: {query}"

    prompt = f"""You are an IELTS Reading expert. Based on the following context, generate an IELTS-style academic reading passage.

\"\"\"{context}\"\"\"

Please follow this structure:

Passage Guidelines:
- The passage should be approximately 700–800 words long.
- Use an academic tone, similar to passages found in the IELTS Reading section.
- Organize the content into 4–6 paragraphs.
- Include a title that reflects the main idea of the passage.
- Do NOT include any questions or answers.
- Do NOT add extra instructions, labels, or headings such as “Questions” or “Answers.”

Do not include explanations or justifications unless explicitly asked by the user. Your output must be directly usable in an IELTS reading practice application.
"""
    try:
        if model_choice == "Mistral":
            if not mistral_client:
                raise ValueError("Mistral client is not available")

            response = mistral_client.chat.complete(
                model="mistral-small-latest",
                messages=[
                    {"role": "system", "content": "You are an IELTs Reading tutor"},
                    {"role": "user", "content": prompt},
                ],
            )

            passage_content = response.choices[0].message.content

        elif model_choice == "GPT 4.1":
            if not openai_client:
                raise ValueError("OpenAI client is not available")

            response = openai_client.chat.completions.create(
                model="gpt-4.1-2025-04-14",
                messages=[
                    {"role": "system", "content": "You are an IELTs Reading tutor."},
                    {"role": "user", "content": prompt},
                ],
            )

            passage_content = response.choices[0].message.content

        else:
            print(f"An error has occured. Invalid model choice: {model_choice}. Please choose 'Mistral' or 'GPT 4.1'.")
            return None

    except Exception as e:
        # Report the underlying cause; it used to be discarded, which made
        # failures (missing client, auth error, rate limit) indistinguishable.
        print(f"An error has occured during API call for {model_choice}: {e}. Please try again.")
        return None

    return passage_content

### Generate IELTS Reading structured questions and answers

In [85]:
def generate_questions(model_choice, passage):
    """Ask the chosen LLM for IELTS-style questions about ``passage``.

    The model is instructed to reply with a JSON array of question objects
    (``number``, ``type``, ``text``). Any Markdown code fence around the reply
    is stripped before it is parsed with ``json.loads``.

    Args:
        model_choice: ``"Mistral"`` or ``"GPT 4.1"``.
        passage: The reading passage the questions should be based on.

    Returns:
        The parsed JSON value (expected to be a list of dicts), or ``None``
        when ``model_choice`` is not recognised, the API call fails, the reply
        is empty, or the reply is not valid JSON. Uses the module-level
        ``mistral_client`` / ``openai_client``.
    """
    prompt = f"""
You are an IELTS Reading expert that generates IELTs-style reading questions based on a providede passage.
Your task is to output ONLY a valid JSON array containing exactly 10 object questions based on the passage below:

\"\"\"{passage}\"\"\"

Please follow this structure:

Question Types:
Alternate between the following IELTS question types:
1. Multiple choice (4 options: A, B, C, D)
2. Identifying information (True / False / Not Given)
3. Identifying writer’s views/claims (Yes / No / Not Given)
4. Matching headings / information / features / sentence endings
5. Sentence completion
6. Short-answer questions

Instructions:
- Generate a well-balanced mix of these types.
- Ensure questions reflect the style and difficulty of the IELTS Reading section.
- Each question should be clear and concise.
- For multiple choice, label the options clearly as A, B, C, D.
- Do NOT provide answers at the end.

Output Formatting Requirement (MANDATORY):
- Your **ENTIRE** responses MUST be a single, valid JSON array.
- The response MUST start with '[' and end with ']'.
- Each element in the array MUST be a JSON object with the following EXACT keys:
    1. "number": (integer) The question number (01-10).
    2. "type": (string) The specific question types (e.g. "Multiple Choices", "True/False/Not Given, etc.")
    3. "text": (string) The full question texts (including options A, B, C, D for multiple choice questions).
- **CRITICAL**: DO NOT include ANY text, explanation, introduction, section headers (such as "Section 1: Multiple Choices"),
or ANY other content BEFORE the opening '['or AFTER the closing ']'.
- The output MUST be machine-readable JSON only.

Example of a single object within the required JSON array format:
     "number": 1,
     "type": "Multiple choice",
     "text": "What is the primary topic discussed?\\nA) Option A text\\nB) Option B text\\nC) Option C text\\nD) Option D text"

Generate the JSON array based on the provided passage.
     
"""
    # Bound up front: an empty model reply used to leave this name unassigned,
    # so the final return raised UnboundLocalError.
    questions_list = None

    try:
        if model_choice == "Mistral":
            if not mistral_client:
                raise ValueError("Mistral client is not available")

            response = mistral_client.chat.complete(
                model="mistral-small-latest",
                messages=[
                    {"role": "system", "content": "You are an IELTs Reading tutor."},
                    {"role": "user", "content": prompt},
                ],
            )
            raw_questions = response.choices[0].message.content

        elif model_choice == "GPT 4.1":
            if not openai_client:
                raise ValueError("OpenAI client is not available")

            response = openai_client.chat.completions.create(
                model="gpt-4.1-2025-04-14",
                messages=[
                    {"role": "system", "content": "You are an IELTs Reading tutor."},
                    {"role": "user", "content": prompt},
                ],
            )

            raw_questions = response.choices[0].message.content

        else:
            print(f"An error has occured. Invalid model choice: {model_choice}. Please choose 'Mistral' or 'GPT 4.1'.")
            return None

        if raw_questions:
            # Drop a leading ```json (or bare ```) fence and a trailing ```
            # fence, since models often wrap JSON in a Markdown code block.
            cleaned_questions = re.sub(r"^```(?:json)?\s*|\s*```$", "", raw_questions.strip())

            try:
                questions_list = json.loads(cleaned_questions)
            except json.JSONDecodeError as json_err:
                print(f"Error decoding JSON response from AI: {json_err}")
                print("Raw response was:\n", raw_questions)
                questions_list = None

    except Exception as e:
        # Report the underlying cause instead of discarding it.
        print(f"An error has occured during API call for {model_choice}: {e}. Please try again.")
        return None

    return questions_list

## Main Execution Block

In [86]:
# Demo run: initialise the clients, retrieve context for a sample query,
# generate a passage from it, then generate questions for that passage.
if __name__ == "__main__":
    print("\nExecuting Query and Script Generation process")

    # Declare globally: the functions above read these module-level names.
    mistral_client, openai_client = initialize_llm_clients()
    pc, index = initialize_Pinecone()

    if pc and index:
        try:
            # Generate an example
            query = "Explain the process of photosynthesis"
            chosen_LLM = "Mistral"

            print(f"\n Process the query {query} using {chosen_LLM} model")

            retrieved_passages = query_passage(query)
            if retrieved_passages is not None:
                generate_passages = generate_reading_passages(chosen_LLM, query, retrieved_passages)

                if generate_passages:
                    # Store the result under its own name. Assigning it to
                    # `generate_questions` replaced the function itself, so a
                    # second run of this cell failed with "object is not callable".
                    generated_questions = generate_questions(chosen_LLM, generate_passages)

                    print("\n" + "="*30 + " RESULTS " + "="*30)
                    print("\n--- Generated Passage ---")
                    print(generate_passages)
                    print("\n--- Generated Questions (JSON) ---")

                    if generated_questions:
                        print(json.dumps(generated_questions, indent=4))

                    else:
                        print("Failed to generate or parse questions.")

                else:
                    print("Failed to generate passage.")

            else:
                print("Failed to retrieve passages from Pinecone.")

        except Exception as e:
            print(f"An error has occurred during the main execution flow: {e}")

    else:
        print("An error has occured during Pinecone client or index initialization process")

print("Query and Script Generation process completed")



Executing Query and Script Generation process


Mistral client successfully initialized.
OpenAI client succesfully initialized.
Verifying Pinecone Index. Create one if needed.
Index ielts-rag already exists in Pinecone
Connecting to Pinecone Index object 'index_name'.

 Process the query Explain the process of photosynthesis using Mistral model


--- Generated Passage ---
### The Ecological Significance of Light and Seed Production in Plants

The role of light in the natural world is multifaceted and crucial for the survival and growth of various organisms. One of the most well-documented phenomena is the adaptive significance of seed production in plants, particularly in species like bamboo. Bamboo exhibits a unique reproductive strategy where it produces an enormous quantity of seeds simultaneously. This mass seeding event can result in seeds being layered 12 to 15 centimeters deep on the ground. The adaptive significance of this strategy is clear: the sheer volume of seeds overwhelms the capacity of seed-eating animals to consume