# Set Up the Environment

In [1]:
# Edit these variables to pick which course gets generated.
#
# source_language: the language the learner already speaks
# target_language: the language being taught
# Alternative source language that was used previously:
# source_language = "العربية العراقية العامية"
source_language = "کوردی سۆرانی"
target_language = "English"

# Proficiency level and theme of the generated course
CEFR_level = "A1"
topic_of_interest = "Tourism"

# How many of the outlined topics are expanded into lessons and scripts
number_of_topics_to_generate = 2

In [2]:
import os

'''  Pick your model and API key over here  '''
# Each entry describes one model:
#   name               - the model identifier sent in the request
#   structured_outputs - whether the model is asked for native JSON-schema output
#   temperature        - sampling temperature sent with every request
models = {
    'deepseek': {
        'name': 'deepseek/deepseek-r1:free',
        'structured_outputs': False,
        'temperature': 0
    },
    'claude 3.7': {
        'name': 'anthropic/claude-3.7-sonnet',
        'structured_outputs': False,
        'temperature': 0
    },
    'gemini 2.0': {
        'name': 'google/gemini-2.0-flash-exp:free',
        'structured_outputs': True,
        'temperature': 0
    },
    'gemini 2.5 exp': {
        'name': 'google/gemini-2.5-pro-exp-03-25:free',
        'structured_outputs': True,
        'temperature': 0
    },
    'gemini 2.5 preview': {
        'name': 'google/gemini-2.5-pro-preview-03-25',
        'structured_outputs': True,
        'temperature': 0
    },
}
default_model = models['claude 3.7']

# Read the API key from the first KEY=value line of the .env file.
# Only the first "=" separates name and value, so keys that themselves contain
# "=" are kept whole; blank lines and "#" comments are skipped.
api_key = ""
with open(".env", "r", encoding="utf-8") as f:
    for env_line in f:
        env_line = env_line.strip()
        if not env_line or env_line.startswith("#") or "=" not in env_line:
            continue
        api_key = env_line.partition("=")[2].strip().strip('"\'')
        break

if not api_key:
    raise ValueError('No API key found in ".env"; expected a line of the form NAME=value')

# Program Setup

Create the output folder where the course outline and scripts will be stored

In [3]:
# Everything generated for this course (outline JSON files and lesson scripts)
# is written below this folder; the trailing slash lets later cells append
# file names directly.
output_folder = (
    f"Courses/{source_language}-to-{target_language} "
    f"at {CEFR_level}-level Lesson Scripts ({topic_of_interest})/"
)
os.makedirs(os.path.dirname(output_folder), exist_ok=True)

These are some functions that will be used throughout the program

In [4]:
import json

def schema_to_template(schema_object: dict) -> str:
    """Turn a response_format object into a text template of the desired response.

    This is used for models that don't have structured output support and need
    the JSON schema to be spelled out in the prompt.

    Args:
        schema_object: Either a full response_format dict (the schema is then
            read from ["json_schema"]["schema"]) or a bare JSON schema.

    Returns:
        A string that starts with a newline and an introductory sentence and is
        followed by the template rendered as indented JSON.
    """
    def describe_array_property(prop_details, description):
        """Build the one-element example list for an array-typed property."""
        if "items" not in prop_details:
            return []
        items_schema = prop_details["items"]
        if items_schema.get("type") == "object":
            return [process_schema_type(items_schema)]
        # Scalar items carry the description of the property that owns them
        return [f"{items_schema.get('type', 'unknown')} ({description})"]

    def describe_property(prop_details):
        """Build the template value for a single object property."""
        description = prop_details.get("description", "")
        prop_type = prop_details.get("type")

        if prop_type == "object":
            return process_schema_type(prop_details)
        if prop_type == "array":
            return describe_array_property(prop_details, description)
        if prop_type == "string":
            return f"string ({description})"
        # boolean, number, integer, ...: show the bare type name so that no
        # property is silently left out of the template
        return prop_details.get("type", "unknown")

    def process_schema_type(schema):
        """Recursively process different schema types to create template values."""
        schema_type = schema.get("type")

        if schema_type == "object":
            return {
                prop_name: describe_property(prop_details)
                for prop_name, prop_details in schema.get("properties", {}).items()
            }

        if schema_type == "array":
            if "items" in schema:
                items_schema = schema["items"]
                if items_schema.get("type") == "object":
                    return [process_schema_type(items_schema)]
                if items_schema.get("type") == "string":
                    description = items_schema.get("description", "")
                    return [f"string ({description})"]
            return []

        return schema.get("type", "unknown")

    # Extract the schema from the input object (fall back to treating the
    # input itself as the schema)
    schema = schema_object.get("json_schema", {}).get("schema", schema_object)

    template = process_schema_type(schema)

    # ensure_ascii=False keeps non-Latin descriptions readable in the prompt
    # instead of turning them into \uXXXX escapes
    rendered = json.dumps(template, indent=2, ensure_ascii=False)
    return f'\nThe JSON should be structured like this:\n{rendered}'

In [5]:
import requests

def call_LLM(prompt: str, system_prompt:str="", response_format: dict = {"type":"text"}, model_to_use:dict = default_model) -> "str | None":
  """Send the given prompt to the specified LLM in the desired format.

  Args:
    prompt: The user message.
    system_prompt: Optional system message; omitted from the request when empty.
    response_format: The response_format object sent with the request. The
      default dict is shared between calls and is only read, never modified.
    model_to_use: A model entry with 'name', 'temperature' and
      'structured_outputs' keys.

  Returns:
    The text-only response of the LLM, or None when the API reports an error
    (the error message is printed).
  """
  wants_schema = response_format["type"] == "json_schema"
  native_structured = model_to_use['structured_outputs'] and wants_schema

  # Models without native structured outputs get the schema described in the
  # prompt itself. Plain-text requests get nothing appended; previously they
  # received a misleading 'The JSON should be structured like this: "text"'.
  user_content = prompt
  if wants_schema and not native_structured:
    user_content += schema_to_template(response_format)

  messages = []
  if system_prompt != "":
    messages.append({
      "role": "system",
      "content": system_prompt
    })
  messages.append({
    "role": "user",
    "content": user_content
  })

  response = requests.post(
    url="https://openrouter.ai/api/v1/chat/completions",
    headers={
      "Authorization": f"Bearer {api_key}",
      "Content-Type": "application/json",
    },
    data=json.dumps({
      "model": model_to_use['name'],
      "temperature": model_to_use['temperature'],
      "structured_outputs": model_to_use['structured_outputs'] and response_format["type"] != "text",
      "messages": messages,
      "response_format": response_format
    })
  )

  # Decode the body once instead of re-parsing it for every access
  payload = response.json()

  if 'error' in payload:
    error = payload['error']
    print(error.get('message', error) if isinstance(error, dict) else error)
    return None

  return payload['choices'][0]['message']['content']

In [6]:
def response_to_JSON(text_response: str):
  """Parse a text-based LLM response and convert it into a JSON value.

  The first '{' or '[' in the text decides which kind of value is expected;
  everything from that opener up to the last matching closer is handed to
  the JSON parser, so surrounding chatter or code fences are ignored.
  """
  closer_for = {'{': '}', '[': ']'}

  # First bracket of either kind; empty when the text contains neither, in
  # which case the slice below spans the whole text
  opener = next((ch for ch in text_response if ch in closer_for), '')
  closer = closer_for.get(opener, '')

  first = text_response.index(opener)
  last = text_response.rindex(closer)
  return json.loads(text_response[first:last + 1])

In [7]:
def load_prompt_and_format(path_to_prompt:str, path_to_format:str):
    """Load a prompt and its response format from their two corresponding files.

    Args:
        path_to_prompt: Path of a UTF-8 text file holding the prompt.
        path_to_format: Path of a UTF-8 JSON file holding the JSON schema of
            the expected response.

    Returns:
        A (prompt, response_format) tuple, where response_format is a
        "json_schema" response_format dict wrapping the loaded schema.
    """
    with open(path_to_prompt, 'r', encoding='utf-8') as file:
        prompt = file.read()

    # Read the schema with the same explicit encoding as the prompt so that
    # non-ASCII descriptions do not depend on the platform default
    with open(path_to_format, 'r', encoding='utf-8') as file:
        schema = json.load(file)

    response_format = {
        "type": "json_schema",
        "json_schema": {
            "name": "response",
            "strict": True,
            "schema": schema,
        }
    }

    return prompt, response_format

In [8]:
def replace_tags(prompt:str, replacements:dict):
    """Substitute every tag found in the prompt with its replacement.

    Tags are processed in the dictionary's order and each replacement value is
    converted with str() first, so numbers can be passed directly.
    """
    filled = prompt
    for tag in replacements:
        filled = filled.replace(tag, str(replacements[tag]))
    return filled

In [9]:
def continuously_call_LLM(prompt:str, system_prompt:str = "", response_format:dict = {'type': 'text'}, model_to_use:dict = default_model, max_tries:int = 5):
    """Call the LLM, retrying until it succeeds or max_tries attempts have failed.

    A try counts as failed when the call raises, when call_LLM returns None,
    or (for 'json_schema' formats) when the reply cannot be parsed as JSON.

    Args:
        prompt: The user message.
        system_prompt: Optional system message.
        response_format: The response_format object; only read, never modified.
        model_to_use: The model entry passed on to call_LLM.
        max_tries: Number of failed attempts after which to give up. At least
            one attempt is always made.

    Returns:
        The raw text reply, or the parsed JSON value for 'json_schema'
        formats. None when every attempt failed.
    """
    wants_json = response_format['type'] == 'json_schema'
    result = None

    number_of_fails = 0
    while result is None:
        # Handling errors with calling the LLM
        try:
            result = call_LLM(prompt, system_prompt, response_format, model_to_use)
        except Exception as e:
            print(f'Calling the LLM faield. {e}. Trying again...')

        # Handling errors with converting the LLM's response to a JSON object
        if wants_json and result is not None:
            try:
                result = response_to_JSON(result)
            except Exception as e:
                result = None
                print(f'Converting response to JSON failed. {e}. Trying again...')

        if result is not None:
            break

        # Only failed attempts are counted, so a success on the last allowed
        # try no longer reports a spurious "max tries" error
        number_of_fails += 1
        if number_of_fails >= max_tries:
            print('-ERROR, Max number of tries reached, exiting the loop-')
            break

    return result

In [10]:
def saveJSON(obj: dict, information_name: str) -> None:
  """Save a generated JSON object to "<information_name>.json" in the output folder.

  The file is written as UTF-8 with non-ASCII characters kept as they are, so
  it matches what loadJSON reads and stays human-readable for non-Latin
  course content.
  """
  with open(f"{output_folder}{information_name}.json", "w", encoding="utf-8") as f:
    json.dump(obj, f, indent=2, ensure_ascii=False)

In [11]:
def loadJSON(information_name: str) -> "dict | list":
  """Load a previously generated JSON object from the output folder.

  Reads "<information_name>.json" as UTF-8 and returns the parsed value.
  """
  with open(f"{output_folder}{information_name}.json", "r", encoding="utf-8") as f:
    return json.load(f)

# Generate the Grammar Concepts

In [12]:
# Generate (or reload) the list of grammar concepts for the course, then build
# a plain-text rendering of it that is used in later prompts.
concepts_list = []
file_name = "Grammar Concepts"
load_existing_grammar_concepts = False

if load_existing_grammar_concepts:
    concepts_list = loadJSON(file_name)

else:
    prompt, response_format = load_prompt_and_format(
        "Prompts/grammar_generator_instructions.txt",
        "Prompts/grammar_generator_format.json"
    )
    replacements = {
        "[target_language]": target_language,
        "[source_language]": source_language,
        "[CEFR_level]": CEFR_level,
    }

    prompt = replace_tags(prompt, replacements)
    # The format has to be passed by keyword: the second positional parameter
    # of continuously_call_LLM is the system prompt, not the response format.
    concepts_list = continuously_call_LLM(prompt, response_format=response_format)

    # Stop before writing a useless "null" file when every attempt failed
    if concepts_list is None:
        raise RuntimeError("Generating the grammar concepts failed; nothing was saved.")

    saveJSON(concepts_list, file_name)

# One "<concept> | <component> (<examples>)" line per component
concepts_as_text = ""
for i, concept in enumerate(concepts_list):
    print(f'{i+1}) {concept["name"]}')
    for component in concept["components"]:
        # Joined outside the f-strings so the cell also parses before Python 3.12
        examples = ', '.join(component["examples"])
        print(f'  - {component["description"]} ({examples})')
        concepts_as_text += f'{concept["name"]} | {component["description"]} ({examples})\n'
    print('\n')

1) Present Simple Tense
  - بەکارهێنانی کرداری سادە بۆ دەربڕینی ڕاستییەکان و ڕۆتینەکان (I drink coffee every morning., She works in a hospital., They live in London.)
  - بەکارهێنانی کرداری سادە لەگەڵ کەسی سێیەمی تاک (s/es) (He plays football on Sundays., She watches TV in the evening., It costs five dollars.)
  - پرسیارکردن بە کرداری سادە بە بەکارهێنانی do/does (Do you speak English?, Does she like pizza?, Do they come here often?)
  - نەرێنی کردن لە کرداری سادە بە بەکارهێنانی don't/doesn't (I don't eat meat., He doesn't understand French., We don't work on Sundays.)


2) Present Continuous Tense
  - بەکارهێنانی کرداری بەردەوام بۆ دەربڕینی کردارێک کە ئێستا ڕوودەدات (I am reading a book now., She is cooking dinner., They are playing football.)
  - پرسیارکردن بە کرداری بەردەوام (Are you studying English?, Is he working today?, Are they waiting for us?)
  - نەرێنی کردن لە کرداری بەردەوام (I am not sleeping., She isn't listening to music., We aren't going to the party.)


3) Personal Pron

# Generate the Course Structure

In [13]:
# Number of topics the model is asked to outline for the whole course
num_topics = 30

# Load the system prompt shared by the course-structure requests
system_prompt = ""
with open("Prompts/iterative_system.txt", encoding='utf-8') as file:
    system_prompt = file.read()

# Fill in the course settings
replacements = {
    "[target_language]": target_language,
    "[source_language]": source_language,
    "[CEFR_level]": CEFR_level,
    "[topic_of_interest]": topic_of_interest,
}
system_prompt = replace_tags(prompt=system_prompt, replacements=replacements)

In [14]:
# Generate (or reload) the ordered list of course topics, each with the new
# phrases and skills it introduces.
topics = []
file_name = "Topic Distribution"
load_existing_topics = False

if load_existing_topics:
    topics = loadJSON(file_name)

else:
    prompt, response_format = load_prompt_and_format(
        "Prompts/iterative_prompt_topics.txt",
        "Prompts/iterative_format_topics.json"
    )
    replacements = {
        "[target_language]": target_language,
        "[CEFR_level]": CEFR_level,
        "[topic_of_interest]": topic_of_interest,
        "[num_topics]": num_topics,
    }

    prompt = replace_tags(prompt, replacements)
    topics = continuously_call_LLM(prompt, system_prompt, response_format)

    # Stop before writing a useless "null" file when every attempt failed
    if topics is None:
        raise RuntimeError("Generating the topic distribution failed; nothing was saved.")

    saveJSON(topics, file_name)

# Print the topic distribution
for i, topic in enumerate(topics):
    print(f'{i+1}) {topic["topic"]}')
    print('  New Phrases:')
    for new_phrase in topic["new_phrases"]:
        print(f'    - {new_phrase}')
    print('  New Skills:')
    for new_skill in topic["new_skills"]:
        print(f'    - {new_skill}')
    print('\n')

1) ناساندنی خۆت
  New Phrases:
    - Hello, my name is...
    - What is your name?
    - Nice to meet you.
    - I am a tourist.
    - Are you a tourist?
    - Yes, I am.
    - No, I am not.
    - Thank you.
    - You're welcome.
    - Goodbye.
  New Skills:
    - Basic greetings
    - Introducing yourself
    - Asking for names
    - Simple yes/no responses
    - Saying thank you


2) وڵاتان و زمانەکان
  New Phrases:
    - Where are you from?
    - I am from...
    - Do you speak English?
    - I speak a little Kurdish.
    - This country is beautiful.
    - Is this your first visit?
    - Yes, it's my first time here.
    - No, I've been here before.
    - What languages do you speak?
    - I speak English and a little Kurdish.
  New Skills:
    - Talking about countries
    - Asking about languages
    - Expressing nationality
    - Basic adjectives for places
    - Using 'from' in sentences


3) ژمارەکان و کات
  New Phrases:
    - What time is it?
    - It's three o'clock.
    - Ho

In [15]:
class LearningItem:
    """A word, phrase or concept tracked by the spaced-repetition schedule.

    Days are counted in lesson numbers: an item is introduced in
    ``start_lesson`` and ``next_review_day`` is the lesson in which it is due
    for its next review.
    """

    # Gap (in lessons) until the next review after the 1st..5th exposure
    EARLY_INTERVALS = [1, 2, 3, 5, 7]
    # Table used from the 6th exposure on.
    # NOTE(review): the lookup index is clamped to the last entry and is
    # already >= 5 at that point, so only the final value (60) is ever used.
    LATE_INTERVALS = [1, 3, 7, 14, 30, 60]

    def __init__(self, content: str, start_lesson: int, item_type: str):
        self.content = content
        self.start_lesson = start_lesson
        self.item_type = item_type
        self.exposure_count = 0
        self.last_exposure_day = start_lesson
        self.next_review_day = start_lesson+1
        self.difficulty_level = 1
        self.complexity_score = 1 if item_type != 'concept' else 3  # Default to 3 for concepts

    def update_after_exposure(self, current_day):
        """Record one exposure on current_day and reschedule the next review."""
        self.exposure_count += 1
        self.last_exposure_day = current_day
        self.set_next_review_day(current_day)

    def set_next_review_day(self, current_day):
        """Set next_review_day from the exposure count, item type and difficulty."""
        # An item without any recorded exposure is scheduled like a first
        # exposure; without the clamp, index -1 would pick the longest early gap.
        exposures = max(self.exposure_count, 1)

        if exposures <= len(self.EARLY_INTERVALS):
            interval = self.EARLY_INTERVALS[exposures - 1]
        else:
            interval = self.LATE_INTERVALS[min(exposures - 1, len(self.LATE_INTERVALS) - 1)]

        if self.item_type == 'concept':
            # Concepts scale the gap by half of their complexity score
            interval = int(interval * (self.complexity_score * 0.5))
        else:
            if self.difficulty_level == 2:
                interval = int(interval * 1.5)
            elif self.difficulty_level == 3:
                interval = int(interval * 2)

        self.next_review_day = current_day + interval

In [16]:
from tqdm import tqdm

# Generate (or reload) the lesson distribution: for every topic ("cycle") the
# model proposes a set of lessons, each lesson is annotated with the items that
# are due for review, and a review lesson and a story lesson are appended.
distribution = []
file_name = "Lesson Distribution"
load_existing_distribution = False

if load_existing_distribution:
    distribution = loadJSON(file_name)

else:
    base_prompt, response_format = load_prompt_and_format(
        "Prompts/iterative_prompt_cycle.txt",
        "Prompts/iterative_format_cycle.json"
    )
    replacements = {
        "[source_language]": source_language,
        "[target_language]": target_language,
        "[CEFR_level]": CEFR_level,
        "[all_concepts]": concepts_as_text,
    }

    base_prompt = replace_tags(base_prompt, replacements)

    # Lesson key that collects the due reviews for each item type
    review_keys = {
        'concept': 'conceptsToReview',
        'word': 'wordsToReview',
        'phrase': 'phrasesToReview',
    }

    words_so_far = []
    concepts_so_far = []
    phrases_so_far = []
    all_items = []
    cycle_number = 1
    # Running lesson counter across all cycles. Deriving it from the current
    # cycle's lesson count produced overlapping or skipped numbers whenever two
    # cycles had a different number of lessons.
    lesson_number = 1
    for cycle in tqdm(topics):
        if cycle_number > number_of_topics_to_generate:
            break

        # Update the prompt to match the current cycle; the model is shown at
        # most the five most recent cycles as context
        recent_cycles = distribution[-5:]
        replacements = {
            "[cycle_number]": str(cycle_number),
            "[cycle_topic]": cycle["topic"],
            "[new_skills]": "- "+"\n- ".join(cycle["new_skills"]),
            "[new_phrases]": "- "+"\n- ".join(cycle["new_phrases"]),
            "[num_previous_cycles]": str(len(recent_cycles)),
            "[previous_cycles]": json.dumps(recent_cycles, indent=2, ensure_ascii=False)
        }
        prompt = replace_tags(base_prompt, replacements)

        obj = continuously_call_LLM(prompt, system_prompt, response_format)
        if obj is None:
            raise RuntimeError(f"Generating the lessons for cycle #{cycle_number} failed.")

        # Collect the new list of words, concepts, and phrases in this cycle
        cycle_words = []
        cycle_concepts = []
        cycle_phrases = []
        for lesson in obj["lessons"]:
            # Register every item that has not appeared in an earlier lesson
            for item_type, lesson_key, seen, cycle_new in (
                ('concept', 'concepts', concepts_so_far, cycle_concepts),
                ('word', 'words', words_so_far, cycle_words),
                ('phrase', 'phrases', phrases_so_far, cycle_phrases),
            ):
                for content in lesson[lesson_key]:
                    if content not in seen:
                        seen.append(content)
                        cycle_new.append(content)
                        all_items.append(LearningItem(content, lesson_number, item_type))

            lesson["type"] = "normal"

            # Collect the list of items to review for the current lesson based on the spaced repetition algorithm
            review_items = [item for item in all_items
                            if item.next_review_day and item.next_review_day <= lesson_number]
            new_items = [item for item in all_items
                         if item.start_lesson == lesson_number and item.exposure_count == 0]

            selected_items = sorted(review_items, key=lambda x: x.next_review_day) + new_items

            for item in selected_items:
                item.update_after_exposure(lesson_number)

            # Include all items to review in the current lesson structure
            for item in review_items:
                review_key = review_keys.get(item.item_type)
                if review_key is not None:
                    lesson.setdefault(review_key, []).append(item.content)

            lesson_number += 1

        # Add the short and long lessons to the current cycle based on the newly added words, concepts, and phrases
        obj["lessons"].append({"type": "review", "concepts": cycle_concepts, "words": cycle_words, "phrases": cycle_phrases})
        obj["lessons"].append({"type": "story", "concepts": cycle_concepts, "words": cycle_words, "phrases": cycle_phrases})

        distribution.append(obj)
        cycle_number += 1

    saveJSON(distribution, file_name)

print(json.dumps(distribution[0], indent=2, ensure_ascii=False))

  6%|▋         | 2/32 [00:25<06:28, 12.95s/it]

{
  "topic": "ناساندنی خۆت",
  "lessons": [
    {
      "concepts": [
        "Basic greetings and introductions",
        "Personal Pronouns: I, you",
        "Present Simple Tense with 'to be' (am, are)",
        "Basic Questions: What is your name?"
      ],
      "words": [
        "hello",
        "hi",
        "name",
        "my",
        "your",
        "I",
        "am",
        "you",
        "are",
        "what",
        "is",
        "nice",
        "to",
        "meet"
      ],
      "phrases": [
        "Hello.",
        "Hi.",
        "My name is...",
        "What is your name?",
        "I am...",
        "You are...",
        "Nice to meet you."
      ],
      "type": "normal"
    },
    {
      "concepts": [
        "Occupations and identities",
        "Present Simple Tense with 'to be' (continued)",
        "Basic Questions: Yes/No questions with 'to be'",
        "Affirmative and negative responses"
      ],
      "words": [
        "tourist",
        "a",
      




# Generate the Lesson Scripts

In [17]:
# Load the system prompt used for writing the lesson scripts
system_prompt = ""
with open("Prompts/lesson_script_system.txt", encoding='utf-8') as file:
    system_prompt = file.read()

# Fill in the course settings
replacements = {
    "[target_language]": target_language,
    "[source_language]": source_language,
    "[CEFR_level]": CEFR_level,
    "[topic_of_interest]": topic_of_interest,
}
system_prompt = replace_tags(prompt=system_prompt, replacements=replacements)

In [18]:
# Base prompt and response schema shared by every lesson script request
base_prompt, response_format = load_prompt_and_format(
    path_to_prompt="Prompts/lesson_script_base_prompt.txt",
    path_to_format="Prompts/lesson_script_format.json",
)

# Each lesson type has its own structure description and worked example
lesson_types = ["normal", "review", "story"]
lesson_structures, lesson_examples = {}, {}
for lesson_type in lesson_types:
    for suffix, store in (("structure", lesson_structures), ("example", lesson_examples)):
        with open(f"Prompts/lesson_script_{lesson_type}_{suffix}.txt", encoding='utf-8') as file:
            store[lesson_type] = file.read()

# Introduction text that is prepended to a topic's first lesson
intro_text = ""
with open("Prompts/intro.txt", encoding='utf-8') as file:
    intro_text = file.read()

In [19]:
import re
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.shared import Pt
from docx.oxml import OxmlElement
from docx.oxml.ns import qn

def contains_rtl(text):
    """Return True when the text has at least one character from the listed
    right-to-left Unicode ranges."""
    rtl_ranges = r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]'
    return re.search(rtl_ranges, text) is not None

def create_word_document(script, path_to_output):
    """Write a lesson script to a Word document.

    Every non-empty script line becomes one paragraph. Lines that start with a
    speaker tag ("Name)") get the tag in bold; their direction is taken from
    the module-level source_language for tags containing "Narrator" and from
    target_language otherwise. A "[pause]" line is aligned like the line
    before it. A page number field is added to the footer before saving.
    """
    def apply_direction(paragraph, rtl):
        # Store an explicit <w:bidi> flag in the paragraph properties
        marker = OxmlElement('w:bidi')
        marker.set(qn('w:val'), '1' if rtl else '0')
        paragraph._element.get_or_add_pPr().append(marker)

    def append_field_char(run, char_type):
        # One <w:fldChar> delimiter of the page number field
        delimiter = OxmlElement('w:fldChar')
        delimiter.set(qn('w:fldCharType'), char_type)
        run._element.append(delimiter)

    doc = Document()

    # Set the default font
    normal_style = doc.styles['Normal']
    normal_style.font.name = 'Arial'
    normal_style.font.size = Pt(11)

    source_is_rtl = contains_rtl(source_language)
    target_is_rtl = contains_rtl(target_language)
    previous_line_rtl = False

    for raw_line in script.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        # A speaker tag is a run of word characters closed by ")"
        speaker_match = re.match(r'^(\w+\))\s*(.*)', line)

        if speaker_match:
            speaker, content = speaker_match.groups()
            if "Narrator" in speaker:
                is_rtl = source_is_rtl
            else:
                is_rtl = target_is_rtl

            # For RTL lines the closing parenthesis moves from the bold tag
            # into the text run
            spoken_text = ' ' + content
            if is_rtl:
                speaker = speaker[:-1]
                spoken_text = ') ' + spoken_text

            paragraph = doc.add_paragraph(style='Normal')
            paragraph.add_run(speaker).bold = True

            # spoken_text always starts with a space, so it is never empty
            text_run = paragraph.add_run(spoken_text)
            text_run.font.complex_script = True
            text_run.font.rtl = is_rtl

            apply_direction(paragraph, is_rtl)
            previous_line_rtl = is_rtl

        elif line == '[pause]':
            paragraph = doc.add_paragraph('[pause]', style='Normal')
            if previous_line_rtl:
                paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
            else:
                paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT

        else:
            is_rtl = contains_rtl(line) and source_is_rtl

            paragraph = doc.add_paragraph(style='Normal')
            text_run = paragraph.add_run(line)
            text_run.font.complex_script = is_rtl
            text_run.font.rtl = is_rtl

            apply_direction(paragraph, is_rtl)
            previous_line_rtl = is_rtl

    # Add page numbers to the footer
    footer_paragraph = doc.sections[0].footer.paragraphs[0]
    footer_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT

    # The page number is a field: begin marker, "PAGE" instruction, end marker
    page_number_run = footer_paragraph.add_run()
    append_field_char(page_number_run, 'begin')

    instruction = OxmlElement('w:instrText')
    instruction.text = "PAGE"
    page_number_run._element.append(instruction)

    append_field_char(page_number_run, 'end')

    # Save the document
    doc.save(path_to_output)

In [20]:
# Generate one Word document per lesson for the first
# number_of_topics_to_generate topics of the distribution.

def _bullet_list(items):
  # Render the items as a "- item" list, one item per line
  return "- " + "\n- ".join(items)

topic_number = 1
items_so_far = {"words":[],"phrases":[],"concepts":[]}
for topic in distribution:
  if topic_number > number_of_topics_to_generate:
    break

  topic_name = topic["topic"]
  print(f"Topic #{topic_number}:  {topic_name}")
  topic_folder = f'Full Script/Topic #{topic_number} - {topic_name}/'
  os.makedirs(os.path.dirname(output_folder+topic_folder), exist_ok=True)

  total_lessons = len(topic["lessons"])
  lesson_number = 1
  for lesson in topic["lessons"]:
    # Uncomment this to skip certain lessons or topics if you want to rerun the program
    # if topic_number != 1 or lesson_number != 5:
    #     lesson_number += 1
    #     continue
    print(f"--- Lesson #{lesson_number} ---")

    # Get the collection of review items and add them in the prompt
    review_phrases_text = "<There are no phrases to review>"
    review_concepts_text = "<There are no concepts to review>"
    if "phrasesToReview" in lesson:
      review_phrases_text = _bullet_list(lesson["phrasesToReview"])
    if "conceptsToReview" in lesson:
      review_concepts_text = _bullet_list(lesson["conceptsToReview"])

    # Get the collection of previously learned items and add them in the prompt
    old_words_text = "<There are no previously learned words>"
    old_phrases_text = "<There are no previously learned phrases>"
    old_concepts_text = "<There are no previously learned concepts>"
    if items_so_far["words"]:
      old_words_text = _bullet_list(items_so_far["words"])
    if items_so_far["phrases"]:
      old_phrases_text = _bullet_list(items_so_far["phrases"])
    if items_so_far["concepts"]:
      old_concepts_text = _bullet_list(items_so_far["concepts"])

    # lesson_number is 1-based, so it is also the 0-based index of the next
    # lesson. The last lesson of a topic has no next lesson, however many
    # lessons the topic contains (this used to be hard-coded as lesson 5).
    if lesson_number >= total_lessons:
      next_lesson_text = ""
    else:
      next_lesson_text = json.dumps(topic['lessons'][lesson_number], indent=2, ensure_ascii=False)

    # Get the base prompt, add the specific lesson structure, and replace all tags
    prompt = base_prompt

    replacements = {
        "[lesson_structure]": lesson_structures[lesson["type"]],
        "[example]": lesson_examples[lesson["type"]],
        "[source_language]": source_language,
        "[target_language]": target_language,
        "[total_lessons_for_topic]": str(total_lessons),
        "[topic_number]": str(topic_number),
        "[topic_name]": topic_name,
        "[lesson_number]": str(lesson_number),
        "[new_words]": _bullet_list(lesson["words"]),
        "[new_phrases]": _bullet_list(lesson["phrases"]),
        "[new_concepts]": _bullet_list(lesson["concepts"]),
        "[next_lesson]": next_lesson_text,
        "[next_topic]": distribution[topic_number]["topic"] if topic_number < len(distribution) else "<There are no more topics>",
        "[review_phrases]": review_phrases_text,
        "[review_concepts]": review_concepts_text,
        "[old_words]": old_words_text,
        "[old_phrases]": old_phrases_text,
        "[old_concepts]": old_concepts_text,
    }

    prompt = replace_tags(prompt, replacements)

    if "العامية" in source_language or "العامية" in target_language:
      prompt += f'\nSince the one of the languages you need to consider uses colloquial Arabic, you must speak using colloquial Arabic in the dialect specified. You can not use standard Arabic. Even if you don\'t know the dialect fully, try your absolute best to speak in the colloquial Arabic of the specified dialect.'

    if lesson_number == 1:
      prompt += f'''
      - Preface this lesson with the narrator saying the following intro:
      <intro>
      {intro_text}
      </intro>
      
      Note that this intro must be translated into {source_language} without changing the meaning or altering the structure. Keep in mind that "Agora Vision" must be kept as is, not translated or transliterated. Transition into the lesson script seemlessly.
      '''

    script_structure = continuously_call_LLM(prompt, system_prompt, response_format, models['claude 3.7'])
    # Fail with a clear message instead of a TypeError when every attempt failed
    if script_structure is None:
      raise RuntimeError(f"Could not generate the script for topic #{topic_number}, lesson #{lesson_number}.")
    script = '\n'.join(section['text'] for section in script_structure)

    # Save the lesson script in its own Word document
    path_to_output = output_folder + topic_folder + f"Lesson {lesson_number}.docx"
    create_word_document(script, path_to_output)

    # Add the unique new items learned in this lesson to the total items encountered so far
    for item_kind in ("words", "phrases", "concepts"):
      for learned_item in lesson[item_kind]:
        if learned_item not in items_so_far[item_kind]:
          items_so_far[item_kind].append(learned_item)
    lesson_number += 1

  topic_number += 1

Topic #1:  ناساندنی خۆت
--- Lesson #1 ---
--- Lesson #2 ---
--- Lesson #3 ---
--- Lesson #4 ---
--- Lesson #5 ---
Topic #2:  وڵاتان و زمانەکان
--- Lesson #1 ---
--- Lesson #2 ---
Converting response to JSON failed. Unterminated string starting at: line 215 column 13 (char 20964). Trying again...
--- Lesson #3 ---
--- Lesson #4 ---
--- Lesson #5 ---
Converting response to JSON failed. Invalid control character at: line 29 column 156 (char 4489). Trying again...
Converting response to JSON failed. Invalid control character at: line 29 column 154 (char 4409). Trying again...
