<a href="https://colab.research.google.com/github/Danishkc/PersonalStaticWebsite/blob/master/Chat_Template_Formatter_Assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Assignment: Chat Template Formatter using Python**

In [None]:
# Installing pyparsing library
!pip install pyparsing

In [20]:
from pyparsing import (
    CaselessLiteral, Combine, Literal, Word, Optional, restOfLine, Group, Forward, OneOrMore, NotAny, LineEnd
)

In [21]:
# Grammar building blocks.
# The tag markers are kept as plain strings; pyparsing turns a string into a
# literal when it is combined with a parser element.
user_tag_start, user_tag_end = "{{#user}}", "{{/user}}"
assistant_tag_start, assistant_tag_end = "{{#assistant}}", "{{/assistant}}"

# A gen command: the literal "{{gen", the remainder of the line, then "}}".
# NOTE(review): restOfLine appears to run to the end of the line, which would
# take in the closing "}}" as well and leave nothing for the final literal to
# match — confirm against the pyparsing documentation.
gen_command = Literal("{{gen") + Combine(Optional(restOfLine)) + Literal("}}")

In [22]:
# Composite tag expressions. Each is wrapped in Combine so the matched pieces
# come back as a single token.
# NOTE(review): user_tag and assistant_tag place restOfLine between the opening
# and closing markers; if restOfLine consumes the closing marker too, these can
# never match text that has both markers on one line — confirm.
user_tag = Combine(Literal(user_tag_start) + restOfLine + Literal(user_tag_end))
assistant_tag = Combine(
    Literal(assistant_tag_start) + restOfLine + Literal(assistant_tag_end)
)

# An assistant block whose entire content is a gen command.
gen_command_tag = Combine(
    Literal(assistant_tag_start) + gen_command + Literal(assistant_tag_end)
)

In [23]:
# A template segment is one or more stretches of text. Each stretch must not
# begin with a gen command or a closing tag, must not begin at a line end, and
# then takes the rest of the line.
# The negative lookaheads are evaluated only at the position where a stretch
# starts; they place no restriction on what restOfLine goes on to consume.
_segment_stop = gen_command | assistant_tag_end | user_tag_end
_segment_text = NotAny(_segment_stop) + NotAny(LineEnd()) + restOfLine
template_segment = Combine(OneOrMore(_segment_text))

In [24]:
# The complete chat template: exactly one of the alternatives below, tried in
# order. Each alternative is wrapped in Group so its tokens arrive as one
# nested result.
_user_block = Group(user_tag + template_segment + user_tag_end)
_assistant_gen_block = Group(assistant_tag + gen_command_tag + assistant_tag_end)
_assistant_text_block = Group(assistant_tag + template_segment + assistant_tag_end)
_bare_segment = Group(template_segment)

# Forward is used as a placeholder here; the definition does not refer back to
# chat_template, so the grammar is not recursive and is not repeated.
chat_template = Forward()
chat_template <<= (
    _user_block | _assistant_gen_block | _assistant_text_block | _bare_segment
)

In [26]:
# Creating a utility function
def format_chat_template(input_text):
    """Wrap raw prompt text and its ``{{gen ...}}`` commands in chat-role tags.

    The text is split at every ``{{gen ...}}`` command and re-assembled as
    space-separated pieces:

    * the text before the first command, stripped, inside
      ``{{#user}}...{{/user}}`` (emitted even when that text is empty);
    * each command, rewritten as ``{{gen <args> }}`` and wrapped in
      ``{{#assistant}}...{{/assistant}}``;
    * any non-blank text following a command, stripped and left unwrapped;
    * a final ``{{#assistant}}{{gen 'write' }}{{/assistant}}`` when the last
      piece is not already a gen command.

    Args:
        input_text: The raw prompt string, optionally containing gen commands.

    Returns:
        The formatted template as a single string.

    Raises:
        TypeError: If ``input_text`` is not a string.
    """
    # Function-scope import keeps this cell runnable on its own.
    import re

    # Gen commands are located with a regular expression rather than with the
    # module-level ``chat_template`` grammar: that grammar is built on
    # ``restOfLine``, which takes in the whole line and so never isolates a
    # command, and its results are not ``list`` instances, so the previous
    # ``isinstance(item, list)`` branch could not run.
    user_open, user_close = "{{#user}}", "{{/user}}"
    assistant_open, assistant_close = "{{#assistant}}", "{{/assistant}}"
    default_gen = "{{gen 'write' }}"

    # With one capture group, re.split returns
    # [text, args, text, args, ..., text]: always an odd number of items.
    # ``\b`` stops "{{generate}}" and similar from being read as a command.
    lead, *rest = re.split(r"\{\{gen\b(.*?)\}\}", input_text, flags=re.DOTALL)

    formatted_text = [user_open + lead.strip() + user_close]
    needs_default_gen = True

    for gen_args, following in zip(rest[0::2], rest[1::2]):
        gen_args = gen_args.strip()
        # Normalise spacing to the "{{gen <args> }}" form.
        gen = "{{gen " + gen_args + " }}" if gen_args else "{{gen }}"
        formatted_text.append(assistant_open + gen + assistant_close)
        needs_default_gen = False

        following = following.strip()
        if following:
            formatted_text.append(following)
            # Trailing plain text means the template no longer ends on a command.
            needs_default_gen = True

    # If the template does not end with a gen command, add the default one.
    if needs_default_gen:
        formatted_text.append(assistant_open + default_gen + assistant_close)

    # Join the segments and return
    return " ".join(formatted_text)

In [27]:
# Test fixtures: each case pairs a raw input with the output expected from the
# formatter for that input.

# Case 1: plain text, no gen command.
test_input_1, expected_output_1 = (
    "how are things going, tell me about Delhi",
    "{{#user}}how are things going, tell me about Delhi{{/user}} {{#assistant}}{{gen 'write' }}{{/assistant}}",
)

# Case 2: gen command at the very end of the input.
# NOTE(review): this expected string is shaped differently from cases 3-8
# (the user block stops after the first sentence, and there is no space
# before the closing "}}") — confirm it is intended.
test_input_2, expected_output_2 = (
    "Tweak this proverb to apply to model instructions instead. Where there is no guidance{{gen 'rewrite'}}",
    "{{#user}}Tweak this proverb to apply to model instructions instead.{{/user}} Where there is no guidance {{#assistant}}{{gen 'rewrite'}}{{/assistant}}",
)

# Case 3: gen command in the middle, surrounded by spaces.
test_input_3, expected_output_3 = (
    "Hello! {{gen 'greet'}} How can I assist you today?",
    "{{#user}}Hello!{{/user}} {{#assistant}}{{gen 'greet' }}{{/assistant}} How can I assist you today? {{#assistant}}{{gen 'write' }}{{/assistant}}",
)

# Case 4: gen command directly after the leading text (no space before it).
test_input_4, expected_output_4 = (
    "Please provide details{{gen 'details'}} Thanks!",
    "{{#user}}Please provide details{{/user}} {{#assistant}}{{gen 'details' }}{{/assistant}} Thanks! {{#assistant}}{{gen 'write' }}{{/assistant}}",
)

# Case 5: gen command in the middle of a sentence.
test_input_5, expected_output_5 = (
    "Hi there! Can you {{gen 'explain'}} this concept?",
    "{{#user}}Hi there! Can you{{/user}} {{#assistant}}{{gen 'explain' }}{{/assistant}} this concept? {{#assistant}}{{gen 'write' }}{{/assistant}}",
)

# Case 6: gen command with two arguments.
test_input_6, expected_output_6 = (
    "I'm curious about {{gen 'define' 'pyparsing'}} library.",
    "{{#user}}I'm curious about{{/user}} {{#assistant}}{{gen 'define' 'pyparsing' }}{{/assistant}} library. {{#assistant}}{{gen 'write' }}{{/assistant}}",
)

# Case 7: input that starts with a gen command (empty user block expected).
test_input_7, expected_output_7 = (
    "{{gen 'greet'}} How can I assist you today?",
    "{{#user}}{{/user}} {{#assistant}}{{gen 'greet' }}{{/assistant}} How can I assist you today? {{#assistant}}{{gen 'write' }}{{/assistant}}",
)

# Case 8: two-argument gen command in the middle of a sentence.
test_input_8, expected_output_8 = (
    "Let's talk about {{gen 'topic' 'AI'}} and its applications.",
    "{{#user}}Let's talk about{{/user}} {{#assistant}}{{gen 'topic' 'AI' }}{{/assistant}} and its applications. {{#assistant}}{{gen 'write' }}{{/assistant}}",
)

In [28]:
# Run the formatter over every test input, in order, binding each result to
# its numbered output name.
(
    formatted_output_1,
    formatted_output_2,
    formatted_output_3,
    formatted_output_4,
    formatted_output_5,
    formatted_output_6,
    formatted_output_7,
    formatted_output_8,
) = map(
    format_chat_template,
    (
        test_input_1,
        test_input_2,
        test_input_3,
        test_input_4,
        test_input_5,
        test_input_6,
        test_input_7,
        test_input_8,
    ),
)

In [None]:
# Printing test cases
# Each formatted output is printed unchanged, followed by a line stating
# whether it equals the expected string defined with the test inputs. The
# comparison is reported rather than asserted so the cell always runs to the
# end and every case is visible.
test_results = (
    (formatted_output_1, expected_output_1),
    (formatted_output_2, expected_output_2),
    (formatted_output_3, expected_output_3),
    (formatted_output_4, expected_output_4),
    (formatted_output_5, expected_output_5),
    (formatted_output_6, expected_output_6),
    (formatted_output_7, expected_output_7),
    (formatted_output_8, expected_output_8),
)
for formatted_output, expected_output in test_results:
    print(formatted_output)
    print("  matches expected:", formatted_output == expected_output)