In [2]:
from dotenv import load_dotenv
import os

In [3]:
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

In [4]:
from langchain.chat_models import init_chat_model

model = init_chat_model("llama3-8b-8192", model_provider="groq")

In [5]:
from langchain_core.prompts import ChatPromptTemplate

system_template = """
You are a highly accurate and concise regex generator. Your task is to convert natural language instructions into valid, efficient, and minimal regular expressions (regex). Follow these guidelines carefully:
- Regex Output Only: Respond only with the regular expression pattern unless explicitly asked to explain. Do not include extra commentary, code blocks, or markdown formatting unless requested.
- Correctness First: Ensure the regex pattern matches the described behavior exactly and completely. Avoid overgeneralization or underfitting.
- Conciseness and Efficiency: Use the simplest regex possible to achieve the described goal. Avoid unnecessary capture groups or verbose constructs.
- Common Standards:
    - Assume the flavor is ECMAScript (JavaScript) unless specified otherwise.
    - Do not anchor with ^ or $ unless clearly required.
    - Use non-capturing groups (?:...) when groups are needed but not referenced.
- Character Classes & Quantifiers: Use shorthand character classes (\\d, \\w, \\s, etc.) when appropriate. Use greedy or lazy quantifiers as required by the instruction.
- Escaping: Properly escape special characters to avoid unintended behavior.
- Edge Cases: Handle edge cases if mentioned (e.g., case sensitivity, optional components, boundary conditions).
"""

prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_template), ("user", "{query}")]
)

In [6]:
prompt = prompt_template.invoke({"query": "Match international phone numbers"})
prompt

ChatPromptValue(messages=[SystemMessage(content='\nYou are a highly accurate and concise regex generator. Your task is to convert natural language instructions into valid, efficient, and minimal regular expressions (regex). Follow these guidelines carefully:\n- Regex Output Only: Respond only with the regular expression pattern unless explicitly asked to explain. Do not include extra commentary, code blocks, or markdown formatting unless requested.\n- Correctness First: Ensure the regex pattern matches the described behavior exactly and completely. Avoid overgeneralization or underfitting.\n- Conciseness and Efficiency: Use the simplest regex possible to achieve the described goal. Avoid unnecessary capture groups or verbose constructs.\n- Common Standards:\n    - Assume the flavor is ECMAScript (JavaScript) unless specified otherwise.\n    - Do not anchor with ^ or $ unless clearly required.\n    - Use non-capturing groups (?:...) when groups are needed but not referenced.\n- Characte

In [7]:
response = model.invoke(prompt)
print(response.content)

`(?:\+?|-?)?\(?(\d{1,3})\)?[-\s\.]?(\d{3})[-\s\.]?(\d{4,5})`


In [8]:
system_template = """
You are a precise and knowledgeable regular expression (regex) explainer. Your role is to convert regex patterns into clear, concise, and accurate natural language explanations. Follow these instructions:
- Explain What the Regex Does: Describe the overall function of the pattern in plain language.
- Break Down Components: Explain each part or section of the regex, especially if the pattern is complex.
- Use Natural Language: Avoid technical jargon unless necessary. Favor readable descriptions over formal syntax.
- Be Accurate and Thorough: Ensure every element of the pattern is accounted for and explained correctly.
- Be Concise: Keep explanations clear and compact, especially for simple patterns.
- No Execution: Do not test or validate regex against data.
- Regex Flavor: Assume ECMAScript (JavaScript) flavor unless specified otherwise.
- Avoid Over-Explanation: Don't repeat similar parts unnecessarily (e.g., identical groups or ranges).

Output format:
- For basic use, return a paragraph explanation.
- If the pattern is complex, use bullet points or step-by-step breakdowns.
"""

prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_template), ("user", "{query}")]
)

In [9]:
prompt = prompt_template.invoke({"query": r"\d{1,3}[-.]?\d{1,3}[-.]?\d{1,4}"})
prompt

ChatPromptValue(messages=[SystemMessage(content="\nYou are a precise and knowledgeable regular expression (regex) explainer. Your role is to convert regex patterns into clear, concise, and accurate natural language explanations. Follow these instructions:\n- Explain What the Regex Does: Describe the overall function of the pattern in plain language.\n- Break Down Components: Explain each part or section of the regex, especially if the pattern is complex.\n- Use Natural Language: Avoid technical jargon unless necessary. Favor readable descriptions over formal syntax.\n- Be Accurate and Thorough: Ensure every element of the pattern is accounted for and explained correctly.\n- Be Concise: Keep explanations clear and compact, especially for simple patterns.\n- No Execution: Do not test or validate regex against data.\n- Regex Flavor: Assume ECMAScript (JavaScript) flavor unless specified otherwise.\n- Avoid Over-Explanation: Don't repeat similar parts unnecessarily (e.g., identical groups 

In [10]:
response = model.invoke(prompt)
print(response.content)

This regex pattern matches IP addresses. Here's a breakdown of what it does:

* `\d{1,3}`: This part matches a digit between 1 and 3 times, which represents a number in an IP address (octet).
* `[-.]?`: This is a non-capturing group that matches an optional character, either a hyphen (-) or a period (.). This allows for either a dotted decimal notation (e.g., 192.168.1.1) or a dashed notation (e.g., 192-168-1-1).
* The pattern is repeated three times, allowing for the matching of three octets in an IP address.

In summary, this regex pattern matches IP addresses in either dotted decimal or dashed notation, with each octet being a number between 0 and 255.


In [20]:
system_template = """
You are a clear and simple error explainer. Given a regex and an input text, your job is to:
> Identify and describe in plain language the reasons the input text does not match the regex pattern.

Guidelines:
- Focus only on describing what is wrong or missing in the input text compared to what is expected.
- Avoid explaining the regex syntax or details.
- Use everyday language, suitable for someone not familiar with regex.
- Highlight the first or most important mismatch.
- Keep responses short, clear, and actionable.
- Do not suggest fixes or corrections unless asked.

Input Format:
You will be given two inputs:
- regex: The regular expression pattern.
- text: The input string to test against the regex.

Output Format:
- Provide a bullet-point list of mismatch explanations
"""

prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_template), ("user", "Regex: {regex}\nText: {text}")]
)

In [23]:
prompt = prompt_template.invoke({"regex": r"\d{1,3}[-.]?\d{1,3}[-.]?\d{1,4}", "text": "12-45-6789"})
prompt

ChatPromptValue(messages=[SystemMessage(content='\nYou are a clear and simple error explainer. Given a regex and an input text, your job is to:\n> Identify and describe in plain language the reasons the input text does not match the regex pattern.\n\nGuidelines:\n- Focus only on describing what is wrong or missing in the input text compared to what is expected.\n- Avoid explaining the regex syntax or details.\n- Use everyday language, suitable for someone not familiar with regex.\n- Highlight the first or most important mismatch.\n- Keep responses short, clear, and actionable.\n- Do not suggest fixes or corrections unless asked.\n\nInput Format:\nYou will be given two inputs:\n- regex: The regular expression pattern.\n- text: The input string to test against the regex.\n\nOutput Format:\n- Provide a bullet-point list of mismatch explanations\n', additional_kwargs={}, response_metadata={}), HumanMessage(content='Regex: \\d{1,3}[-.]?\\d{1,3}[-.]?\\d{1,4}\nText: 12-45-6789', additional_kw

In [24]:
response = model.invoke(prompt)
print(response.content)

Here's what's wrong with the input text:

• The input text is missing a digit in the first group (it should be 1-3 digits, but it's only 2).


In [37]:
system_template = """
You are a skilled regex builder. Given a set of example texts, your task is to:
> Generate a regular expression pattern that matches all the given texts and follows their common pattern.

Guidelines:
- Each line contains one example text.
- Analyze the provided texts carefully to find the shared structure or pattern.
- Create a regex that matches all the examples, and is as general as possible without matching unrelated strings.
- Use common regex constructs such as character classes, quantifiers, groups, and anchors as needed.
- Prefer concise and readable regex patterns.
- Assume ECMAScript (JavaScript) flavor unless otherwise specified.
- Do not include explanations or additional text—output only the regex pattern unless otherwise requested.
- If the examples contain variations, reflect those accurately in the regex (e.g., optional parts, alternatives).
- Properly escape special characters if they appear literally in the examples.


"""

prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_template), ("user", "{query}")]
)

In [38]:
prompt = prompt_template.invoke({"query": """
dog123  
dog456  
dog789
""".strip()})
prompt

ChatPromptValue(messages=[SystemMessage(content='\nYou are a skilled regex builder. Given a set of example texts, your task is to:\n> Generate a regular expression pattern that matches all the given texts and follows their common pattern.\n\nGuidelines:\n- Each line contains one example text.\n- Analyze the provided texts carefully to find the shared structure or pattern.\n- Create a regex that matches all the examples, and is as general as possible without matching unrelated strings.\n- Use common regex constructs such as character classes, quantifiers, groups, and anchors as needed.\n- Prefer concise and readable regex patterns.\n- Assume ECMAScript (JavaScript) flavor unless otherwise specified.\n- Do not include explanations or additional text—output only the regex pattern unless otherwise requested.\n- If the examples contain variations, reflect those accurately in the regex (e.g., optional parts, alternatives).\n- Properly escape special characters if they appear literally in the

In [39]:
"""
hello_world
hello_world!
hello_world123
""".strip()

'hello_world\nhello_world!\nhello_world123'

In [40]:
response = model.invoke(prompt)
print(response.content)

\d{3}dog
