In [None]:
from langchain_openai import OpenAI, ChatOpenAI
from langchain.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage

from langchain.chains import LLMChain, SimpleSequentialChain
import os

import json

# Prompt Template

In [None]:
str_template = """
               You need to return ONLY a valid LaTeX formula for the math formula the user describes or asks for, with no additional text,
               no wrapping in $$ or any other special characters, and no escaping of backslashes.  

                Here is the user's description of a formula: {query}  
               """
#Making Prompt Template from one string with variable placeholder {query}
give_latex_prompt =  PromptTemplate(template=str_template, 
                                    input_variables=['query'])
specific_prompt = give_latex_prompt.invoke({'query' : 'Varaince of sample'})
print(specific_prompt)

text="\n               You need to return ONLY a valid LaTeX formula for the math formula the user describes or asks for, with no additional text,\n               no wrapping in $$ or any other special characters, and no escaping of backslashes.  \n\n                Here is the user's description of a formula: Varaince of sample mean  \n               "


In [3]:
system_message = """You are an assistant for a scientist who writes articles.
                    He will ask you for mathematical formulas and expressions or describe them,
                    You **must always** return user **only** valid LaTex formula fiting his description wrapped in $$ that and **nothing else**:
                  """

messages = [('system', system_message),
            MessagesPlaceholder("msgs")]
#making Prompt Template from messages sequence with one placeholder
give_latex_prompt_chat = ChatPromptTemplate(messages)

specific_prompt = give_latex_prompt_chat.invoke({"msgs": [HumanMessage(content='Varaince of sample mean')]})

print(specific_prompt)

messages=[SystemMessage(content='You are an assistant for a scientist who writes articles.\n                    He will ask you for mathematical formulas and expressions or describe them,\n                    You **must always** return user **only** valid LaTex formula fiting his description wrapped in $$ that and **nothing else**:\n                  ', additional_kwargs={}, response_metadata={}), HumanMessage(content='Varaince of sample mean', additional_kwargs={}, response_metadata={})]


In [4]:
#Let's see how it works in combination with LLM instance
token_path = '../data/new_openai_token.txt'
with open(token_path, 'rt') as f:
    openai_token = f.read()

if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = openai_token
#token is passed via Enviroment variable defined above
llm = ChatOpenAI(model="gpt-4-turbo")

In [5]:
#Most simplistic way to define langChain Chain-like object
chain = give_latex_prompt | llm

result = chain.invoke({'query' : 'Varaince of sample mean'})
print(result.content)

\sigma_{\bar{x}}^2 = \frac{\sigma^2}{n}


In [None]:
chat_chain = give_latex_prompt_chat | llm
chat_result = chat_chain.invoke({"msgs": [HumanMessage(content='Varaince of sample mean')]})
print(chat_result.content)

# Chain

In [6]:
#Alternatively we can define Chain like that
llm_chain = LLMChain(llm=llm,  # Ensure 'llm' is properly initialized
                     prompt=give_latex_prompt)

result = llm_chain.invoke({'query' : 'Covariance of two samples of random variables X,Y'})
print(result['text'])

  llm_chain = LLMChain(llm=llm,  # Ensure 'llm' is properly initialized


\operatorname{Cov}(X,Y) = \mathbb{E}[(X - \mathbb{E}[X])(Y - \mathbb{E}[Y])]


In [8]:
#There are great many types of Chains in LangChain
#For Example Q/A Chain that can retrieve info from documents
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import TextLoader

In [9]:
text_content = """
               formula 1: Varaince of sample mean,
               formula 2: Covariance of two samples of random variables X,Y
               formula 3: Poisson distribution
               formula 4: Normal Distribution
               """
filepath = '../data/formulas.txt'
with open(filepath, 'wt') as f:
    f.write(text_content)

In [10]:
# Load documents into a retriever
loader = TextLoader(filepath)
docs = loader.load()
retriever = FAISS.from_documents(docs, OpenAIEmbeddings()).as_retriever()

  retriever = FAISS.from_documents(docs, OpenAIEmbeddings()).as_retriever()


In [11]:
# Define the RetrievalQA chain
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

# Ask a question
response = qa_chain.run("Give me formula №3 from my list")
print(response)

  response = qa_chain.run("Give me formula №3 from my list")


Formula 3 from your list is the Poisson distribution. The probability mass function of a Poisson-distributed random variable is given by:

\[ P(X = k) = \frac{\lambda^k e^{-\lambda}}{k!} \]

where:
- \(k\) is the number of occurrences (a non-negative integer),
- \(e\) is the base of the natural logarithm,
- \(\lambda\) is the average rate (or expected number) of occurrences in the given interval, and
- \(k!\) is the factorial of \(k\).


In [12]:
#Then we can put them into one Sequential Chain
seq_chain = SimpleSequentialChain(chains=[qa_chain, llm_chain])
poisson_latex_formula = seq_chain.run("Give me formula №3 from my list")

In [26]:
print(poisson_latex_formula)
#Предположим, что модель не справляется, с удалением двойных \\ или все равно ставит $$ $$ вокруг формулы.

P(X = k) = \frac{\lambda^k e^{-\lambda}}{k!}


In [17]:
#There is many ways to fix it, for example like this
from langchain_core.output_parsers import BaseOutputParser
import re

class LaTeXOutputParser(BaseOutputParser):
    """Custom parser to extract only the LaTeX formula without extra text or escaping issues."""

    def parse(self, text: str) -> str:
        # Extract formula inside $$...$$ if present
        match = re.search(r"\$\$(.*?)\$\$", text, re.DOTALL)
        formula = match.group(1) if match else text  # If no match, use full output
        
        # Fix double backslashes (unescape LaTeX)
        formula = formula.replace("\\\\", "\\")
        
        return formula.strip()  # Remove any leading/trailing spaces
#But also we could define Extra LLM chain to fix output of previous ones :) 
#MOAR LLM API REQUESTS!!!!

# Create LLMChain with the custom parser
llm_chain_w_parser = LLMChain(
    llm=llm,
    prompt=give_latex_prompt,
    output_parser=LaTeXOutputParser()  # Attach custom parser
)

seq_chain = SimpleSequentialChain(chains=[qa_chain, llm_chain_w_parser])

In [18]:
poisson_latex_formula = seq_chain.run("Give me formula №3 from my list")

In [25]:
print(poisson_latex_formula)

P(X = k) = \frac{\lambda^k e^{-\lambda}}{k!}


## Мы познакомились с простейшими иннструментами LangChain
Далее можно расширять наш кругозор в разные стороны. Например:
1. Интегрировать уже имеющиеся у нас простые инструменты с еще чем-нибудь и сделать это более похожим на настоящий сервис
2. Изучить более продвинутые инструменты, вроде LangGraph

# Интегрируем, то, что у нас есть с Playwright и Telebot

In [29]:
from playwright.sync_api import sync_playwright
import requests

In [30]:
#Use Playwright (synchronously) to generate image from LaTeX
def generate_latex_image(latex_code):
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        page = browser.new_page()

        page.goto("https://latex2image.joeraut.com/")
        page.fill('textarea[name="latex_code"]', latex_code)
        page.click('button:has-text("Generate Image")')
        page.wait_for_selector('img.generated-image')

        img_url = page.get_attribute('img.generated-image', 'src')
        if img_url:
            response = requests.get(img_url)
            with open("../data/latex_image.png", "wb") as f:
                f.write(response.content)
            print("Image saved as latex_image.png")
        else:
            print("Error: Could not find generated image.")

        browser.close()

In [31]:
llm_chain = LLMChain(llm=llm,
                     prompt=give_latex_prompt)
result = llm_chain.invoke({'query' : 'Covariance of two samples of random variables X,Y'})
latex_formula = result['text']
print(latex_formula)

\operatorname{Cov}(X, Y) = E[(X - E[X])(Y - E[Y])]


In [32]:
generate_latex_image(latex_formula)

Error: It looks like you are using Playwright Sync API inside the asyncio loop.
Please use the Async API instead.

In [33]:
#!pip3 install telebot

Collecting telebot
  Downloading telebot-0.0.5-py3-none-any.whl (4.8 kB)
Collecting pyTelegramBotAPI
  Downloading pytelegrambotapi-4.26.0-py3-none-any.whl (270 kB)
     -------------------------------------- 270.5/270.5 KB 1.0 MB/s eta 0:00:00
Installing collected packages: pyTelegramBotAPI, telebot
Successfully installed pyTelegramBotAPI-4.26.0 telebot-0.0.5


You should consider upgrading via the 'C:\Users\arsen\Documents\Programming\LangChainLearning\env\Scripts\python.exe -m pip install --upgrade pip' command.


In [39]:
from telebot import types
from telebot.handler_backends import State, StatesGroup
from telebot.storage import StateMemoryStorage
from telebot.types import Message

from dataclasses import dataclass, field

with open('../data/tg_bot_token.txt', 'rt') as f:
    token = f.read()


In [None]:
class TelegramInterface:
    def __init__(self, tg_bot, chain):
        self.bot = tg_bot
        self.chain = chain

        # Register message handler
        #for start
        @self.bot.message_handler(commands=['start'])
        def send_welcome(message: Message):
            self.bot.reply_to(message, "Welcome! Send me formula description and I will generate an image for you.")
        #for ANY text message
        @self.bot.message_handler(func=lambda message: True)
        def handle_latex_message(message: Message):
            #Get latex formula from LLMChain
            latex_formula = chain.invoke({'query' : message.text.strip()})
            self.bot.send_message(message.chat.id, "Processing your formula... Please wait ⏳")
            #Generate Image via WebService
            image_path = generate_latex_image(latex_code)
            
            if image_path:
                with open(image_path, "rb") as img:
                    self.bot.send_photo(message.chat.id, img)
                os.remove(image_path)  # Cleanup after sending
            else:
                self.bot.send_message(message.chat.id, "Error generating image. Please check your LaTeX input.")

    def run(self):
        """Start the bot."""
        print("Bot is running...")
        self.bot.polling(none_stop=True)