In [5]:
# ! pip install langchain-openai
! pip install numpy

Collecting numpy
  Downloading numpy-2.2.5-cp310-cp310-macosx_10_9_x86_64.whl (21.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: numpy
Successfully installed numpy-2.2.5


In [None]:
from langchain_openai.embeddings import OpenAIEmbeddings

# Embedding client shared by the later cells; every vector in this notebook
# comes from the "text-embedding-3-small" model.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
)

# Embed one throwaway string and keep the returned vector in `res`.
res = embeddings.embed_query(
    "hello world"
)

In [6]:
import numpy as np

In [8]:
# Sample natural-language requests, each naming a piece of text to produce
# (a paragraph, title, poem, caption, ...) together with its topic.
# These strings are embedded one by one and averaged in a later cell, so any
# edit to the wording changes the resulting average vector.
instructional_creational_queries = [
    "paragraph about the independence of Lesotho",
    "bolded statement on the theory of relativity",
    "concluding sentence for a paper on climate change",
    "title for an article about AI in education",
    "introductory paragraph for a blog on personal finance",
    "bullet points summarizing the causes of World War I",
    "short poem about the ocean",
    "metaphor describing time management",
    "persuasive sentence on why recycling is important",
    "definition of blockchain in simple terms",
    "one-line summary of the Great Depression",
    "email subject line for a job application",
    "quote-style statement on perseverance",
    "informal sentence explaining Newton's laws",
    "academic sentence linking two research studies",
    "hashtag-friendly phrase about remote work",
    "caption for a photo of a sunset",
    "sentence using a simile to describe anxiety",
    "tweet summarizing the importance of net neutrality",
    "exclamatory sentence celebrating a scientific discovery"
]

# Sample LaTeX fragments that stop partway: an environment opened without its
# matching \end, a brace left unclosed, or an argument left empty.
# Raw strings keep the backslashes literal. These fragments are embedded one
# by one and averaged in a later cell, so any edit changes the average vector.
unfinished_latex_lines = [
    r"\begin{itemize}",        # list environment opened, no \end
    r"\item This is an item",  # lone list item
    r"\begin{equation}",       # equation environment opened, no \end
    r"\frac{a}{",              # incomplete fraction
    r"\textbf{Important: ",    # missing closing brace
    r"\section{Introduction",  # missing closing brace
    r"\begin{align}",          # align environment opened, no \end
    r"\includegraphics[width=", # width option left unfinished
    r"\documentclass{",        # needs class name
    r"\usepackage{",           # missing package name
    r"\begin{tabular}{|c|c|",  # incomplete column formatting
    r"\title{A Study on ",     # unfinished title
    r"\author{",               # needs author name
    r"\label{fig:",            # unfinished figure label
    r"\cite{",                 # missing citation key
    r"\ref{",                  # missing reference key
    r"\begin{figure}[htbp]",   # typical figure start
    r"\caption{",              # unfinished caption
    r"\[ E = mc",              # incomplete math expression
    r"\textit{Note: "          # missing closing brace
]



In [9]:
# Instructional queries: embed each string separately, then average the
# resulting vectors component-wise (axis 0) into a single vector.
instructional_creational_embeddings = list(
    map(embeddings.embed_query, instructional_creational_queries)
)
instructional_creational_average_embedding = np.asarray(
    instructional_creational_embeddings
).mean(axis=0)

# Unfinished LaTeX lines: same procedure, one embedding per fragment followed
# by a component-wise average.
unfinished_latex_embeddings = list(
    map(embeddings.embed_query, unfinished_latex_lines)
)
unfinished_latex_average_embedding = np.asarray(
    unfinished_latex_embeddings
).mean(axis=0)


In [13]:
# Persist both average vectors as plain text, one file per vector.
np.savetxt(
    fname="instructional_creational_average_embedding.txt",
    X=instructional_creational_average_embedding,
)
np.savetxt(
    fname="unfinished_latex_average_embedding.txt",
    X=unfinished_latex_average_embedding,
)

In [14]:
def append_trailing_commas(path):
    """Rewrite the text file at ``path`` so every line ends with a comma.

    The file is read completely and closed before it is reopened for
    writing, so a read handle and a write handle on the same file are never
    open at the same time.

    Lines that already end with a comma are written back unchanged, which
    makes the operation idempotent: running this cell a second time no
    longer turns ``value,`` into ``value,,``.

    Args:
        path: Path of the text file to rewrite in place.
    """
    with open(path, "r") as source:
        stripped_lines = [line.strip() for line in source]

    with open(path, "w") as target:
        for text in stripped_lines:
            if not text.endswith(","):
                text += ","
            target.write(text + "\n")


# Add a trailing comma to every line of both saved average-embedding files.
append_trailing_commas("instructional_creational_average_embedding.txt")
append_trailing_commas("unfinished_latex_average_embedding.txt")


In [15]:
# Embed the canned reply and save its vector to disk.
failed_ans = "Sure! Please provide the LaTeX code you would like me to complete."
failed_ans_embedding = embeddings.embed_query(failed_ans)

# Write one value per line, each followed by a comma. Passing the comma as
# part of `newline` produces the final file in a single pass, instead of
# saving first and then reading the file back to rewrite it while the read
# handle is still open. Re-running the cell simply regenerates the same file.
np.savetxt("failed_ans_embedding.txt", failed_ans_embedding, newline=",\n")
