# Development Environment Testing

- This is the code that will be packaged into the lambda function

In [39]:
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from dotenv import load_dotenv

import pandas as pd
import os

load_dotenv()

True

In [40]:
# 0. convert dataset
# df = pd.read_parquet('ml-arxiv-papers/data/train-00000-of-00001-52427cf3bce60f12.parquet')
# df.head(1000).to_csv('ml-arxiv-papers/csv_data/train.csv', index=False)
# df.head(50000).to_csv('ml-arxiv-papers/csv_data/train2.csv', index=False)

In [41]:
# 1. vectorize paper data
# os.getenv("OPEN_AI_KEY")

# loader = CSVLoader(file_path='ml-arxiv-papers/csv_data/train2.csv')
# documents = loader.load()

# embeddings = OpenAIEmbeddings()
# db = FAISS.from_documents(documents, embeddings)
# db.save_local("faiss_index")

In [42]:
# Embedding client handed to FAISS.load_local together with the saved index.
embeddings = OpenAIEmbeddings()

# Load the index previously written to the local "faiss_index" folder.
# NOTE(review): LangChain's FAISS.load_local appears to unpickle the stored
# docstore — only load an index this project produced; confirm against the docs.
db = FAISS.load_local(folder_path="faiss_index", embeddings=embeddings)

In [43]:
# 2. Function for similarity search

def retrieve_info(query, k=3):
    """Return the text of the stored documents most similar to ``query``.

    Args:
        query: Free-text search string passed to the vector store.
        k: Number of nearest documents to request from the store. Defaults
            to 3, the value that was previously hard-coded.

    Returns:
        A list with the ``page_content`` of each document returned by
        ``db.similarity_search``, in the order the store returned them.
    """
    # `db` is the module-level FAISS index loaded in an earlier cell.
    matches = db.similarity_search(query, k=k)
    return [doc.page_content for doc in matches]

In [45]:
# 3. Set up the LLM chain and prompt

# GPT-4 chat model with temperature=0, used to draft the abstract.
llm = ChatOpenAI(temperature=0, model="gpt-4")

# Prompt with three slots: {title}, {context} and {abstracts}.
# The trailing backslashes inside the string join the wrapped source lines
# into a single sentence in the rendered prompt.
template = """
You are a world class research paper writer.
I will share a title and context for a research paper with you and you will \
give the abstract that goes along with that paper based on past abstracts, \
and you will follow all the rules below:

1. The response should be very similar or identical to best practices in terms of tone of voice, sentence structure and other concepts.

2. If the abstracts are irrelevant then try to mimic the style of the abstracts.

3. Make each response approximately 200 words long

Below is the title of the paper:
{title}

Here is some context:
{context}

Here is a list of abstracts of similar papers:
{abstracts}

Please write the best abstract for this paper:
"""

# input_variables must list exactly the placeholders used in the template.
prompt = PromptTemplate(
    input_variables=["title", "context", "abstracts"],
    template=template,
)

# Chain that fills the prompt and sends it to the chat model.
chain = LLMChain(llm=llm, prompt=prompt)

In [46]:
# 4. Retrieval-augmented generation

message = "Robot Laser Goose"
context = "95% success rate in deterring Geese. Deep learning target recognition. 500m range"

# Fetch the stored entries most similar to the paper title.
abstracts = retrieve_info(message)
print("Top matches: ")
print(abstracts)

# Join the matches into plain text before filling the prompt. Passing the list
# itself would insert its Python repr (brackets, quotes and literal "\n"
# escapes) into the prompt instead of readable abstracts.
abstracts_text = "\n\n".join(abstracts)
response = chain.run(title=message, context=context, abstracts=abstracts_text)

print(response)

Top matches: 
['title: Learning to Catch Piglets in Flight\nabstract: Catching objects in-flight is an outstanding challenge in robotics. In this paper, we present a closed-loop control system fusing data from two sensor modalities: an RGB-D camera and a radar. To develop and test our method, we start with an easy to identify object: a stuffed Piglet. We implement and compare two approaches to detect and track the object, and to predict the interception point. A baseline model uses colour filtering for locating the thrown object in the environment, while the interception point is predicted using a least squares regression over the physical ballistic trajectory equations. A deep learning based method uses artificial neural networks for both object detection and interception point prediction. We show that we are able to successfully catch Piglet in 80% of the cases with our deep learning approach.', 'title: Learned Visual Navigation for Under-Canopy Agricultural Robots\nabstract: We desc