In [4]:
MODEL='gpt-4o-2024-08-06'

In [57]:
SYS_MSG_KEYWORD_SEARCH = """
You are a search engine that translates search topics into a list of relevant keywords for effective search.
take in a research topic and you output.
"""

SYS_MSG_SEARCH_QUERIES = """
You are a search engine that takes in a research topic and you output a list of relevant
that perfectly encapsulate that topic.
"""

SYS_MSG_RELEVANCY_JUDGE = """
You are a research expert and an evalation engine for research results given a research topic of interest.
Given a research topic you output a binary score yes|no to determine if a paper summary is strictly relevant to the topic
or not.
"""

In [83]:
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import List, Literal

client = OpenAI()

class Keywords(BaseModel):
    keywords: List[str] = Field(..., description="List of relevant keywords to search for")


class SearchQueries(BaseModel):
    queries: List[str] = Field(..., description="List of queries to search for")


def generate_keywords_for_search(research_topic):
    """
    
    """
    response = client.beta.chat.completions.parse(
        model=MODEL,
        messages=[{"role": "system", "content": SYS_MSG_KEYWORD_SEARCH},
                  {"role": "user", "content": research_topic}],
        response_format=Keywords
    )
    
    return response.choices[0].message.parsed

def generate_search_queries(research_topic, num_queries=5):
    
    response = client.beta.chat.completions.parse(
        model=MODEL,
        messages=[{"role": "system", "content": SYS_MSG_SEARCH_QUERIES},
                  {"role": "user", "content": f'Generate {num_queries} search queries for this research topic: {research_topic}'}],
        response_format=SearchQueries
    )
    
    return response.choices[0].message.parsed


class RelevantPaper(BaseModel):
    relevancy_score: Literal['yes', 'no'] = Field(description="A binary score yes|no if a paper is relevant given a research topic.")
    justification: str = Field(description="A short one sentence justification for the relevancy score.")


def filter_paper_relevancy(research_topic, paper_summary):
    paper_relevancy_score = client.beta.chat.completions.parse(
            model='gpt-4o-mini',
            messages=[
                {
                    'role': 'system', 'content': SYS_MSG_RELEVANCY_JUDGE,
                    'role': 'user', 'content': f'Given this research topic: {research_topic}, score the relevancy of this paper:\n\n {paper_summary}'
                }
            ],
            response_format=RelevantPaper
        )
    
    return paper_relevancy_score.choices[0].message.parsed

def filter_out_search_results(research_topic: str, search_results: dict):
    filtered_results = []
    for paper in search_results:
        relevancy = filter_paper_relevancy(research_topic, paper.summary)
        if relevancy.relevancy_score=='yes':
            filtered_results.append((paper, relevancy.justification))
        elif relevancy.relevancy_score=='no':
            continue
        else:
            raise ValueError("Invalid relevancy score")
    
    return filtered_results
                
        

def save_search_results(search_results: dict):
    pass




In [20]:
keywords = generate_keywords_for_search("LLMs for enhancing human's ability to research and learn")
keywords

Keywords(keywords=['LLMs', 'large language models', 'enhancing learning', 'research enhancement', 'AI in education', 'machine learning', 'cognitive assistant', 'natural language processing', 'learning algorithms', 'educational technology', 'personalized learning', 'knowledge retrieval', 'information synthesis', 'AI tools for learning'])

In [21]:
search_queries = generate_search_queries("LLMs for enhancing human's ability to research and learn")
search_queries

SearchQueries(queries=['How do language models enhance research capabilities?', 'Impact of large language models on modern learning', 'Language models in academic research', 'Advantages of AI in educational research', 'How LLMs assist in data analysis and interpretation', 'Role of AI in personalized learning experiences', 'Enhancing educational outcomes using LLMs', 'Language models as research assistants', 'AI-driven tools for academic research', 'Improving information synthesis with language models', 'Predictive analytics in education using large language models', 'Collaborative research projects involving LLMs', 'Using large language models for hypothesis generation', 'How do LLMs affect traditional learning methods', 'Are language models revolutionizing educational research?'])

In [76]:
import arxiv
from datetime import datetime

arxiv_client = arxiv.Client()

def search_arxiv_papers(keywords, year=2024, MAX_NUM_PAPERS=30):
    query = ' '.join(keywords)
    
    # Define the start and end dates for the specified year
    start_date = datetime(year, 1, 1)
    end_date = datetime(year, 12, 31)
    
    # Append the date range filter to the query
    date_filter = f'submittedDate:[{start_date.strftime("%Y%m%d%H%M%S")} TO {end_date.strftime("%Y%m%d%H%M%S")}]'
    full_query = f'{query} AND {date_filter}'
    
    # Perform the search
    search = arxiv.Search(query=full_query, max_results=MAX_NUM_PAPERS)
    
    # Fetch the results
    results = list(arxiv_client.results(search))
    
    return results

In [86]:
research_topic = "LLMs for enhancing human's ability to research and learn"
keywords = generate_keywords_for_search(research_topic)
structured_results = search_arxiv_papers(keywords.keywords)

In [78]:
# for paper in structured_results:
#     print(f'Title: {paper.title}')
#     print(f'URL: {paper.pdf_url}')
#     print(f'Abstract: {paper.summary}\n')
#     print("******")

In [79]:
len(structured_results)

30

In [81]:
filtered_papers = filter_out_search_results(research_topic, structured_results)
len(filtered_papers)

23

In [87]:
search_queries_obj = generate_search_queries(research_topic)
search_queries = search_queries_obj.queries
for query in search_queries:
    print(query)

LLMs in educational technology and their impact on learning
How large language models are revolutionizing research methodologies
Enhancing human learning efficiency with AI and large language models
Role of LLMs in improving cognitive abilities and research skills
Applications of large language models in academic and educational enhancement


In [88]:
full_search_result = []
for query in search_queries:
    keywords = generate_keywords_for_search(query)
    structured_results = search_arxiv_papers(keywords.keywords)
    full_search_result.extend(filter_out_search_results(research_topic, structured_results))

In [89]:
full_search_result

[(arxiv.Result(entry_id='http://arxiv.org/abs/2403.18679v2', updated=datetime.datetime(2024, 4, 16, 22, 10, 16, tzinfo=datetime.timezone.utc), published=datetime.datetime(2024, 3, 27, 15, 21, 58, tzinfo=datetime.timezone.utc), title="An Exploratory Study on Upper-Level Computing Students' Use of Large Language Models as Tools in a Semester-Long Project", authors=[arxiv.Result.Author('Ben Arie Tanay'), arxiv.Result.Author('Lexy Arinze'), arxiv.Result.Author('Siddhant S. Joshi'), arxiv.Result.Author('Kirsten A. Davis'), arxiv.Result.Author('James C. Davis')], summary="Background: Large Language Models (LLMs) such as ChatGPT and CoPilot are\ninfluencing software engineering practice. Software engineering educators must\nteach future software engineers how to use such tools well. As of yet, there\nhave been few studies that report on the use of LLMs in the classroom. It is,\ntherefore, important to evaluate students' perception of LLMs and possible ways\nof adapting the computing curriculu

In [91]:
len(full_search_result)

111