In [13]:
import os
import arxiv
from langchain.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import Ollama
from langchain import hub
from langchain.chains.combine_documents.stuff import StuffDocumentsChain, create_stuff_documents_chain
from langchain.chains.llm import LLMChain
from langchain_core.prompts import PromptTemplate
from langchain.llms import OpenLLM
from langchain_ollama import ChatOllama
from langchain.schema import Document

In [15]:
# Define prompt
prompt_template = """Write a detailed summary of the following research paper:
"{text}"
DETAILED SUMMARY:"""
prompt = PromptTemplate.from_template(prompt_template)


In [16]:

# Initialize LLM
llm = ChatOllama(model="deepseek-r1:1.5b", temperature=0)
llm_chain = LLMChain(llm=llm, prompt=prompt)
stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")

In [18]:
def fetch_papers(query, max_results=5):
    search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate
    )
    papers = []
    for result in search.results():
        papers.append({
            'title': result.title,
            'summary': result.summary,
            'pdf_url': result.pdf_url,
            'published': result.published
        })
    return papers

# Function to summarize a paper
def summarize_paper(paper):
    # Create a LangChain Document object
    doc = Document(page_content=paper['summary'], metadata={"title": paper['title']})
    # Generate summary
    summary = stuff_chain.run([doc])  # Pass a list of Document objects
    return summary

In [19]:
query = input("Enter your search query for arXiv (e.g., 'AI', 'Machine Learning'):")

In [20]:
papers = fetch_papers(query, max_results=3)

  for result in search.results():


In [21]:
papers

[{'title': 'PyEvalAI: AI-assisted evaluation of Jupyter Notebooks for immediate personalized feedback',
  'summary': 'Grading student assignments in STEM courses is a laborious and repetitive\ntask for tutors, often requiring a week to assess an entire class. For\nstudents, this delay of feedback prevents iterating on incorrect solutions,\nhampers learning, and increases stress when exercise scores determine admission\nto the final exam. Recent advances in AI-assisted education, such as automated\ngrading and tutoring systems, aim to address these challenges by providing\nimmediate feedback and reducing grading workload. However, existing solutions\noften fall short due to privacy concerns, reliance on proprietary closed-source\nmodels, lack of support for combining Markdown, LaTeX and Python code, or\nexcluding course tutors from the grading process. To overcome these\nlimitations, we introduce PyEvalAI, an AI-assisted evaluation system, which\nautomatically scores Jupyter notebooks u

In [22]:
for paper in papers:
    summary = summarize_paper(paper=paper)
    print(summary)
    print('==========================')

<think>
Okay, so I need to write a detailed summary of this research paper. Let me read through it first.

The paper is about grading student assignments in STEM courses. It says that graders spend a lot of time and time-consuming tasks, maybe even a week for each class. For students, this delay makes it hard to keep up with incorrect solutions because they can't iterate on them. This also hinders learning and increases stress when their scores determine if they get to the final exam.

Then, there's mention of AI-assisted education, like automated grading systems. These systems aim to provide immediate feedback but currently have limitations. The main issues are privacy concerns—students might share too much information. Also, these solutions often use proprietary models which aren't open or accessible. They don't support Markdown, LaTeX, and Python code, so it's hard for graders to input their work properly. Plus, they exclude tutors from the grading process, meaning graders can't see