In [1]:
import urllib.request as libreq
import xml.etree.ElementTree as ET

# Query the arXiv export API for entries whose submittedDate falls in the
# requested window (offset 0, at most 2000 results) and keep the raw bytes.
# NOTE(review): the upper bound ends in "2459", which is not a valid HHMM
# time -- confirm whether "2359" was intended.
with libreq.urlopen(
    'https://export.arxiv.org/api/query?search_query=submittedDate:[202504240000+TO+202504242459]&start=0&max_results=2000'
) as url:
    r = url.read()

# Turn the UTF-8 payload into text, then parse it into an element tree
xml_string = str(r, 'utf-8')
root = ET.fromstring(xml_string)

In [2]:
# Prefix map so that 'atom:<tag>' lookups resolve against the Atom namespace
namespace = {'atom': 'http://www.w3.org/2005/Atom'}

# Maps paper id (last path segment of the entry's atom:id) -> metadata record
paper_data = {}

for entry in root.findall('atom:entry', namespace):
    # Scalar fields are read straight from the entry's child elements
    title = entry.find('atom:title', namespace).text
    summary = entry.find('atom:summary', namespace).text
    link = entry.find('atom:id', namespace).text
    published = entry.find('atom:published', namespace).text

    # Collect the name of every author element, in document order
    authors = [
        author.find('atom:name', namespace).text
        for author in entry.findall('atom:author', namespace)
    ]

    # The id is whatever follows the final '/' of the link
    paper_id = link.rsplit('/', 1)[-1]
    paper_data[paper_id] = dict(
        title=title,
        summary=summary,
        link=link,
        published=published,
        authors=authors,
    )

In [3]:
# Select one parsed record by its key in paper_data (the trailing segment of
# the entry link, including the version suffix)
example_paper_id = "2504.17149v1"
example_paper = paper_data[example_paper_id]

In [5]:
import pymupdf
import urllib.request
import io

# Derive the PDF URL from the abstract link stored in the paper record.
# Only the "/abs/" path segment is swapped (and only once), so an "abs"
# substring appearing elsewhere in the URL is left untouched.
pdf_url = example_paper["link"].replace('/abs/', '/pdf/', 1) + '.pdf'

# Download the PDF; the context manager closes the HTTP response once the
# body has been read instead of leaving the connection to the garbage collector.
with urllib.request.urlopen(pdf_url) as pdf_response:
    pdf_data = pdf_response.read()

# Open the PDF from memory. `doc` is intentionally left open in case later
# cells still need it; call doc.close() once it is no longer used.
doc = pymupdf.open(stream=pdf_data, filetype="pdf")

# Plain text of every page, each page followed by a form feed ("\f") as the
# page delimiter. A single join avoids repeated string concatenation.
pdf_text = "".join(page.get_text() + "\f" for page in doc)


In [6]:
# Flatten the extracted text onto a single line. Each line break becomes a
# space rather than being deleted, so two words separated only by a newline
# are not fused into one token before the text is sent to a model.
pdf_text = pdf_text.replace("\n", " ")


In [None]:
import os
import openai
import dotenv

# Load environment settings via python-dotenv, then build the OpenAI client
# from the OPENAI_API_KEY environment variable (never hard-code the key).
dotenv.load_dotenv()
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


# Prepare the prompt for GPT-3.5-turbo.
# Only the first 4000 characters of the paper text are included, to stay
# within token limits. This note is kept OUTSIDE the f-string: a "#" inside a
# string literal is not a comment and would be sent to the model verbatim.
prompt = f"""Please analyze this academic paper and provide:
1. A concise summary
2. Key keywords
3. Organizations/institutions involved NONE if not provided

Paper text:
{pdf_text[:4000]}
"""

# Make API call: one system message setting the role, one user message
# carrying the prompt
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "You are an expert academic paper analyzer."},
        {"role": "user", "content": prompt}
    ],
    temperature=0.3
)

# Extract and print the analysis (content of the first returned choice)
analysis = response.choices[0].message.content
print(analysis)

# Optionally save the analysis to a file
with open('paper_analysis.txt', 'w', encoding='utf-8') as f:
    f.write(analysis)


In [11]:
import os
from google import genai
from google.genai import types
import dotenv
import json
import re

# Load environment settings via python-dotenv, then build the Gemini client
# from the GEMINI_API_KEY environment variable.
# NOTE: this rebinds `client`, which previously referred to the OpenAI client.
dotenv.load_dotenv()
client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

# Prepare the prompt for Gemini.
# Only the first 4000 characters of the paper text are included, to stay
# within token limits. This note is kept OUTSIDE the f-string: a "#" inside a
# string literal is not a comment and would be sent to the model verbatim.
# Doubled braces ({{ }}) produce literal braces in the f-string output.
prompt = f"""Please analyze this academic paper and provide the following information in JSON format:
Use this JSON schema:
{{
    "Summary": "A concise summary of the paper",
    "Keywords": "Comma-separated list of key keywords",
    "Organizations": "List of organizations/institutions involved or 'NONE' if not provided"
}}

Paper text:
{pdf_text[:4000]}
"""

response = client.models.generate_content(
    model="gemini-2.0-flash",
    config=types.GenerateContentConfig(
        system_instruction="You are an expert academic paper analyzer. Return the analysis in valid JSON format."),
    contents=prompt
)

# Clean up the response text
clean_text = response.text.strip()

# Remove a leading markdown code fence (with or without the "json" tag) and a
# trailing fence, so that both ```json ... ``` and bare ``` ... ``` wrappers
# are stripped before parsing.
clean_text = re.sub(r"^```(?:json)?\s*|\s*```$", "", clean_text)

try:
    analysis_json = json.loads(clean_text)
    print(json.dumps(analysis_json, indent=2))
except json.JSONDecodeError as e:
    # Report why parsing failed and show the raw text for inspection
    print("Error: Response was not in valid JSON format")
    print(f"Details: {e}")
    print(clean_text)

{
  "Summary": "This paper proposes a critical pragmatic philosophy for sustainable mathematics education to bridge the gap between critical theory and current implementations. It combines existential sustainability with pragmatic frameworks from ethics in mathematics education, leading to a three-stage implementation strategy: cultivating ethical classroom culture, ethnomathematics engagement, and addressing complex sustainability problems. The approach aims to equip teachers and students with necessary mathematical competencies, critical perspectives, and ethical grounding for a sustainable future.",
  "Keywords": "Sustainable Mathematics Education, Critical Pragmatism, Existential Sustainability, Ethics in Mathematics Education",
  "Organizations": [
    "University of Cologne"
  ]
}


In [15]:
# Display the metadata record selected earlier for the example paper
# (a bare last-line expression is rendered as the cell's output).
example_paper

{'title': 'Towards a Critical Pragmatic Philosophy of Sustainable Mathematics\n  Education',
 'summary': '  This paper proposes critical pragmatism as a philosophy of sustainable\nmathematics education to bridge the gap between critical theory and the\nexisting patchwork implementations. Combining existential sustainability as a\nholistic concept with pragmatic frameworks from the ethics in mathematics\neducation literature creates a foundation enabling critical reflection and\npragmatic implementation. We outline how their synthesis naturally leads to a\nthree-stage implementation strategy: cultivating an ethical classroom culture,\nengaging with ethnomathematics, and tackling complex sustainability problems.\nOur critical pragmatic approach attempts to build a new philosophical\nperspective to equip teachers and students with the mathematical competencies,\ncritical perspectives, and ethical grounding necessary to navigate and\ncontribute to a sustainable future and to provide new an