In [1]:
import requests
from flask import Flask, jsonify, request, abort
import feedparser

In [None]:
# Flask application instance; the @app.route decorators below register their views on it.
app = Flask(__name__)


@app.route('/')
def home():
    """Serve the landing page: a short HTML overview of the available endpoints.

    Returns:
        str: HTML markup. The ``<number>`` placeholders are written as
        ``&lt;number&gt;`` because the string is rendered as HTML (it already
        relies on ``<br>``); left unescaped, browsers would treat them as
        unknown tags and hide them.
    """
    return """
    Welcome to the API! This is the homepage. Here are the different endpoints: <br>
    /get_data: Fetches a list of articles from the site. Retrieving 5 articles might be sufficient.<br>
    /articles: Displays information about the articles, including the article number, title, publication date, etc., but not the content itself.<br>
    /article/&lt;number&gt;: Accesses the content of a specified article.<br>
    /ml or /ml/&lt;number&gt;: Executes a machine learning script. Depending on the desired goal, it applies to either all articles or a single one. For example, sentiment analysis.
    """


# Base URL of the arXiv query API; fetch_arxiv_articles() sends its searches here.
ARXIV_API_URL = "http://export.arxiv.org/api/query"


def fetch_arxiv_articles(query, max_results=5):
    """Run a search against the arXiv API and return the raw Atom feed.

    Args:
        query: arXiv ``search_query`` expression, e.g. ``'all:electron'``.
        max_results: Maximum number of entries to request.

    Returns:
        The response body as text when arXiv answers with HTTP 200,
        otherwise ``None`` (this includes network errors and timeouts).
    """
    # Let requests build the query string so that spaces, '&', '#', etc. in
    # ``query`` are percent-encoded instead of corrupting the URL.
    params = {'search_query': query, 'start': 0, 'max_results': max_results}
    try:
        # A timeout keeps a stalled upstream from blocking the caller forever.
        response = requests.get(ARXIV_API_URL, params=params, timeout=10)
    except requests.RequestException:
        # Callers already interpret None as "fetch failed".
        return None
    if response.status_code == 200:
        return response.text
    return None


# List endpoint: requests up to five arXiv search results and returns them as JSON.
@app.route('/get_data', methods=['GET'])
def get_data():
    """Return the 'all:electron' search results as a JSON array.

    Each element holds the title, summary, author names, link and
    publication timestamp of one feed entry. Aborts with 404 when the
    fetch helper returns nothing.
    """
    raw_feed = fetch_arxiv_articles('all:electron', max_results=5)
    if not raw_feed:
        abort(404, description="Resource not found")

    parsed = feedparser.parse(raw_feed)
    payload = [
        {
            'title': item.title,
            'summary': item.summary,
            'authors': [person.name for person in item.authors],
            'link': item.link,
            'published': item.published,
        }
        for item in parsed.entries
    ]
    return jsonify(payload)


# Returns the raw arXiv Atom feed for the default search; metadata is not yet extracted from it.
@app.route('/articles', methods=['GET'])
def get_articles():
    """Return the unparsed Atom feed for the 'all:electron' search.

    Returns:
        A ``(body, status, headers)`` tuple: the feed XML, 200, and an Atom
        content type. Aborts with 404 when the fetch helper returns nothing.
    """
    articles_xml = fetch_arxiv_articles('all:electron')
    if not articles_xml:
        abort(404, description="Resource not found")

    # Here you would parse the XML and extract the article metadata.
    # For simplicity, the raw XML is returned. The explicit header matters:
    # Flask labels a bare string as text/html, which makes clients treat the
    # Atom markup as an HTML page.
    return articles_xml, 200, {'Content-Type': 'application/atom+xml; charset=utf-8'}


# Single-article endpoint: looks up one article and returns its link as JSON.
@app.route('/article/<int:article_number>', methods=['GET'])
def get_article(article_number):
    """Return the link of the first search hit for ``article_number``.

    Responds with ``{'url': <entry link>}``. Aborts with 404 either when the
    fetch helper returns nothing or when the parsed feed has no entries.

    NOTE(review): the integer is sent to arXiv as an ``id:`` search term;
    whether that can match real arXiv identifiers should be confirmed.
    """
    search_expr = f'all:electron AND id:{article_number}'
    raw_feed = fetch_arxiv_articles(search_expr, max_results=1)
    if not raw_feed:
        abort(404, description="Error fetching data from arXiv")

    hits = feedparser.parse(raw_feed).entries
    if not hits:
        abort(404, description="Article not found")

    return jsonify({'url': hits[0].link})


# Executes a machine learning script on either all articles or a single one (e.g. sentiment analysis).
@app.route('/ml', methods=['GET'])
@app.route('/ml/<int:article_number>', methods=['GET'])
def machine_learning(article_number=None):
    """Return placeholder sentiment results for one article or for all.

    Args:
        article_number: Number taken from the ``/ml/<number>`` route, or
            ``None`` when the bare ``/ml`` route was requested.

    Returns:
        JSON ``{'article_number': ..., 'sentiment': ...}`` for a single
        article, otherwise ``{'sentiments': [...]}``. The values are
        hard-coded placeholders; no model is run yet.
    """
    # Compare with None explicitly: the int route converter accepts 0, which
    # is falsy and would otherwise be mistaken for "no article given".
    if article_number is not None:
        # TODO: fetch the article content and apply the real ML model to it.
        sentiment = "Positive"  # Placeholder for actual sentiment analysis result
        return jsonify({'article_number': article_number, 'sentiment': sentiment})

    # TODO: fetch all articles and apply the real ML model to each of them.
    sentiments = ["Positive", "Neutral"]  # Placeholder for actual sentiment analysis results
    return jsonify({'sentiments': sentiments})


# Start the Flask server (no host/port overrides) when this code runs as the main module.
if __name__ == '__main__':
    app.run()

In [10]:
# Manual check: request a single result straight from arXiv and show the raw Atom XML.
ex_url = "http://export.arxiv.org/api/query?search_query=all:electron&start=0&max_results=1"
# Bound the wait so a stalled connection cannot hang the kernel indefinitely.
response = requests.get(ex_url, timeout=10)
print(response.text)

<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <link href="http://arxiv.org/api/query?search_query%3Dall%3Aelectron%26id_list%3D%26start%3D0%26max_results%3D1" rel="self" type="application/atom+xml"/>
  <title type="html">ArXiv Query: search_query=all:electron&amp;id_list=&amp;start=0&amp;max_results=1</title>
  <id>http://arxiv.org/api/cHxbiOdZaP56ODnBPIenZhzg5f8</id>
  <updated>2024-03-31T00:00:00-04:00</updated>
  <opensearch:totalResults xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">208224</opensearch:totalResults>
  <opensearch:startIndex xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">0</opensearch:startIndex>
  <opensearch:itemsPerPage xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">1</opensearch:itemsPerPage>
  <entry>
    <id>http://arxiv.org/abs/cond-mat/0102536v1</id>
    <updated>2001-02-28T20:12:09Z</updated>
    <published>2001-02-28T20:12:09Z</published>
    <title>Impact of Electron-Electron Cusp on Configur