In [30]:
import requests
from flask import Flask, jsonify, request, abort
import feedparser
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

In [None]:
def fetch_arxiv_articles(query, max_results=5, id=False, timeout=10):
    """Fetch raw Atom XML from the arXiv export API.

    Args:
        query: When ``id`` is false, the value placed in arXiv's
            ``search_query`` parameter (e.g. ``'cat:cs.AI'``). When ``id`` is
            true, a ready-made query string (e.g. ``'id_list=2301.00001'``)
            that is appended verbatim after ``?``.
        max_results: Maximum number of entries to request. Only used when
            ``id`` is false; in id mode the URL carries ``query`` alone.
        id: Selects the raw-query-string mode described above. The name
            shadows the builtin but is kept so existing keyword callers work.
        timeout: Seconds to wait for arXiv before giving up, so a stalled
            upstream connection cannot block the caller indefinitely.

    Returns:
        The response body as text when arXiv answers with HTTP 200, otherwise
        ``None``. ``None`` is also returned when the request fails at the
        network level or times out.
    """
    base_url = "http://export.arxiv.org/api/query"
    if id:
        query_url = f"{base_url}?{query}"
    else:
        query_url = (
            f"{base_url}?search_query={query}&start=0&max_results={max_results}"
            "&sortBy=lastUpdatedDate&sortOrder=descending"
        )
    print(query_url)  # Debug aid: show the URL that is about to be requested

    try:
        response = requests.get(query_url, timeout=timeout)
    except requests.RequestException:
        # Callers already treat None as "upstream unavailable".
        return None

    if response.status_code == 200:
        return response.text
    return None

# Flask application object; the @app.route decorators below register their
# view functions on it, and app.run() at the bottom serves it.
app = Flask(__name__)

@app.route('/')
def home():
    """Serve the landing page: an HTML snippet that lists the API's endpoints."""
    endpoint_overview = """
    Welcome to the API! This is the homepage. Here are the different endpoints: <br>
    /get_data: Fetches a list of articles from the site. Retrieving 5 articles might be sufficient.<br>
    /articles: Displays information about the articles, including the article number, title, publication date, etc., but not the content itself.<br>
    /article/<"number">: Accesses the content of a specified article.<br>
    /ml or /ml/<"number">: Executes a machine learning script. Depending on the desired goal, it applies to either all articles or a single one. For example, sentiment analysis.
    """
    return endpoint_overview


# Lists the latest cs.AI articles (metadata only, no abstract); five are requested.
@app.route('/get_data', methods=['GET'])
def get_data():
    """Return up to five cs.AI articles from arXiv as a JSON list.

    Each element carries ``title``, ``authors``, ``link``, ``published`` and
    ``number``. Aborts with 404 when the arXiv request yields nothing.
    """
    raw_feed = fetch_arxiv_articles('cat:cs.AI', max_results=5)
    if not raw_feed:
        abort(404, description="Resource not found")

    def to_record(item):
        return {
            'title': item.title,
            'authors': [person.name for person in item.authors],
            'link': item.link,
            'published': item.published,
            # Drop the first 21 characters of the entry id (the length of
            # "http://arxiv.org/abs/"), leaving the arXiv number.
            'number': item.id[21:],
        }

    parsed = feedparser.parse(raw_feed)
    return jsonify([to_record(item) for item in parsed.entries])


# Lists the latest cs.AI articles with their metadata and summary (abstract).
@app.route('/articles', methods=['GET'])
def get_articles():
    """Return up to five cs.AI articles from arXiv as a JSON list.

    Each element carries ``title``, ``summary``, ``authors``, ``link``,
    ``published`` and ``number``. Aborts with 404 when the arXiv request
    yields nothing.
    """
    raw_feed = fetch_arxiv_articles('cat:cs.AI', max_results=5)
    if not raw_feed:
        abort(404, description="Resource not found")

    def to_record(item):
        return {
            'title': item.title,
            'summary': item.summary,
            'authors': [person.name for person in item.authors],
            'link': item.link,
            'published': item.published,
            # Drop the first 21 characters of the entry id (the length of
            # "http://arxiv.org/abs/"), leaving the arXiv number.
            'number': item.id[21:],
        }

    parsed = feedparser.parse(raw_feed)
    return jsonify([to_record(item) for item in parsed.entries])


# Accesses the content of a specified article.
# The `path` converter (instead of `string`) lets identifiers that contain a
# slash, such as "cond-mat/0207270v1", reach this view.
@app.route('/article/<path:article_number>', methods=['GET'])
def get_article(article_number):
    """Return one arXiv article, looked up by its identifier, as JSON.

    Args:
        article_number: arXiv identifier taken from the URL, e.g.
            ``2301.00001`` or ``cond-mat/0207270v1``.

    Returns:
        A JSON object with ``title``, ``summary``, ``authors``, ``link``,
        ``published`` and ``number``.

    Aborts with 404 when the upstream request fails or when the returned
    feed contains no entry for the identifier.
    """
    # NOTE(review): article_number is interpolated into the upstream query
    # string unescaped; consider URL-quoting it if this is exposed publicly.
    article_xml = fetch_arxiv_articles(f'id_list={article_number}', max_results=1, id=True)
    if not article_xml:
        abort(404, description="Article not found")

    entries = feedparser.parse(article_xml).entries
    if not entries:
        # A successful response can still carry zero entries; indexing [0]
        # would raise IndexError and surface as a 500 instead of a 404.
        abort(404, description="Article not found")

    entry = entries[0]
    article = {
        'title': entry.title,
        'summary': entry.summary,
        'authors': [author.name for author in entry.authors],
        'link': entry.link,
        'published': entry.published,
        # Strip the 21-character "http://arxiv.org/abs/" prefix of the id.
        'number': entry.id[21:]
    }
    return jsonify(article)


# Initialize the VADER sentiment intensity analyzer once, before any function
# that uses it is defined.
try:
    sia = SentimentIntensityAnalyzer()
except LookupError:
    # The VADER lexicon is not installed in this environment yet: download it
    # and retry so the notebook survives a run on a fresh machine.
    nltk.download('vader_lexicon')
    sia = SentimentIntensityAnalyzer()


def analyze_sentiment(text):
    """Classify ``text`` as "Positive", "Negative" or "Neutral" using VADER.

    Args:
        text: The string to score (the routes pass an article summary).

    Returns:
        ``"Positive"`` when the compound score is >= 0.05, ``"Negative"`` when
        it is <= -0.05, and ``"Neutral"`` otherwise.
    """
    compound_score = sia.polarity_scores(text)['compound']
    if compound_score >= 0.05:
        return "Positive"
    if compound_score <= -0.05:
        return "Negative"
    return "Neutral"

# Two rules share this view: plain /ml (all articles, article_number stays
# None) and /ml/<article_number> (one article). Without the first rule the
# "all articles" branch could never be reached. The `path` converter lets
# identifiers containing a slash (e.g. "cond-mat/0207270v1") match.
@app.route('/ml', methods=['GET'])
@app.route('/ml/<path:article_number>', methods=['GET'])
def machine_learning(article_number=None):
    """Run sentiment analysis on one article's summary or on a batch.

    Args:
        article_number: Optional arXiv identifier from the URL. When given,
            only that article is analysed; when omitted, ten cs.AI articles
            are requested and each one is analysed.

    Returns:
        For a single article, a JSON object with ``article_number``,
        ``sentiment`` and ``number``. For the batch, a JSON object whose
        ``sentiments`` key holds a list of ``title``/``sentiment``/``number``
        objects.

    Aborts with 404 when the upstream request fails, or when a requested
    article is not present in the returned feed.
    """
    if article_number:
        # NOTE(review): article_number is interpolated into the upstream
        # query string unescaped; consider URL-quoting it.
        article_xml = fetch_arxiv_articles(f'id_list={article_number}', max_results=1, id=True)
        if not article_xml:
            abort(404, description="Article not found")

        entries = feedparser.parse(article_xml).entries
        if not entries:
            # A successful response can still carry zero entries; indexing
            # [0] would raise IndexError and surface as a 500.
            abort(404, description="Article not found")

        entry = entries[0]
        sentiment = analyze_sentiment(entry.summary)
        return jsonify({'article_number': article_number, 'sentiment': sentiment, 'number': entry.id[21:]})

    # No identifier supplied: score every article in the batch.
    articles_xml = fetch_arxiv_articles('cat:cs.AI', max_results=10)
    if not articles_xml:
        abort(404, description="Resource not found")

    feed = feedparser.parse(articles_xml)
    sentiments = [
        {
            'title': entry.title,
            'sentiment': analyze_sentiment(entry.summary),
            # Strip the 21-character "http://arxiv.org/abs/" prefix of the id.
            'number': entry.id[21:],
        }
        for entry in feed.entries
    ]
    return jsonify({'sentiments': sentiments})



# Start Flask's built-in server when this code is executed directly.
# app.run() blocks until interrupted (the captured output shows it serving on
# http://127.0.0.1:5000 with debug mode off), so nothing after this call runs
# while the server is up.
if __name__ == '__main__':
    app.run()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit


In [21]:
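# Manual check of the raw arXiv API response for a single article id.
# The code is commented out; the XML shown below is the saved output of an
# earlier run of these lines.
# ex_url = "http://export.arxiv.org/api/query?id_list=cond-mat/0207270v1"
# response = requests.get(ex_url)
# print(response.text)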

<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <link href="http://arxiv.org/api/query?search_query%3D%26id_list%3Dcond-mat%2F0207270v1%26start%3D0%26max_results%3D10" rel="self" type="application/atom+xml"/>
  <title type="html">ArXiv Query: search_query=&amp;id_list=cond-mat/0207270v1&amp;start=0&amp;max_results=10</title>
  <id>http://arxiv.org/api/C0KeZIdyNG94Gv+LuapZg5mEFiE</id>
  <updated>2024-04-06T00:00:00-04:00</updated>
  <opensearch:totalResults xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">1</opensearch:totalResults>
  <opensearch:startIndex xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">0</opensearch:startIndex>
  <opensearch:itemsPerPage xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">10</opensearch:itemsPerPage>
  <entry>
    <id>http://arxiv.org/abs/cond-mat/0207270v1</id>
    <updated>2002-07-10T17:10:30Z</updated>
    <published>2002-07-10T17:10:30Z</published>
    <title>Understanding Paramagnetic Spin Co