In [2]:
import requests
from flask import Flask, jsonify, request, abort
import feedparser
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment import SentimentIntensityAnalyzer
import yake

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Irish\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [None]:

def fetch_arxiv_articles(query, max_results=5, id=False, timeout=30):
    """Download raw Atom XML from the arXiv export API.

    Parameters
    ----------
    query : str
        When ``id`` is false, an arXiv ``search_query`` expression such as
        ``'cat:cs.AI'``. When ``id`` is true, a ready-made query string such
        as ``'id_list=2404.01234'`` that is appended verbatim after ``?``.
    max_results : int, optional
        Number of results requested. Only used when ``id`` is false.
    id : bool, optional
        Selects the raw-query-string mode described above. The name shadows
        the ``id`` builtin; it is kept so existing keyword callers still work.
    timeout : float, optional
        Seconds to wait for arXiv before giving up. Without a timeout
        ``requests`` waits indefinitely on a stalled connection.

    Returns
    -------
    str or None
        The response body on HTTP 200; ``None`` on any other status code or
        when the request itself fails (connection error, timeout, ...).
    """
    if id:
        query_url = f"http://export.arxiv.org/api/query?{query}"
    else:
        query_url = f"http://export.arxiv.org/api/query?search_query={query}&start=0&max_results={max_results}&sortBy=lastUpdatedDate&sortOrder=descending"

    print(query_url)  # Debug trace of the URL being requested

    try:
        response = requests.get(query_url, timeout=timeout)
    except requests.RequestException:
        # Report transport failures the same way as a non-200 status so that
        # callers only have one failure signal (None) to handle.
        return None

    if response.status_code == 200:
        return response.text
    return None

# Flask application object; the @app.route decorators below register their views on it.
app = Flask(__name__)

@app.route('/')
def home():
    """Serve the landing page: a short HTML description of every endpoint."""
    landing_page = """
    Welcome to the API! This is the homepage. Here are the different endpoints: <br>
    /get_data: Fetches a list of articles from the site. Retrieving 5 articles might be sufficient.<br>
    /articles: Displays information about the articles, including the article number, title, publication date, etc., but not the content itself.<br>
    /article/<"number">: Accesses the content of a specified article.<br>
    /ml or /ml/<"number">: Executes a machine learning script. Depending on the desired goal, it applies to either all articles or a single one. For example, sentiment analysis.
    """
    return landing_page


def _arxiv_number(entry):
    """Return the arXiv identifier embedded in a feed entry's ``id`` URL.

    Splitting on ``/abs/`` instead of slicing at a fixed offset keeps the
    result correct if the scheme or host of the id URL ever changes.
    """
    return entry.id.split('/abs/', 1)[-1]


def _entry_metadata(entry):
    """Build the metadata dictionary (no abstract) for one feed entry."""
    return {
        'title': entry.title,
        'authors': [author.name for author in entry.authors],
        'link': entry.link,
        'published': entry.published,
        'number': _arxiv_number(entry),
    }


def _latest_ai_entries():
    """Return the parsed entries for the five most recently updated cs.AI articles.

    Aborts the current request with HTTP 404 when the arXiv feed could not be
    fetched.
    """
    articles_xml = fetch_arxiv_articles('cat:cs.AI', max_results=5)
    if not articles_xml:
        abort(404, description="Resource not found")
    return feedparser.parse(articles_xml).entries


# Fetches the five most recently updated cs.AI articles, including their summaries.
@app.route('/get_data', methods=['GET'])
def get_data():
    """Return a JSON list of articles with metadata and the ``summary`` field."""
    articles = []
    for entry in _latest_ai_entries():
        article = _entry_metadata(entry)
        article['summary'] = entry.summary  # added last to keep the original key order
        articles.append(article)
    return jsonify(articles)


# Displays information about the articles, including the article number, title, publication date, etc., but not the content itself.
@app.route('/articles', methods=['GET'])
def get_articles():
    """Return a JSON list of article metadata without the ``summary`` field."""
    return jsonify([_entry_metadata(entry) for entry in _latest_ai_entries()])


# Returns the metadata and summary of a single article identified by its arXiv number.
@app.route('/article/<string:article_number>', methods=['GET'])
def get_article(article_number):
    """Return one article as JSON, looked up through arXiv's ``id_list`` parameter.

    Parameters
    ----------
    article_number : str
        arXiv identifier taken from the URL path (e.g. ``2404.01234``).

    Returns
    -------
    flask.Response
        JSON object with ``title``, ``summary``, ``authors``, ``link``,
        ``published`` and ``number``.

    Aborts with HTTP 404 when the feed cannot be fetched or when it contains
    no entry for the requested identifier.
    """
    from urllib.parse import quote  # local import keeps this view self-contained

    # The path segment is user-controlled: percent-encode it so characters
    # such as '&' or '=' cannot smuggle extra parameters into the arXiv query.
    safe_number = quote(article_number, safe='')
    article_xml = fetch_arxiv_articles(f'id_list={safe_number}', max_results=1, id=True)

    # A successful response can still carry an empty feed (unknown id);
    # indexing entries[0] unconditionally would turn that into a 500.
    entries = feedparser.parse(article_xml).entries if article_xml else []
    if not entries:
        abort(404, description="Article not found")

    entry = entries[0]
    article = {
        'title': entry.title,
        'summary': entry.summary,
        'authors': [author.name for author in entry.authors],
        'link': entry.link,
        'published': entry.published,
        # Everything after '/abs/' in the id URL is the arXiv number.
        'number': entry.id.split('/abs/', 1)[-1]
    }
    return jsonify(article)


def analyze_sentiment(text):
    """Label ``text`` as "Positive", "Negative" or "Neutral".

    The label is derived from the ``compound`` score reported by the
    module-level ``sia`` analyzer: at or above 0.05 is positive, at or below
    -0.05 is negative, anything in between is neutral.
    """
    compound = sia.polarity_scores(text)['compound']
    if compound <= -0.05:
        return "Negative"
    if compound >= 0.05:
        return "Positive"
    return "Neutral"

# Shared VADER analyzer, created once and reused by analyze_sentiment()
sia = SentimentIntensityAnalyzer()

# Settings for the shared YAKE extractor used by extract_keywords()
language = "en"
max_ngram_size = 3
deduplication_threshold = 0.9
num_of_keywords = 5
kw_extractor = yake.KeywordExtractor(
    lan=language,
    n=max_ngram_size,
    dedupLim=deduplication_threshold,
    top=num_of_keywords,
    features=None,
)

def extract_keywords(text):
    """Return the keyword phrases found in ``text``.

    Each item produced by the module-level ``kw_extractor`` is reduced to its
    first element (the phrase); the remaining elements are discarded.
    """
    return [scored[0] for scored in kw_extractor.extract_keywords(text)]

def _sentiment_and_keywords(entry):
    """Run the sentiment and keyword analyses on one feed entry's summary."""
    abstract = entry.summary
    return analyze_sentiment(abstract), extract_keywords(abstract)


@app.route('/ml', methods=['GET'])
@app.route('/ml/<string:article_number>', methods=['GET'])
def machine_learning(article_number=None):
    """Return sentiment and keywords for one article or for a batch of cs.AI articles.

    Parameters
    ----------
    article_number : str, optional
        arXiv identifier from the URL path. When omitted, up to 100 of the
        most recently updated cs.AI articles are analysed instead.

    Returns
    -------
    flask.Response
        With ``article_number``: JSON object with ``article_number``,
        ``sentiment`` and ``keywords``. Without it: JSON object whose
        ``analyses`` list holds ``title``, ``sentiment``, ``keywords`` and
        ``number`` for each article.

    Aborts with HTTP 404 when the feed cannot be fetched or, in the
    single-article case, when it contains no entry for the identifier.
    """
    from urllib.parse import quote  # local import keeps this view self-contained

    if article_number:
        # The path segment is user-controlled: percent-encode it so it cannot
        # inject extra parameters into the outbound arXiv query string.
        safe_number = quote(article_number, safe='')
        article_xml = fetch_arxiv_articles(f'id_list={safe_number}', max_results=1, id=True)

        # A successful response may still hold an empty feed (unknown id);
        # treat that as "not found" rather than crashing on entries[0].
        entries = feedparser.parse(article_xml).entries if article_xml else []
        if not entries:
            abort(404, description="Article not found")

        sentiment, keywords = _sentiment_and_keywords(entries[0])
        return jsonify({
            'article_number': article_number,
            'sentiment': sentiment,
            'keywords': keywords
        })

    articles_xml = fetch_arxiv_articles('cat:cs.AI', max_results=100)
    if not articles_xml:
        abort(404, description="Resource not found")

    analyses = []
    for entry in feedparser.parse(articles_xml).entries:
        sentiment, keywords = _sentiment_and_keywords(entry)
        analyses.append({
            'title': entry.title,
            'sentiment': sentiment,
            'keywords': keywords,
            # Everything after '/abs/' in the id URL is the arXiv number.
            'number': entry.id.split('/abs/', 1)[-1]
        })
    return jsonify({'analyses': analyses})




# Start Flask's built-in server (debug mode off) only when this code is the main module.
# NOTE(review): app.run() appears to block until interrupted, so in a notebook this cell
# keeps executing while the server is up — confirm that is intended.
if __name__ == '__main__':
    app.run(debug=False)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [07/Apr/2024 17:33:56] "GET /get_data HTTP/1.1" 200 -


http://export.arxiv.org/api/query?search_query=cat:cs.AI&start=0&max_results=5&sortBy=lastUpdatedDate&sortOrder=descending
http://export.arxiv.org/api/query?search_query=cat:cs.AI&start=0&max_results=100&sortBy=lastUpdatedDate&sortOrder=descending


127.0.0.1 - - [07/Apr/2024 17:45:40] "GET /ml HTTP/1.1" 200 -
