In [1]:
!pip install feedparser pandas


Collecting feedparser
  Downloading feedparser-6.0.11-py3-none-any.whl.metadata (2.4 kB)
Collecting sgmllib3k (from feedparser)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading feedparser-6.0.11-py3-none-any.whl (81 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.3/81.3 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: sgmllib3k
  Building wheel for sgmllib3k (setup.py) ... [?25l[?25hdone
  Created wheel for sgmllib3k: filename=sgmllib3k-1.0.0-py3-none-any.whl size=6046 sha256=775d8c56d0043512e53de06f2424aa966bca6cccd201f5d3e61297bb5f2eef86
  Stored in directory: /root/.cache/pip/wheels/3b/25/2a/105d6a15df6914f4d15047691c6c28f9052cc1173e40285d03
Successfully built sgmllib3k
Installing collected packages: sgmllib3k, feedparser
Successfully installed feedparser-6.0.11 sgmllib3k-1.0.0


In [2]:

import time
from datetime import datetime
from urllib.parse import quote_plus

import feedparser
import pandas as pd
from google.colab import files

def fetch_arxiv_data(query, max_results=50):
    """Query the arXiv API and return recent matching papers.

    Args:
        query: Free-text search phrase. It is searched across all fields
            (``all:`` prefix) and URL-encoded before being sent.
        max_results: Maximum number of entries requested from the API. The
            returned list may be shorter because older papers are filtered
            out after the fetch.

    Returns:
        A list of dicts with the keys "Title", "Abstract", "authors", "Year"
        and "URL", one per entry published in the current year or any of the
        10 preceding calendar years.
    """
    base_url = "http://export.arxiv.org/api/query?"
    # quote_plus turns spaces into "+" (same as before) and also escapes
    # characters such as "&", "#" or "+" that would otherwise corrupt the
    # query string or inject extra parameters.
    search_query = f"search_query=all:{quote_plus(query)}"
    params = f"&start=0&max_results={max_results}"
    url = base_url + search_query + params

    # Fetch and parse data from arXiv
    feed = feedparser.parse(url)

    # Only include papers from the last 10 years
    min_year = datetime.now().year - 10

    papers = []
    for entry in feed.entries:
        # An entry without a parsable publication timestamp cannot be placed
        # in the year window; skip it rather than abort the whole run.
        try:
            paper_year = datetime.strptime(
                entry.published, "%Y-%m-%dT%H:%M:%SZ"
            ).year
        except (AttributeError, ValueError):
            continue

        if paper_year < min_year:
            continue

        # Entries may lack an author list (or an author may lack a name).
        author_names = [
            author.name
            for author in getattr(entry, "authors", [])
            if getattr(author, "name", None)
        ]

        papers.append(
            {
                "Title": entry.title,
                "Abstract": entry.summary,
                # Lower-case key kept as-is: it becomes the CSV column name.
                "authors": ", ".join(author_names),
                "Year": paper_year,
                "URL": entry.link,
            }
        )

    return papers

# Search phrases grouped by theme. Group order and the order of phrases
# within each group determine the order of the flat `queries` list below.
_QUERY_GROUPS = {
    "General AI & ML": (
        "artificial intelligence",
        "machine learning",
        "deep learning",
        "reinforcement learning",
        "supervised learning",
        "unsupervised learning",
        "semi-supervised learning",
        "self-supervised learning",
        "neural networks",
        "transformer models",
        "generative AI",
        "AI applications",
        "AI in healthcare",
        "AI in finance",
        "AI in robotics",
        "AI in education",
        "machine learning algorithms",
        "automated machine learning",
        "AI ethics",
        "explainable AI",
        "AI safety",
        "AI governance",
    ),
    "Core Algorithms & Techniques / Supervised": (
        "linear regression",
        "logistic regression",
        "decision trees",
        "random forest classifier",
        "support vector machines",
        "naive Bayes classifier",
        "k-nearest neighbors classifier",
        "gradient boosting machines",
        "XGBoost classifier",
        "LightGBM classifier",
        "CatBoost classifier",
        "ridge regression",
        "lasso regression",
        "elastic net regression",
        "multi-class classification",
        "ordinal regression",
        "ensemble methods in machine learning",
        "stacking ensemble learning",
        "bagging and boosting",
        "regression trees",
        "classification algorithms",
        "cross-validation techniques",
    ),
    "Core Algorithms & Techniques / Unsupervised": (
        "k-means clustering",
        "hierarchical clustering",
        "density-based clustering",
        "DBSCAN algorithm",
        "Gaussian mixture models",
        "mean shift clustering",
        "spectral clustering",
        "affinity propagation",
        "self-organizing maps",
        "principal component analysis PCA",
        "independent component analysis ICA",
        "t-SNE visualization",
        "UMAP dimensionality reduction",
        "autoencoders for representation learning",
        "deep clustering methods",
        "latent Dirichlet allocation LDA",
        "topic modeling",
        "anomaly detection",
        "outlier detection algorithms",
    ),
    "Deep Learning & Neural Architectures": (
        "convolutional neural networks",
        "recurrent neural networks",
        "long short-term memory",
        "transformers",
        "attention mechanisms",
        "vision transformers",
        "GANs generative adversarial networks",
        "BERT model",
        "GPT models",
        "diffusion models",
        "multi-modal learning",
        "zero-shot learning",
        "few-shot learning",
        "meta learning",
        "neural architecture search",
    ),
    "Libraries & Frameworks": (
        "TensorFlow machine learning",
        "PyTorch deep learning",
        "Scikit-learn algorithms",
        "Keras deep learning",
        "Hugging Face transformers",
        "JAX ML library",
        "ONNX AI models",
    ),
    "Evaluation, Fairness, and Interpretability": (
        "model evaluation in machine learning",
        "model interpretability",
        "model explainability",
        "fairness in machine learning",
        "bias in AI models",
        "AUC ROC evaluation",
        "precision recall tradeoff",
        "SHAP values",
        "LIME explainability",
    ),
    "AI Research Topics & Trends": (
        "foundation models",
        "large language models",
        "AI and climate change",
        "AI for social good",
        "neurosymbolic AI",
        "human-in-the-loop learning",
        "online learning",
        "continual learning",
        "federated learning",
        "privacy preserving machine learning",
        "causal inference in ML",
        "contrastive learning",
        "representation learning",
    ),
}

# Flat list of queries to search, in group order.
queries = [phrase for phrases in _QUERY_GROUPS.values() for phrase in phrases]

# Fetch data for multiple queries.
# arXiv's API guidelines ask clients to wait about 3 seconds between
# consecutive requests, so pause between queries instead of sending them
# back-to-back.
ARXIV_REQUEST_DELAY_SECONDS = 3

# The search topics overlap, so the same paper can be returned by several
# queries. Key on the arXiv URL and keep only the first occurrence.
papers_by_url = {}
for query_index, query in enumerate(queries):
    if query_index > 0:
        time.sleep(ARXIV_REQUEST_DELAY_SECONDS)
    for paper in fetch_arxiv_data(query):
        papers_by_url.setdefault(paper["URL"], paper)
all_papers = list(papers_by_url.values())

# Convert to DataFrame
df = pd.DataFrame(all_papers)

# Save results as CSV in the current working directory
file_name = "Arxiv_Resources.csv"
df.to_csv(file_name, index=False)

# In Google Colab this triggers a browser download of the CSV. The file
# itself has already been written to the working directory by to_csv above.
files.download(file_name)

df.head()


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0,Title,Abstract,authors,Year,URL
0,The Governance of Physical Artificial Intellig...,Physical artificial intelligence can prove to ...,"Yingbo Li, Anamaria-Beatrice Spulber, Yucong Duan",2023,http://arxiv.org/abs/2304.02924v1
1,Does an artificial intelligence perform market...,Who should be charged with responsibility for ...,Takanobu Mizuta,2020,http://arxiv.org/abs/2005.10488v1
2,Impact of Artificial Intelligence on Economic ...,Artificial intelligence has impacted many aspe...,Tshilidzi Marwala,2015,http://arxiv.org/abs/1509.01213v1
3,The case for psychometric artificial general i...,A short review of the literature on measuremen...,Mark McPherson,2020,http://arxiv.org/abs/2101.02179v1
4,AAAI FSS-18: Artificial Intelligence in Govern...,Proceedings of the AAAI Fall Symposium on Arti...,"Frank Stein, Alun Preece, Mihai Boicu",2018,http://arxiv.org/abs/1810.06018v1
