# Collect and store ~20â€“30 medRxiv abstracts related to infectious diseases.

### Import required libraries

In [1]:
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [2]:
import requests
import json
from pathlib import Path

### Setting up data folder

In [3]:
data_path = Path("data")
data_path.mkdir(exist_ok=True)

### Downloading medRxiv abstracts

In [4]:
url = "https://api.biorxiv.org/covid19/0/json"
response = requests.get(url)

if response.status_code == 200:
    data = response.json()
    
    abstracts = data.get("collection", [])
    print(f"Fetched {len(abstracts)} abstracts")

else:
    print("Failed to retrieve data:", response.status_code)
    abstracts = []

Fetched 30 abstracts


### Save abstracts

In [5]:
with open(data_path / "abstracts_raw.json", "w", encoding="utf-8") as f:
    json.dump(abstracts, f, ensure_ascii=False, indent=4)

print(f"Abstracts saved to {data_path / 'abstracts_raw.json'}")

Abstracts saved to data\abstracts_raw.json


### Verify data

In [None]:
if abstracts:
    sample = abstracts[0]
    print("Sample abstract:")
    print(f"Title: {sample.get('rel_title')}")
    print(f"DOI: {sample.get('rel_doi')}")
    print(f"Authors: {sample.get('rel_authors')}")
    print(f"Date: {sample.get('rel_date')}")
    
    abstract_text = sample.get('rel_abs') or ""
    print(f"Abstract: {abstract_text[:200]}...")

Sample abstract:
Title: An unconventional HxD motif orchestrates coatomer-dependent coronavirus morphogenesis
DOI: 10.1101/2025.10.16.682669
Authors: [{'author_name': 'Surovi Mohona', 'author_inst': 'Loyola University Chicago'}, {'author_name': 'Anil K Shakya', 'author_inst': 'University of Maryland Baltimore'}, {'author_name': 'Suruchi Singh', 'author_inst': 'University of Maryland Baltimore'}, {'author_name': 'Fiona L Kearns', 'author_inst': 'University of California, San Diego'}, {'author_name': 'Kezia Jemison', 'author_inst': 'University of California, San Diego'}, {'author_name': 'Satchal K Erramilli', 'author_inst': 'The University of Chicago'}, {'author_name': 'Debajit Dey', 'author_inst': 'University of Maryland Baltimore'}, {'author_name': 'Enya Qing', 'author_inst': 'Loyola University Chicago'}, {'author_name': 'Benjamin C Jennings', 'author_inst': 'Washington University School of Medicine'}, {'author_name': 'Balraj Doray', 'author_inst': 'Washington University School of Medi