# Agent for Single Cell RNAseq Analysis

# User Inputs

In [2]:
user_query = """Analyze this single cell RNA-seq data about Keloids. Identify
biomarkers from this dataset and discuss the relationship of these data to the
african american community."""
scrna_data_path = ".../data.h5ad"

# Download Papers

## Functions

In [3]:
"""
Generate search keywords from a user query using LangChain and OpenAI.
"""

from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
import os

def get_keywords_from_query(user_query: str, model_name="gpt-3.5-turbo", num_keywords=5) -> list:
    llm = ChatOpenAI(model=model_name, temperature=0.5)

    system_prompt = (
        "You are an assistant that converts user research questions into a short list of high-quality "
        "keywords for academic search (e.g., Semantic Scholar)."
    )
    user_prompt = (
        f"User query: {user_query}\n\n"
        f"Provide {num_keywords} short, relevant keywords or phrases separated by commas."
    )

    messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=user_prompt)
    ]

    response = llm(messages)
    keyword_string = response.content.strip()

    # Split and clean keywords
    keywords = [kw.strip() for kw in keyword_string.split(",")]
    return keywords


In [4]:
"""
Search and download papers from Semantic Scholar.

Usage:
    python semantic_scholar_search.py "deep learning biology" 5 ./output/
"""

import requests
import os
import sys
import json
from tqdm import tqdm

BASE_URL = "https://api.semanticscholar.org/graph/v1/paper/search"

def search_semantic_scholar(query, limit=10):
    params = {
        "query": query,
        "limit": limit,
        "fields": "title,authors,abstract,url,year,externalIds,isOpenAccess,openAccessPdf"
    }
    response = requests.get(BASE_URL, params=params)
    response.raise_for_status()
    return response.json()["data"]

def save_metadata(papers, outdir):
    metadata_path = os.path.join(outdir, "papers_metadata.json")
    with open(metadata_path, "w") as f:
        json.dump(papers, f, indent=2)
    print(f"[+] Saved metadata to {metadata_path}")

def download_pdfs(papers, outdir):
    for i, paper in tqdm(enumerate(papers), desc="Downloading Papers"):
        pdf_url = paper.get("openAccessPdf", {}).get("url")
        if pdf_url:
            try:
                response = requests.get(pdf_url)
                response.raise_for_status()
                title = papers[0]['title'].replace(' ', '_').replace('.','_').replace('-','_').replace(',','_').replace(':','_')
                filename = f"{title}.pdf"
                filepath = os.path.join(outdir, filename)
                with open(filepath, "wb") as f:
                    f.write(response.content)
                print(f"[+] Downloaded: {filename}")
            except Exception as e:
                print(f"[!] Failed to download paper {i+1}: {e}")
        else:
            print(f"[!] No open access PDF for paper {i+1}")


## Constants

In [9]:
output_dir = "downloads" # Loacation where papers will be downloaded to
num_papers = 25          # Number of papers per downlowd

## Script

In [10]:
keywords

['Single cell RNA-seq',
 'Keloids',
 'Biomarkers',
 'African American community',
 'Relationship']

In [None]:
os.makedirs(output_dir, exist_ok=True)
keywords = get_keywords_from_query(user_query)
for keyword in tqdm(keywords, desc="Keywords"):
    papers = search_semantic_scholar(keyword, num_papers)
    save_metadata(papers, output_dir)
    download_pdfs(papers, output_dir)

Keywords:   0%|          | 0/5 [00:00<?, ?it/s]

[+] Saved metadata to downloads/papers_metadata.json



Downloading Papers: 0it [00:00, ?it/s][A
Downloading Papers: 1it [00:29, 29.95s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf



Downloading Papers: 2it [00:31, 13.17s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf



Downloading Papers: 3it [00:36,  9.72s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf



Downloading Papers: 4it [00:38,  6.52s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf



Downloading Papers: 5it [00:40,  4.77s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf



Downloading Papers: 6it [00:56,  8.70s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf



Downloading Papers: 7it [00:58,  6.49s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf



Downloading Papers: 8it [00:58,  4.50s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf



Downloading Papers: 9it [01:00,  3.58s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf
[!] No open access PDF for paper 10



Downloading Papers: 11it [01:01,  2.21s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf



Downloading Papers: 12it [01:03,  2.15s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf



Downloading Papers: 13it [01:05,  2.14s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf



Downloading Papers: 14it [01:15,  4.24s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf



Downloading Papers: 15it [01:27,  6.44s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf



Downloading Papers: 16it [01:53, 11.93s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf



Downloading Papers: 17it [01:56,  9.36s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf
[!] No open access PDF for paper 18



Downloading Papers: 19it [01:59,  5.89s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf



Downloading Papers: 20it [02:05,  5.97s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf
[!] No open access PDF for paper 21
[!] No open access PDF for paper 22



Downloading Papers: 23it [02:17,  4.91s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf



Downloading Papers: 24it [02:21,  4.77s/it][A

[+] Downloaded: Normalization_and_variance_stabilization_of_single_cell_RNA_seq_data_using_regularized_negative_binomial_regression.pdf



Downloading Papers: 25it [02:22,  5.69s/it][A
Keywords:  20%|██        | 1/5 [02:23<09:34, 143.55s/it]

[!] Failed to download paper 25: 403 Client Error: Forbidden for url: https://pmc.ncbi.nlm.nih.gov/articles/PMC5465819
[+] Saved metadata to downloads/papers_metadata.json



Downloading Papers: 0it [00:00, ?it/s][A
Downloading Papers: 2it [00:00, 10.72it/s][A

[!] No open access PDF for paper 1
[!] Failed to download paper 2: 403 Client Error: Forbidden for url: https://journals.lww.com/10.1097/PRS.0000000000008667
[!] No open access PDF for paper 3



Downloading Papers: 4it [00:00,  8.37it/s][A

[!] Failed to download paper 4: 403 Client Error: Forbidden for url: https://pmc.ncbi.nlm.nih.gov/articles/PMC8975835



Downloading Papers: 5it [00:00,  5.54it/s][A

[+] Downloaded: Hypertrophic_Scars_and_Keloids__Advances_in_Treatment_and_Review_of_Established_Therapies.pdf



Downloading Papers: 6it [00:01,  4.77it/s][A

[!] Failed to download paper 6: 403 Client Error: Forbidden for url: https://pmc.ncbi.nlm.nih.gov/articles/PMC9797913



Downloading Papers: 7it [00:01,  4.55it/s][A

[!] Failed to download paper 7: 403 Client Error: Forbidden for url: https://journals.lww.com/10.1097/CM9.0000000000002093
[!] No open access PDF for paper 8
[!] Failed to download paper 9: 403 Client Error: Forbidden for url: https://onlinelibrary.wiley.com/doi/pdfdirect/10.1111/exd.14121



Downloading Papers: 10it [00:02,  2.61it/s][A

[+] Downloaded: Hypertrophic_Scars_and_Keloids__Advances_in_Treatment_and_Review_of_Established_Therapies.pdf



Downloading Papers: 11it [00:04,  1.69it/s][A

[+] Downloaded: Hypertrophic_Scars_and_Keloids__Advances_in_Treatment_and_Review_of_Established_Therapies.pdf



Downloading Papers: 12it [00:04,  1.92it/s][A

[!] Failed to download paper 12: 403 Client Error: Forbidden for url: https://pmc.ncbi.nlm.nih.gov/articles/PMC7940466



Downloading Papers: 13it [00:05,  1.82it/s][A

[+] Downloaded: Hypertrophic_Scars_and_Keloids__Advances_in_Treatment_and_Review_of_Established_Therapies.pdf



Downloading Papers: 14it [00:06,  1.14it/s][A

[+] Downloaded: Hypertrophic_Scars_and_Keloids__Advances_in_Treatment_and_Review_of_Established_Therapies.pdf
[!] Failed to download paper 15: 403 Client Error: Forbidden for url: https://onlinelibrary.wiley.com/doi/pdfdirect/10.1111/exd.14414



Downloading Papers: 16it [00:11,  1.50s/it][A

[+] Downloaded: Hypertrophic_Scars_and_Keloids__Advances_in_Treatment_and_Review_of_Established_Therapies.pdf



Downloading Papers: 17it [00:13,  1.57s/it][A

[+] Downloaded: Hypertrophic_Scars_and_Keloids__Advances_in_Treatment_and_Review_of_Established_Therapies.pdf



Downloading Papers: 18it [00:15,  1.60s/it][A

[+] Downloaded: Hypertrophic_Scars_and_Keloids__Advances_in_Treatment_and_Review_of_Established_Therapies.pdf



Downloading Papers: 19it [00:18,  2.17s/it][A

[+] Downloaded: Hypertrophic_Scars_and_Keloids__Advances_in_Treatment_and_Review_of_Established_Therapies.pdf
[!] No open access PDF for paper 20



Downloading Papers: 21it [00:19,  1.31s/it][A

[+] Downloaded: Hypertrophic_Scars_and_Keloids__Advances_in_Treatment_and_Review_of_Established_Therapies.pdf



Downloading Papers: 22it [00:23,  1.94s/it][A

[+] Downloaded: Hypertrophic_Scars_and_Keloids__Advances_in_Treatment_and_Review_of_Established_Therapies.pdf



Downloading Papers: 23it [00:23,  1.60s/it][A

[+] Downloaded: Hypertrophic_Scars_and_Keloids__Advances_in_Treatment_and_Review_of_Established_Therapies.pdf



Downloading Papers: 24it [00:24,  1.33s/it][A

[+] Downloaded: Hypertrophic_Scars_and_Keloids__Advances_in_Treatment_and_Review_of_Established_Therapies.pdf



Downloading Papers: 25it [00:27,  1.10s/it][A
Keywords:  40%|████      | 2/5 [02:52<03:47, 75.97s/it] 

[+] Downloaded: Hypertrophic_Scars_and_Keloids__Advances_in_Treatment_and_Review_of_Established_Therapies.pdf


# Build RAG Database

# Single Cell Analysis