<a href="https://colab.research.google.com/github/JoeMcKenzie/JoeMcKenzie/blob/main/Untitled6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import pandas as pd

def fetch_uniprot_data(query, fields=None, page_size=25, timeout=30):
    """Fetch all result rows of a UniProtKB search, following pagination.

    Args:
        query: Query string sent as the ``query`` request parameter.
        fields: Optional iterable of field names; joined with commas and sent
            as the ``fields`` parameter. Omitted from the request when falsy.
        page_size: Number of entries requested per page (``size`` parameter).
        timeout: Seconds to wait on each HTTP request before ``requests``
            raises a timeout error, so a stalled connection cannot hang the
            caller forever.

    Returns:
        A list of rows, each row a list of strings split on tabs. The first
        row is the TSV header from the first page; the header line of every
        later page is dropped. Returns an empty list when the first response
        contains no non-empty lines.

    Raises:
        requests.HTTPError: If any page responds with a 4xx/5xx status.
        requests.RequestException: For connection failures and timeouts.
    """

    base_url = "https://rest.uniprot.org/uniprotkb/search"
    params = {"query": query, "size": page_size, "format": "tsv"}
    if fields:
        params["fields"] = ",".join(fields)

    all_data = []
    url = base_url

    # One session for all pages so the underlying connection is reused.
    with requests.Session() as session:
        while url:
            response = session.get(url, params=params, timeout=timeout)
            response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)

            # Split line by line and only drop fully empty lines. Stripping the
            # whole payload would also remove trailing tabs from the last row,
            # silently discarding its empty trailing fields.
            rows = [
                line.rstrip("\r").split("\t")
                for line in response.text.split("\n")
                if line.rstrip("\r")
            ]
            if not rows:
                break  # No results

            if not all_data:
                all_data.append(rows[0])  # Keep the header once, from the first page
            all_data.extend(rows[1:])

            # requests parses the Link header into a dict keyed by rel; a
            # missing "next" entry means this was the last page.
            next_link = response.links.get("next")
            url = next_link["url"] if next_link else None
            # The "next" URL already carries the query, fields, size and
            # cursor, so sending params again would duplicate them.
            params = None

    return all_data

def save_to_excel(data, filename="uniprot_data.xlsx"):
    """Write tabular rows to an Excel workbook.

    Args:
        data: Sequence of rows where ``data[0]`` holds the column names and
            the remaining rows hold the records.
        filename: Path of the workbook to write.

    Returns:
        None. Prints a status message in either case: a notice when ``data``
        is empty/falsy (nothing is written), or the output path on success.
    """

    if data:
        column_names = data[0]
        records = data[1:]
        # index=False keeps the DataFrame's row index out of the sheet.
        pd.DataFrame(records, columns=column_names).to_excel(filename, index=False)
        print(f"Data saved to {filename}")
    else:
        print("No data to save.")

if __name__ == "__main__":
    # Example run: reviewed human (organism_id 9606) entries tagged with
    # keyword KW-0732, exported to the default Excel file.
    search_query = "(keyword:KW-0732) AND (reviewed:true) AND (organism_id:9606)"

    # Example return fields requested for each entry.
    desired_fields = [
        "protein_name",
        "sequence",
        "organism_id",
        "cc_subcellular_location",
        "ft_signal",
    ]

    uniprot_data = fetch_uniprot_data(query=search_query, fields=desired_fields)
    save_to_excel(data=uniprot_data)

Data saved to uniprot_data.xlsx
