In [3]:
import requests
import pandas as pd
import os

# Report where the notebook is running, to help diagnose path problems
print("Current working directory:", os.getcwd())

# Stop early with a clear message when the input workbook is missing
file_path = "/content/mouse genes cross-checking.xlsx"
if not os.path.exists(file_path):
    print("File does not exist")
    raise FileNotFoundError(f"File {file_path} not found. Please upload it to /content.")
print("File exists")

# Read the workbook and collect the values of its "Input Term" column,
# in row order, as the list of terms to look up
df = pd.read_excel(io=file_path)
input_terms = df["Input Term"].to_list()

# Look up every input term in UniProtKB and keep the top-ranked reviewed
# mouse entry (organism_id 10090). Each term produces exactly one row in
# `results` -- [term, accession, recommended protein name, gene name] -- so
# the output stays aligned with the input column; terms with no match, blank
# terms, and failed lookups get empty strings in the last three fields.
#
# NOTE(review): `gene:` is not restricted to exact symbol matches, and the
# first hit is simply the best-ranked one, not a guaranteed canonical entry.
# Consider `gene_exact:` if exact symbol matching is required -- confirm
# against the UniProt query-field documentation.
UNIPROT_SEARCH_URL = "https://rest.uniprot.org/uniprotkb/search"
REQUEST_TIMEOUT_SECONDS = 30

results = []
# One Session reuses the HTTPS connection for all lookups instead of
# opening a new connection per term.
with requests.Session() as session:
    for term in input_terms:
        # Blank spreadsheet cells are read as NaN; querying "gene:nan" would
        # be meaningless, so record an empty row without calling the API.
        if pd.isna(term) or not str(term).strip():
            results.append([term, "", "", ""])
            continue

        query = f"gene:{str(term).strip()} AND organism_id:10090 AND reviewed:true"
        try:
            # Passing `params` lets requests URL-encode the query string.
            # Only the first hit is used, so ask the server for one result.
            response = session.get(
                UNIPROT_SEARCH_URL,
                params={"query": query, "format": "json", "size": 1},
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
            response.raise_for_status()
            hits = response.json().get("results", [])
        except (requests.RequestException, ValueError) as err:
            # A single failed lookup should not discard the rows already
            # collected; report it and leave this term's fields empty.
            print(f"Lookup failed for {term!r}: {err}")
            results.append([term, "", "", ""])
            continue

        if not hits:
            results.append([term, "", "", ""])  # No matching entry
            continue

        entry = hits[0]
        uniprot_id = entry.get("primaryAccession", "")
        # Walk the nested description defensively: a missing level yields ""
        # instead of raising KeyError.
        protein_name = (
            entry.get("proteinDescription", {})
            .get("recommendedName", {})
            .get("fullName", {})
            .get("value", "")
        )
        # The first gene record may lack "geneName" (e.g. only ORF names).
        genes = entry.get("genes") or []
        gene_name = genes[0].get("geneName", {}).get("value", "") if genes else ""
        results.append([term, uniprot_id, protein_name, gene_name])

# Assemble the collected rows into a table and write it out as a workbook
result_df = pd.DataFrame(
    data=results,
    columns=[
        "Input Term",
        "UniProt ID",
        "Protein Recommended Name",
        "Gene Name",
    ],
)
result_df.to_excel(excel_writer="/content/completed_mouse_genes.xlsx", index=False)
print("Results saved to /content/completed_mouse_genes.xlsx")

Current working directory: /content
File exists
Results saved to /content/completed_mouse_genes.xlsx
