In [39]:
import os
import time
import xml.etree.ElementTree as ET

from Bio import Entrez

# Contact address sent with every E-utilities request (the original value
# ended in ".eduy", which is not a valid address).
Entrez.email = "dwc001@ucsd.edu"
# Read the NCBI API key from the environment instead of committing it to the
# notebook. When NCBI_API_KEY is unset this is None and requests are sent
# without a key.
# NOTE(review): the key that used to be hardcoded here has been exposed in the
# notebook history and should be revoked/rotated in the NCBI account settings.
Entrez.api_key = os.environ.get("NCBI_API_KEY")

def _read_and_close(handle):
    """Parse an Entrez response handle with ``Entrez.read`` and always close it."""
    try:
        return Entrez.read(handle)
    finally:
        handle.close()


def _fetch_exp_xml(srr):
    """Return the 'ExpXml' string of the first SRA hit for ``srr``, or None if the search is empty."""
    search_result = _read_and_close(
        Entrez.esearch(db="sra", term=srr, retmode="xml")
    )
    uid_list = search_result.get("IdList", [])
    if not uid_list:
        return None

    # Only the first UID returned by the search is summarised.
    summary_result = _read_and_close(
        Entrez.esummary(db="sra", id=uid_list[0], retmode="xml")
    )
    return summary_result[0]["ExpXml"]


def _alias_from_exp_xml(exp_xml):
    """Return the Sample ``name`` attribute, else the LIBRARY_NAME text, else None."""
    # The payload is wrapped in a synthetic root before parsing, presumably
    # because ExpXml is a fragment without a single top-level element.
    root = ET.fromstring(f"<Root>{exp_xml}</Root>")

    sample = root.find(".//Sample")
    sample_name = sample.get("name") if sample is not None else None
    if sample_name:
        return sample_name

    # Looked up only when needed, so a record without LIBRARY_NAME no longer
    # discards a perfectly good sample name.
    library = root.find(".//LIBRARY_NAME")
    library_name = library.text if library is not None else None
    return library_name or None


def srr_to_sample_aliases(srr_list):
    """Map SRA run accessions to a human-readable sample alias via NCBI Entrez.

    For each accession the ``sra`` database is searched, the first hit is
    summarised, and the alias is taken from the ``name`` attribute of the
    ``Sample`` element in the record's ``ExpXml``; if that is missing or empty
    the ``LIBRARY_NAME`` text is used instead.

    Parameters
    ----------
    srr_list : iterable of str
        Run accessions (e.g. ``"SRR5161062"``). Repeated accessions are looked
        up only once.

    Returns
    -------
    dict
        ``{accession: alias}``. The value is ``None`` when the search returns
        no record, when neither name is present, or when any error occurs
        while fetching/parsing (the error is printed, not raised). Every
        accession in ``srr_list`` is guaranteed to have a key.
    """
    results = {}
    for srr in srr_list:
        if srr in results:
            # Already resolved; avoid a redundant pair of network requests.
            continue
        try:
            exp_xml = _fetch_exp_xml(srr)
            alias = None
            if exp_xml is not None:
                alias = _alias_from_exp_xml(exp_xml)
                if alias is None:
                    # Dump the raw payload so the record can be inspected by hand.
                    print(f"No sample or library name found for {srr}:")
                    print(exp_xml)
            results[srr] = alias
        except Exception as e:
            # Best effort: report the failure and keep going with the next run.
            print(f"Error processing {srr}: {e}")
            results[srr] = None
        finally:
            time.sleep(0.5)  # polite delay, applied after every lookup attempt
    return results

In [2]:
# Pasted text listing, split on newlines into a list of strings. It contains
# section headings (e.g. "3D7", "CHM-Cladosporin:", "RAW"), "total <size>"
# lines, blank separators, one stray file-listing line, and SRR run accessions
# that each appear on two consecutive lines.
# NOTE(review): presumably trimmed `ls -l` output with one entry per paired
# read file -- confirm against the source directory.
# The cells below only act on entries that start with 'SRR'.
lines = '''3D7
total 98G
SRR5161062
SRR5161062
SRR5161064
SRR5161064
SRR5161105
SRR5161105
SRR5161107
SRR5161107
SRR5161111
SRR5161111
SRR5161132
SRR5161132
SRR5161135
SRR5161135
SRR5161139
SRR5161139
SRR5161146
SRR5161146
SRR5161167
SRR5161167
SRR5161170
SRR5161170
SRR5161185
SRR5161185
SRR5161191
SRR5161191
SRR5161216
SRR5161216
SRR5161226
SRR5161226
SRR5161242
SRR5161242
SRR5161245
SRR5161245
SRR5161062
SRR5161062
SRR5161064
SRR5161064

CHM-Cladosporin:
total 51G
SRR5161151
SRR5161151
SRR5161171
SRR5161171
SRR5161200
SRR5161200
SRR5161240
SRR5161240
SRR5161242
SRR5161242
SRR5161245
SRR5161245
SRR5161260
SRR5161260
SRR5161261
SRR5161261
SRR5161262
SRR5161262

MALDA-MMV006767:
total 9.2G
SRR5161078
SRR5161078
SRR5161128
SRR5161128
SRR5161140
SRR5161140
SRR5161198
SRR5161198
SRR5161253
SRR5161253
SRR5161078
SRR5161078
SRR5161128
SRR5161128
SRR5161140
SRR5161140
SRR5161198
SRR5161198
SRR5161253
SRR5161253

MALDA-MMV007224:
total 2.2G
SRR5161123
SRR5161123
SRR5161131
SRR5161131
SRR5161218
SRR5161218

MALDA-MMV007564:
total 9.1G
SRR5161108
SRR5161108
SRR5161153
SRR5161153
SRR5161163
SRR5161163
SRR5161176
SRR5161176
SRR5161208
SRR5161208
SRR5161213
SRR5161213
SRR5161228
SRR5161228
SRR5161241
SRR5161241
SRR5161251
SRR5161251

RAW
total 81G
SRR5161062
SRR5161062
SRR5161064
SRR5161064
-rw-rw-r--. 1 1000 winzeler-group 3.2K Mar  8  2018 ftpdown.txt
SRR5161078
SRR5161078
SRR5161128
SRR5161128
SRR5161140
SRR5161140
SRR5161198
SRR5161198
SRR5161253
SRR5161253
SRR5161123
SRR5161123
SRR5161131
SRR5161131
SRR5161218
SRR5161218
SRR5161108
SRR5161108
SRR5161153
SRR5161153
SRR5161163
SRR5161163
SRR5161176
SRR5161176
SRR5161208
SRR5161208
SRR5161213
SRR5161213
SRR5161228
SRR5161228
SRR5161241
SRR5161241
SRR5161251
SRR5161251'''.split('\n')

In [40]:
# Unique SRR accessions from the pasted listing, in sorted order.
srr_list = sorted({entry.strip() for entry in lines if entry.startswith('SRR')})
# One lookup per unique accession: accession -> sample alias (or None).
sample_dict = srr_to_sample_aliases(srr_list)

In [42]:
# Echo the listing with every SRR entry replaced by its sample alias. All
# other entries (headings, totals, blanks) become an empty line so the output
# stays line-for-line aligned with `lines`.
for line in lines:
    if line.startswith('SRR'):
        # sample_dict keys were built from line.strip(), so strip here as well;
        # .get prints None for an accession without an entry instead of
        # aborting the whole listing with a KeyError.
        print(sample_dict.get(line.strip()))
    else:
        print()



MALDA-3D7Parent-18
MALDA-3D7Parent-18
MALDA-3D7Parent-5
MALDA-3D7Parent-5
MALDA-3D7Parent-15
MALDA-3D7Parent-15
MALDA-3D7Parent-2
MALDA-3D7Parent-2
MALDA-3D7Parent-12
MALDA-3D7Parent-12
MALDA-3D7Parent-20
MALDA-3D7Parent-20
MALDA-3D7Parent-9
MALDA-3D7Parent-9
MALDA-3D7Parent-4
MALDA-3D7Parent-4
MALDA-3D7Parent-10
MALDA-3D7Parent-10
MALDA-3D7Parent-16
MALDA-3D7Parent-16
MALDA-3D7Parent-11
MALDA-3D7Parent-11
MALDA-3D7Parent-14
MALDA-3D7Parent-14
MALDA-3D7Parent-8
MALDA-3D7Parent-8
MALDA-3D7Parent-1
MALDA-3D7Parent-1
MALDA-3D7Parent-6
MALDA-3D7Parent-6
MALDA-3D7Parent-17
MALDA-3D7Parent-17
MALDA-3D7Parent-7
MALDA-3D7Parent-7
MALDA-3D7Parent-18
MALDA-3D7Parent-18
MALDA-3D7Parent-5
MALDA-3D7Parent-5



malaria parasite P. falciparum
malaria parasite P. falciparum
malaria parasite P. falciparum
malaria parasite P. falciparum
malaria parasite P. falciparum
malaria parasite P. falciparum
malaria parasite P. falciparum
malaria parasite P. falciparum
MALDA-3D7Parent-17
MALDA-3D7Parent-17
MALDA