# In [None]:
import os
import requests
from bs4 import BeautifulSoup

def _series_matrix_url(geo_id):
    """Return the GEO FTP URL of the series matrix file for *geo_id*."""
    # GEO shards series directories by replacing the last three digits of the
    # accession with "nnn". Accessions with three or fewer digits all live
    # under "<prefix>nnn" (e.g. GSE12 -> GSEnnn), so slice the digits only,
    # never the letter prefix.
    prefix = geo_id.rstrip("0123456789")
    digits = geo_id[len(prefix):]
    shard = f"{prefix}{digits[:-3]}nnn"
    return (
        f"https://ftp.ncbi.nlm.nih.gov/geo/series/{shard}/{geo_id}"
        f"/matrix/{geo_id}_series_matrix.txt.gz"
    )


def _fetch_soup(url, timeout):
    """Fetch *url* and return its body parsed with BeautifulSoup."""
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.content, 'html.parser')


def _download_series_matrix(geo_id, timeout):
    """Download the series matrix file for *geo_id* into the working directory."""
    matrix_file = f"{geo_id}_series_matrix.txt.gz"
    # Stream into a temporary name first so an interrupted transfer never
    # leaves a truncated archive under the final file name.
    partial_file = matrix_file + ".part"
    # The context manager releases the streamed connection on every path.
    with requests.get(_series_matrix_url(geo_id), stream=True,
                      timeout=timeout) as response:
        if response.status_code != 200:
            print(f"Failed to download series matrix file for {geo_id}")
            return
        with open(partial_file, 'wb') as f:
            for chunk in response.iter_content(chunk_size=65536):
                f.write(chunk)
    os.replace(partial_file, matrix_file)
    print(f"Downloaded series matrix file: {matrix_file}")


def fetch_geo_metadata(geo_ids, timeout=30):
    """Fetch GSM IDs and SRA Run Selector links for the given GEO IDs.

    For every non-blank ID the GEO accession page is scraped for sample
    (GSM) links, each sample page is scraped for an "SRA Run Selector"
    link, and the series matrix file is downloaded into the current
    working directory as ``<geo_id>_series_matrix.txt.gz``.

    Args:
        geo_ids: Iterable of GEO accession strings (e.g. ``"GSE12345"``).
            Surrounding whitespace is stripped; blank entries are skipped.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A dict mapping each processed GEO ID to
        ``{'gsm_ids': [...], 'sra_info': [...]}``, where ``gsm_ids`` holds
        the unique GSM accessions in page order and ``sra_info`` holds the
        ``href`` values of the SRA Run Selector links that were found.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    base_url = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc="
    geo_data = {}

    for geo_id in geo_ids:
        geo_id = geo_id.strip()
        if not geo_id:
            # A blank ID would just query the bare base URL; skip it.
            continue

        print(f"Processing {geo_id}...")
        soup = _fetch_soup(base_url + geo_id, timeout)

        # Extract GSM IDs; dict.fromkeys de-duplicates while keeping page
        # order so each sample page is requested only once.
        link_texts = (
            link.get_text(strip=True)
            for link in soup.find_all('a', href=True)
        )
        gsm_ids = list(dict.fromkeys(
            text for text in link_texts
            if text.startswith('GSM') and text[3:].isdigit()
        ))

        # Extract SRA Run information
        sra_info = []
        for gsm_id in gsm_ids:
            sample_soup = _fetch_soup(base_url + gsm_id, timeout)
            sra_link = sample_soup.find('a', string='SRA Run Selector')
            # An anchor without href would raise KeyError on indexing.
            sra_url = sra_link.get('href') if sra_link else None
            if sra_url:
                sra_info.append(sra_url)

        geo_data[geo_id] = {'gsm_ids': gsm_ids, 'sra_info': sra_info}

        # Download series matrix file
        _download_series_matrix(geo_id, timeout)

    return geo_data

# Script entry point: prompt for GEO IDs, fetch their metadata, print it.
if __name__ == "__main__":
    geo_ids_input = input("Enter GEO IDs (comma-separated): ")
    # Drop blank entries (empty input, trailing or doubled commas) so they
    # are not passed on as GEO IDs.
    geo_ids = [
        geo_id.strip()
        for geo_id in geo_ids_input.split(',')
        if geo_id.strip()
    ]
    geo_data = fetch_geo_metadata(geo_ids)
    print("Retrieved data:", geo_data)
