# Gunter's Space Page (GSP) Scraper

In [1]:
import bs4
import requests
import pandas as pd
import os

In [2]:
#url = "https://space.skyrocket.de/doc_sdat/globalstar.htm"
oneweblist = [
    "https://space.skyrocket.de/doc_sdat/oneweb.htm"
]

starlinklist = [
    "https://space.skyrocket.de/doc_sdat/starlink-v0-9.htm",
    "https://space.skyrocket.de/doc_sdat/starlink-v1-0.htm",
    "https://space.skyrocket.de/doc_sdat/starlink-v1-5.htm",
    "https://space.skyrocket.de/doc_sdat/starlink-v2-mini.htm"
]

globalstarlist = [
    "https://space.skyrocket.de/doc_sdat/globalstar.htm",
    "https://space.skyrocket.de/doc_sdat/globalstar-2.htm"
]

iridiumlist = [
    "https://space.skyrocket.de/doc_sdat/iridium.htm",
    "https://space.skyrocket.de/doc_sdat/iridium-next.htm"
]

orbcommlist = [
    "https://space.skyrocket.de/doc_sdat/orbcomm.htm",
    "https://space.skyrocket.de/doc_sdat/orbcomm-ql.htm",
    "https://space.skyrocket.de/doc_sdat/orbcomm-2.htm"
]

swarmlist = [
    "https://space.skyrocket.de/doc_sdat/spacebee.htm",
    "https://space.skyrocket.de/doc_sdat/spacebee-5.htm",
    "https://space.skyrocket.de/doc_sdat/spacebee-10.htm"
]

In [3]:
def scrape_url(url):
    """
    Scrape the launch list from one Gunter's Space Page data sheet.

    Downloads ``url``, finds the ``<table id="satlist">`` element and turns
    every row after the first (header) row into one record.  A row is kept
    only if it has at least seven cells and its third cell parses as a
    ``dd.mm.YYYY`` date; rows whose date cell holds anything else (for
    example "cancelled", "not launched" or a bare year) are skipped.

    Parameters
    ----------
    url : str
        Address of the page to download.

    Returns
    -------
    pandas.DataFrame
        One row per kept table row with the columns "Satellite", "COSPAR",
        "Date" (parsed timestamp), "LS", "Failed", "Launche Vehicle" and
        "Remarks".  The frame has these columns even when no row is kept.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the page contains no table with id "satlist".
    """
    # NOTE: "Launche Vehicle" is misspelled, but it is the column name that
    # ends up in the exported CSV, so it is kept unchanged.
    columns = [
        "Satellite",
        "COSPAR",
        "Date",
        "LS",
        "Failed",
        "Launche Vehicle",
        "Remarks",
    ]
    date_position = columns.index("Date")

    # A timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status stops an error page from being parsed as data.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Naming the parser makes the result independent of which optional
    # parsers happen to be installed and avoids bs4's "guessed parser" warning.
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    satlist = soup.find("table", {"id": "satlist"})
    if satlist is None:
        raise ValueError(f"no table with id 'satlist' found at {url}")

    records = []
    for trrow in satlist.find_all("tr")[1:]:
        cells = [td.text for td in trrow.find_all("td")]
        if len(cells) < len(columns):
            # Too few cells to fill every column; skip instead of failing
            # half-way through a row.
            continue
        try:
            launch_date = pd.to_datetime(
                cells[date_position].lower(), format="%d.%m.%Y"
            )
        except (ValueError, TypeError):
            # Not a full date ("cancelled", "not launched", a bare year, ...).
            continue
        record = dict(zip(columns, cells))
        record["Date"] = launch_date
        records.append(record)

    return pd.DataFrame(records, columns=columns)

def scrape_urllist(urllist):
    """
    Scrape several Gunter's Space Page data sheets into one DataFrame.

    Parameters
    ----------
    urllist : iterable of str
        URLs to scrape; each one is passed to ``scrape_url``.

    Returns
    -------
    pandas.DataFrame
        The per-page frames concatenated in the order of ``urllist``.  Each
        page keeps its own row index, so index labels repeat across pages.

    Raises
    ------
    ValueError
        Raised by ``pandas.concat`` when ``urllist`` is empty.
    """
    # Iterate over the argument; the previous version looped over the global
    # ``starlinklist`` and therefore ignored whatever list was passed in.
    frames = [scrape_url(url) for url in urllist]
    return pd.concat(frames)

## Starlink-Specific Processing

In [4]:
# Scrape the pages listed in `starlinklist` into a single DataFrame.
df = scrape_urllist(starlinklist)

In [5]:
# Print every value of the "COSPAR" column, one per line.
for a in df["COSPAR"]:
    print(a)

2019-029A
2019-029B
2019-029C
2019-029D
2019-029E
2019-029F
2019-029G
2019-029H
2019-029J
2019-029K
2019-029L
2019-029M
2019-029N
2019-029P
2019-029Q
2019-029R
2019-029S
2019-029T
2019-029U
2019-029V
2019-029W
2019-029X
2019-029Y
2019-029Z
2019-029AA
2019-029AB
2019-029AC
2019-029AD
2019-029AE
2019-029AF
2019-029AG
2019-029AH
2019-029AJ
2019-029AK
2019-029AL
2019-029AM
2019-029AN
2019-029AP
2019-029AQ
2019-029AR
2019-029AS
2019-029AT
2019-029AU
2019-029AV
2019-029AW
2019-029AX
2019-029AY
2019-029AZ
2019-029BA
2019-029BB
2019-029BC
2019-029BD
2019-029BE
2019-029BF
2019-029BG
2019-029BH
2019-029BJ
2019-029BK
2019-029BL
2019-029BM
2019-074A
2019-074B
2019-074C
2019-074D
2019-074E
2019-074F
2019-074G
2019-074H
2019-074J
2019-074K
2019-074L
2019-074M
2019-074N
2019-074P
2019-074Q
2019-074R
2019-074S
2019-074T
2019-074U
2019-074V
2019-074W
2019-074X
2019-074Y
2019-074Z
2019-074AA
2019-074AB
2019-074AC
2019-074AD
2019-074AE
2019-074AF
2019-074AG
2019-074AH
2019-074AJ
2019-074AK
2019-074AL
201

In [115]:
# "Revision" is the second space-separated token of the satellite name,
# e.g. "v0.9" in "Starlink v0.9 L1-1 (Starlink 31)".
df["Revision"] = df["Satellite"].apply(lambda sat: sat.split(" ")[1])

def parse_starlinknum(s):
    """
    Extract the Starlink number from a satellite name.

    The number is read from the parenthesised alias: for
    ``"Starlink v0.9 L1-1 (Starlink 31)"`` the alias is ``"Starlink 31"`` and
    the result is ``31``.  If the second word of the alias contains a comma,
    only the text before the first comma is converted.

    Parameters
    ----------
    s : str
        Satellite name.

    Returns
    -------
    int
        The parsed number, or ``-1`` when the name has no parenthesised
        alias or the alias does not contain an integer.  Names that cannot
        be parsed are printed so they can be inspected.
    """
    try:
        alias = s.split("(")[1].split(")")[0]
        # Splitting on "," is a no-op when the token contains no comma.
        number_text = alias.split(" ")[1].split(",")[0]
        return int(number_text)
    except (AttributeError, IndexError, TypeError, ValueError):
        # Only parsing problems are treated as "no number"; interrupts and
        # other unrelated errors are allowed to propagate.
        print(s)
        return -1

def parse_group(s):
    """
    Derive the group label from a satellite name.

    The label is the first two characters of the third space-separated
    token of the name, e.g. ``"G6"`` for
    ``"Starlink v2-Mini G6-2-17 (Starlink 30111)"``.  A label of ``"L1"`` is
    reported as ``"G1"``.

    Parameters
    ----------
    s : str
        Satellite name.

    Returns
    -------
    str or int
        The two-character label, or the integer ``-1`` when the name has
        fewer than three space-separated tokens (the name is then printed).
    """
    try:
        gname = s.split(" ")[2][:2]
    except (AttributeError, IndexError, TypeError):
        # Only malformed names are reported; interrupts and other unrelated
        # errors are allowed to propagate.
        print("asd", s)
        return -1
    # NOTE(review): only two characters are compared, so tokens such as
    # "L10-1" also yield "L1" and are mapped to "G1" - confirm this is intended.
    return "G1" if gname == "L1" else gname

In [116]:
# Add the columns derived from the satellite name; names that a parser cannot
# handle are printed by the parser and get the value -1.
df["STARLINK_NUMBER"] = df["Satellite"].apply(parse_starlinknum)
df["GROUP"] = df["Satellite"].apply(parse_group)

Starlink v1.5 G3-5-1
Starlink v1.5 G3-5-2
Starlink v1.5 G3-5-3
Starlink v1.5 G3-5-4
Starlink v1.5 G3-5-5
Starlink v1.5 G3-5-6
Starlink v1.5 G3-5-7
Starlink v1.5 G3-5-8
Starlink v1.5 G3-5-9
Starlink v1.5 G3-5-10
Starlink v1.5 G3-5-11
Starlink v1.5 G3-5-12
Starlink v1.5 G3-5-13
Starlink v1.5 G3-5-14
Starlink v1.5 G3-5-15
Starlink v1.5 G3-5-16
Starlink v1.5 G3-5-17
Starlink v1.5 G3-5-18
Starlink v1.5 G3-5-19
Starlink v1.5 G3-5-20
Starlink v1.5 G3-5-21
Starlink v1.5 G3-5-22
Starlink v1.5 G3-5-23
Starlink v1.5 G3-5-24
Starlink v1.5 G3-5-25
Starlink v1.5 G3-5-26
Starlink v1.5 G3-5-27
Starlink v1.5 G3-5-28
Starlink v1.5 G3-5-29
Starlink v1.5 G3-5-30
Starlink v1.5 G3-5-31
Starlink v1.5 G3-5-32
Starlink v1.5 G3-5-33
Starlink v1.5 G3-5-34
Starlink v1.5 G3-5-35
Starlink v1.5 G3-5-36
Starlink v1.5 G3-5-37
Starlink v1.5 G3-5-38
Starlink v1.5 G3-5-39
Starlink v1.5 G3-5-40
Starlink v1.5 G3-5-41
Starlink v1.5 G3-5-42
Starlink v1.5 G3-5-43
Starlink v1.5 G3-5-44
Starlink v1.5 G3-5-45
Starlink v1.5 G3-5-

In [117]:
#launch_dates_df = pd.DataFrame(list(set([rd for rd in zip(df["Revision"], df["Date"])])), columns=["Revision", "LaunchDate"])
# to_csv does not create missing directories, so make sure "data" exists
# before writing; otherwise the export fails on a fresh checkout.
os.makedirs("data", exist_ok=True)
df.to_csv("data/gsp_scraped_starlink.csv")

In [118]:
# Show the frame with the derived columns.
df

Unnamed: 0,Satellite,COSPAR,Date,LS,Failed,Launche Vehicle,Remarks,Revision,STARLINK_NUMBER,GROUP
0,Starlink v0.9 L1-1 (Starlink 31),2019-029A,2019-05-24,CC SLC-40,,Falcon-9 v1.2 (Block 5),"with Starlink v0.9 L1-1, ..., L1-60",v0.9,31,G1
1,Starlink v0.9 L1-2 (Starlink 22),2019-029B,2019-05-24,CC SLC-40,,Falcon-9 v1.2 (Block 5),"with Starlink v0.9 L1-1, ..., L1-60",v0.9,22,G1
2,Starlink v0.9 L1-3 (Starlink 23),2019-029C,2019-05-24,CC SLC-40,,Falcon-9 v1.2 (Block 5),"with Starlink v0.9 L1-1, ..., L1-60",v0.9,23,G1
3,Starlink v0.9 L1-4 (Starlink 24),2019-029D,2019-05-24,CC SLC-40,,Falcon-9 v1.2 (Block 5),"with Starlink v0.9 L1-1, ..., L1-60",v0.9,24,G1
4,Starlink v0.9 L1-5 (Starlink 25),2019-029E,2019-05-24,CC SLC-40,,Falcon-9 v1.2 (Block 5),"with Starlink v0.9 L1-1, ..., L1-60",v0.9,25,G1
...,...,...,...,...,...,...,...,...,...,...
37,Starlink v2-Mini G6-2-17 (Starlink 30111),2023-056S,2023-04-19,CC SLC-40,,Falcon-9 v1.2 (Block 5),"with Starlink v2-Mini G6-2-1, ..., G6-2-21",v2-Mini,30111,G6
38,Starlink v2-Mini G6-2-18 (Starlink 30097),2023-056T,2023-04-19,CC SLC-40,,Falcon-9 v1.2 (Block 5),"with Starlink v2-Mini G6-2-1, ..., G6-2-21",v2-Mini,30097,G6
39,Starlink v2-Mini G6-2-19 (Starlink 30099),2023-056U,2023-04-19,CC SLC-40,,Falcon-9 v1.2 (Block 5),"with Starlink v2-Mini G6-2-1, ..., G6-2-21",v2-Mini,30099,G6
40,Starlink v2-Mini G6-2-20 (Starlink 30094),2023-056V,2023-04-19,CC SLC-40,,Falcon-9 v1.2 (Block 5),"with Starlink v2-Mini G6-2-1, ..., G6-2-21",v2-Mini,30094,G6
