In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [4]:
# Wikipedia page that lists every Falcon 9 / Falcon Heavy launch.
ulr="https://en.wikipedia.org/wiki/List_of_Falcon_9_and_Falcon_Heavy_launches"
# A timeout keeps this cell from hanging indefinitely on a stalled connection.
response=requests.get(ulr, timeout=30)
# Stop here on an HTTP error (4xx/5xx) instead of parsing an error page in later cells.
response.raise_for_status()

In [5]:
# Parse the downloaded HTML with the html5lib parser.
bs = BeautifulSoup(markup=response.text, features="html5lib")

In [6]:
# Locate the launch table by its HTML id.
my_table = bs.find('table', {'id': '2023ytd'})
# find() returns None when nothing matches; fail here with a clear message
# rather than with an AttributeError in a later cell.
if my_table is None:
    raise ValueError("No <table id='2023ytd'> found in the downloaded page")

In [7]:
# Collect the stripped text of every <th> in the table, skipping cells that are
# blank or consist only of digits.
headers = my_table.find_all('th')
header_texts = (th.get_text().strip() for th in headers)
column_names = [text for text in header_texts if text and not text.isdigit()]

In [8]:
 # Show the header names collected from the table's <th> cells.
 column_names

['Flight No.',
 'Date andtime (UTC)',
 'Version,booster[f]',
 'Launchsite',
 'Payload[g]',
 'Payload mass',
 'Orbit',
 'Customer',
 'Launchoutcome',
 'Boosterlanding',
 'FH 5',
 'FH 6',
 'FH 7',
 'FH 8',
 'FH 9']

In [9]:
# Every collected header except 'Flight No.'; these names are paired with the
# <td> cells of each row when the rows are scraped.
cell_columns = [name for name in column_names if name != 'Flight No.']
cell_columns

['Date andtime (UTC)',
 'Version,booster[f]',
 'Launchsite',
 'Payload[g]',
 'Payload mass',
 'Orbit',
 'Customer',
 'Launchoutcome',
 'Boosterlanding',
 'FH 5',
 'FH 6',
 'FH 7',
 'FH 8',
 'FH 9']

In [10]:
# Map every column name to its own empty list, ready to be filled row by row.
# A comprehension is used so that each key gets a distinct list object.
launch_dict = {col: [] for col in column_names}
launch_dict

{'Flight No.': [],
 'Date andtime (UTC)': [],
 'Version,booster[f]': [],
 'Launchsite': [],
 'Payload[g]': [],
 'Payload mass': [],
 'Orbit': [],
 'Customer': [],
 'Launchoutcome': [],
 'Boosterlanding': [],
 'FH 5': [],
 'FH 6': [],
 'FH 7': [],
 'FH 8': [],
 'FH 9': []}

In [11]:
# All <tr> elements of the launch table.
rows = my_table.find_all(name='tr')

In [12]:
# Fill launch_dict from the table rows: the row's first <th> supplies the
# 'Flight No.' value and its <td> cells are paired, in order, with cell_columns.
# NOTE(review): this appends to the existing lists, so running the cell twice
# would duplicate every row.
for row in rows:
    cells = row.find_all('td')
    header_cell = row.th
    # Rows without any <th> carry no flight number and are skipped.
    if not header_cell:
        continue
    flight_no = header_cell.get_text().strip()
    # The column-title row starts with the literal header text, not a launch.
    if flight_no == 'Flight No.':
        continue
    launch_dict['Flight No.'].append(flight_no)
    # zip() stops at the shorter sequence, so surplus column names get no value.
    for col, cell in zip(cell_columns, cells):
        launch_dict[col].append(cell.get_text().strip())

In [14]:
# Keep only the columns that received at least one value.
launch_dict = {name: values for name, values in launch_dict.items() if values}

In [15]:
# Build the launch table: one DataFrame column per key in launch_dict.
df = pd.DataFrame(data=launch_dict)

In [16]:
# Display the scraped launch table as built from launch_dict.
df

Unnamed: 0,Flight No.,Date andtime (UTC),"Version,booster[f]",Launchsite,Payload[g],Payload mass,Orbit,Customer,Launchoutcome,Boosterlanding
0,195,"January 3, 202314:56[17]",F9 B5B1060.15,"Cape Canaveral, SLC‑40",Transporter-6 (115 payload smallsat rideshare),Unknown[h],SSO,Various,Success,Success (LZ‑1)
1,196,"January 10, 202304:50[23]",F9 B5B1076.2,"Cape Canaveral, SLC‑40",OneWeb 16 (40 satellites),"6,000 kg (13,000 lb)",Polar LEO,OneWeb,Success,Success (LZ‑1)
2,FH 5,"January 15, 202322:56[29]",Falcon Heavy B5B1070 (core),"Kennedy, LC‑39A",USSF-67 (CBAS-2 & LDPE-3A),"~3,750 kg (8,270 lb)",GEO,USSF,Success,No attempt
3,197,"January 18, 202312:24[33]",F9 B5B1077.2,"Cape Canaveral, SLC‑40",USA-343 (GPS-III SV06),"4,352 kg (9,595 lb)",MEO,USSF,Success,Success (JRTI)
4,198,"January 19, 202315:43[39]",F9 B5B1075.1,"Vandenberg, SLC‑4E",Starlink: Group 2-4 (51 satellites),"15,000 kg (33,000 lb)",LEO,SpaceX,Success,Success (OCISLY)
...,...,...,...,...,...,...,...,...,...,...
91,282,"December 19, 202304:01[207]",F9 B5B1081.3,"Cape Canaveral, SLC‑40",Starlink: Group 6-34 (23 satellites),"~16,800 kg (37,000 lb)",LEO,SpaceX,Success,Success (ASOG)
92,283,"December 23, 202305:33[208]",F9 B5B1058.19,"Cape Canaveral, SLC‑40",Starlink: Group 6-32 (23 satellites),"~16,800 kg (37,000 lb)",LEO,SpaceX,Success,Partial failure (JRTI)
93,284,"December 24, 202313:11[210]",F9 B5B1075.8,"Vandenberg, SLC‑4E",SARah 2 & 3[211],"~3,600 kg (7,900 lb)",SSO,German Intelligence Service,Success,Success (LZ‑4)
94,FH 9,"December 29, 202301:07[213]",Falcon Heavy B5 B1084 (core)[214],"Kennedy, LC‑39A",USSF-52 (Boeing X-37B OTV-7),"6,350 kg (14,000 lb)+ OTV payload",High Elliptical HEO[215],Department of the Air Force RapidCapabilities ...,Success,No attempt


In [17]:
# Split the combined date/time text into two columns.
# Group 1: everything up to and including the first four-digit run that is
# directly followed by HH:MM; group 2: that HH:MM.
date_time_pattern = r'^(.*?\d{4})(\d{2}:\d{2})'
date_time_parts = df['Date andtime (UTC)'].str.extract(date_time_pattern)
df[['Date', 'Time']] = date_time_parts

In [19]:
# Extract the kilogram figure from the free-text payload mass and store it as a float.
# The number must start with a digit (so a stray "," cannot match and then break
# the float conversion) and may carry a fractional part, so "1,234.5 kg" becomes
# 1234.5 rather than only its trailing digits. Rows with no "<number> kg" text
# stay NaN.
payload_kg_text = df['Payload mass'].str.extract(r'(\d[\d,]*(?:\.\d+)?)\s*kg')[0]
df['Payload mass(Kg)'] = payload_kg_text.str.replace(',', '', regex=False).astype(float)

In [22]:
# Remove the two raw text columns from df, in place.
df.drop(columns=['Date andtime (UTC)', 'Payload mass'], inplace=True)

In [23]:
# Display the table after the Date, Time and Payload mass(Kg) columns were added
# and the two raw text columns were dropped.
df

Unnamed: 0,Flight No.,"Version,booster[f]",Launchsite,Payload[g],Orbit,Customer,Launchoutcome,Boosterlanding,Date,Time,Payload mass(Kg)
0,195,F9 B5B1060.15,"Cape Canaveral, SLC‑40",Transporter-6 (115 payload smallsat rideshare),SSO,Various,Success,Success (LZ‑1),"January 3, 2023",14:56,
1,196,F9 B5B1076.2,"Cape Canaveral, SLC‑40",OneWeb 16 (40 satellites),Polar LEO,OneWeb,Success,Success (LZ‑1),"January 10, 2023",04:50,6000.0
2,FH 5,Falcon Heavy B5B1070 (core),"Kennedy, LC‑39A",USSF-67 (CBAS-2 & LDPE-3A),GEO,USSF,Success,No attempt,"January 15, 2023",22:56,3750.0
3,197,F9 B5B1077.2,"Cape Canaveral, SLC‑40",USA-343 (GPS-III SV06),MEO,USSF,Success,Success (JRTI),"January 18, 2023",12:24,4352.0
4,198,F9 B5B1075.1,"Vandenberg, SLC‑4E",Starlink: Group 2-4 (51 satellites),LEO,SpaceX,Success,Success (OCISLY),"January 19, 2023",15:43,15000.0
...,...,...,...,...,...,...,...,...,...,...,...
91,282,F9 B5B1081.3,"Cape Canaveral, SLC‑40",Starlink: Group 6-34 (23 satellites),LEO,SpaceX,Success,Success (ASOG),"December 19, 2023",04:01,16800.0
92,283,F9 B5B1058.19,"Cape Canaveral, SLC‑40",Starlink: Group 6-32 (23 satellites),LEO,SpaceX,Success,Partial failure (JRTI),"December 23, 2023",05:33,16800.0
93,284,F9 B5B1075.8,"Vandenberg, SLC‑4E",SARah 2 & 3[211],SSO,German Intelligence Service,Success,Success (LZ‑4),"December 24, 2023",13:11,3600.0
94,FH 9,Falcon Heavy B5 B1084 (core)[214],"Kennedy, LC‑39A",USSF-52 (Boeing X-37B OTV-7),High Elliptical HEO[215],Department of the Air Force RapidCapabilities ...,Success,No attempt,"December 29, 2023",01:07,6350.0


In [24]:
# Write the table to a CSV file in the working directory, without the row index.
df.to_csv(path_or_buf='spacex_web_scraped.csv', index=False)