# SpaceX Falcon 9 First Stage Landing Prediction
## Web Scraping Falcon 9 and Falcon Heavy Launch Records from Wikipedia

In [1]:

# Import libraries 
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Define the URL and get the response:

# Pinned revision (oldid) so the page layout does not change between runs.
static_url = "https://en.wikipedia.org/w/index.php?title=List_of_Falcon_9_and_Falcon_Heavy_launches&oldid=1027686922"

# A timeout keeps the notebook from hanging forever on a stalled connection.
response = requests.get(static_url, timeout=30)
# Fail loudly on 4xx/5xx instead of silently parsing an error page.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

In [3]:
# Define the functions:

def date_time(table_cells):
    """Return the date and time strings from an HTML table cell.

    Args:
        table_cells: An object exposing a ``strings`` iterable of text
            fragments (for example a BeautifulSoup ``Tag``).

    Returns:
        A two-element list built from the first two whitespace-stripped
        fragments. Missing fragments are filled with ``'Unknown'`` so the
        caller can always index ``[0]`` and ``[1]``.
    """
    try:
        parts = [fragment.strip() for fragment in table_cells.strings][:2]
    except (AttributeError, TypeError):
        # No ``strings`` attribute, a non-iterable value, or a fragment
        # that is not a string.
        return ['Unknown', 'Unknown']
    # Pad short results so the return value always has exactly two entries.
    return parts + ['Unknown'] * (2 - len(parts))

def booster_version(table_cells):
    """Return the booster version text from an HTML table cell.

    The cell's text fragments are read in order; every second fragment
    (positions 0, 2, 4, ...) is kept, the last kept fragment is dropped,
    and the remainder is concatenated.

    Args:
        table_cells: An object exposing a ``strings`` iterable of text
            fragments (for example a BeautifulSoup ``Tag``).

    Returns:
        The concatenated string (possibly empty), or ``'Unknown'`` when the
        fragments cannot be read or joined.
    """
    try:
        fragments = list(table_cells.strings)
        # [::2] keeps even positions; [:-1] drops the last kept fragment.
        return ''.join(fragments[::2][:-1])
    except (AttributeError, TypeError):
        # Missing ``strings``, a non-iterable value, or non-string fragments.
        return 'Unknown'

def landing_status(table_cells):
    """Return the landing status from an HTML table cell.

    Args:
        table_cells: An object exposing a ``strings`` iterable of text
            fragments (for example a BeautifulSoup ``Tag``).

    Returns:
        The first text fragment exactly as found (not stripped), or
        ``'Unknown'`` when the cell has no fragments or cannot be read.
    """
    try:
        # Take only the first fragment instead of materialising them all.
        return next(iter(table_cells.strings), 'Unknown')
    except (AttributeError, TypeError):
        # Missing ``strings`` attribute or a non-iterable value.
        return 'Unknown'

def get_mass(table_cells):
    """Extract the payload mass from an HTML table cell.

    Args:
        table_cells: An object exposing a ``text`` string (for example a
            BeautifulSoup ``Tag``).

    Returns:
        The stripped cell text cut off right after the first ``"kg"``; the
        whole stripped text when it contains no ``"kg"``; or ``'0 kg'``
        when the cell text cannot be read.
    """
    try:
        mass = table_cells.text.strip()
    except AttributeError:
        # ``table_cells`` is None or has no usable ``text`` attribute.
        return '0 kg'
    # ``unit`` is "kg" when found and "" otherwise.
    value, unit, _ = mass.partition("kg")
    return value + unit if unit else mass



In [4]:
# Initialize the launch dictionary
# Column-oriented store: each key maps to its own, initially empty, list.
launch_dict = {
    column: []
    for column in (
        'Date',
        'Time',
        'Version Booster',
        'Launch Site',
        'Payload',
        'Payload Mass',
        'Orbit',
        'Customer',
        'Launch Outcome',
        'Booster Landing',
    )
}


In [5]:
# Extracting data from the table
# Process every table carrying the launch-table class, not only the first
# match, so rows from all matching tables on the page are collected.
launch_tables = soup.find_all('table', {'class': 'wikitable plainrowheaders collapsible'})
if not launch_tables:
    # Fail with a clear message instead of an AttributeError on None.
    raise ValueError("No table with class 'wikitable plainrowheaders collapsible' found in the page")

for table in launch_tables:
    for row in table.find_all('tr')[1:]:  # [1:] skips the first (header) row
        cells = row.find_all('td')
        if len(cells) < 9:
            # Not a full data row; all nine columns below are required.
            continue

        # Parse the date cell once and pad, so a cell with fewer than two
        # text fragments cannot raise IndexError.
        launch_date, launch_time = (date_time(cells[0]) + ['Unknown', 'Unknown'])[:2]

        # Populate data into launch_dict (cells[0..8] exist after the guard).
        launch_dict['Date'].append(launch_date)
        launch_dict['Time'].append(launch_time)
        launch_dict['Version Booster'].append(booster_version(cells[1]))
        launch_dict['Launch Site'].append(cells[2].text.strip())
        launch_dict['Payload'].append(cells[3].text.strip())
        launch_dict['Payload Mass'].append(get_mass(cells[4]))
        launch_dict['Orbit'].append(cells[5].text.strip())
        launch_dict['Customer'].append(cells[6].text.strip())
        launch_dict['Launch Outcome'].append(cells[7].text.strip())
        launch_dict['Booster Landing'].append(landing_status(cells[8]))

In [6]:
# Create DataFrame from launch_dict
# Each launch_dict key becomes a column; list entries become the rows.
df = pd.DataFrame(data=launch_dict)

# Preview the first five rows.
df.head(n=5)