In this capstone, we will predict if the Falcon 9 first stage will land successfully. SpaceX advertises Falcon 9 rocket launches on its website with a cost of 62 million dollars; other providers cost upward of 165 million dollars each, much of the savings is because SpaceX can reuse the first stage. Therefore if we can determine if the first stage will land, we can determine the cost of a launch. This information can be used if an alternate company wants to bid against SpaceX for a rocket launch. In this lab, you will collect and make sure the data is in the correct format from an API. The following is an example of a successful and launch.

In [1]:
# Importing necessary libraries
import requests           # For making HTTP requests to SpaceX API
import pandas as pd       # For data manipulation and analysis
import numpy as np        # For working with arrays and matrices
import datetime           # For handling date and time

# Setting options to display all columns and full text in the dataframe
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# SpaceX API URL for retrieving rocket launch data
spacex_url = 'https://api.spacexdata.com/v4/launches/past'

# Initialize lists to store data from API responses
BoosterVersion = []
PayloadMass = []
Orbit = []
LaunchSite = []
Outcome = []
Flights = []
GridFins = []
Reused = []
Legs = []
LandingPad = []
Block = []
ReusedCount = []
Serial = []
Longitude = []
Latitude = []

def getBoosterVersion(data):
    """
    Fetches BoosterVersion from SpaceX API based on rocket ID.

    Args:
        data (dict): A dictionary containing launch data.

    Returns:
        None
    """
    for x in data['rocket']:
        if x:
            response = requests.get("https://api.spacexdata.com/v4/rockets/" + str(x)).json()
            BoosterVersion.append(response['name'])

def getLaunchSite(data):
    """
    Fetches LaunchSite details from SpaceX API based on launchpad ID.

    Args:
        data (dict): A dictionary containing launch data.

    Returns:
        None
    """
    for x in data['launchpad']:
        if x:
            response = requests.get("https://api.spacexdata.com/v4/launchpads/" + str(x)).json()
            Longitude.append(response['longitude'])
            Latitude.append(response['latitude'])
            LaunchSite.append(response['name'])

def getPayloadData(data):
    """
    Fetches Payload data from SpaceX API based on payload ID.

    Args:
        data (dict): A dictionary containing launch data.

    Returns:
        None
    """
    for load in data['payloads']:
        if load:
            response = requests.get("https://api.spacexdata.com/v4/payloads/" + load).json()
            PayloadMass.append(response['mass_kg'])
            Orbit.append(response['orbit'])

def getCoreData(data):
    """
    Fetches Core data from SpaceX API based on core ID.

    Args:
        data (dict): A dictionary containing launch data.

    Returns:
        None
    """
    for core in data['cores']:
        if core['core'] is not None:
            response = requests.get("https://api.spacexdata.com/v4/cores/" + core['core']).json()
            Block.append(response['block'])
            ReusedCount.append(response['reuse_count'])
            Serial.append(response['serial'])
        else:
            Block.append(None)
            ReusedCount.append(None)
            Serial.append(None)
        Outcome.append(str(core['landing_success']) + ' ' + str(core['landing_type']))
        Flights.append(core['flight'])
        GridFins.append(core['gridfins'])
        Reused.append(core['reused'])
        Legs.append(core['legs'])
        LandingPad.append(core['landpad'])

# Sending a request to SpaceX API to retrieve launch data
response = requests.get(spacex_url)

# Checking the status code and content of the response
print(response.status_code)
# print(response.content)

# Converting the JSON response into a Pandas DataFrame
data = pd.json_normalize(response.json())

# Displaying the first few rows of the DataFrame
data.head()

# Selecting only the relevant columns from the DataFrame
data = data[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]

# Filtering rows with only one core and one payload
data = data[data['cores'].map(len) == 1]
data = data[data['payloads'].map(len) == 1]

# Extracting single values from lists in 'cores' and 'payloads' columns
data['cores'] = data['cores'].map(lambda x: x[0])
data['payloads'] = data['payloads'].map(lambda x: x[0])

# Converting 'date_utc' to datetime format and extracting the date part
data['date'] = pd.to_datetime(data['date_utc']).dt.date

# Filtering the data to include launches up to November 13, 2020
data = data[data['date'] <= datetime.date(2020, 11, 13)]

# Creating a dictionary to store launch data
launch_dict = {
    'FlightNumber': list(data['flight_number']),
    'Date': list(data['date']),
    'BoosterVersion': BoosterVersion,
    'PayloadMass': PayloadMass,
    'Orbit': Orbit,
    'LaunchSite': LaunchSite,
    'Outcome': Outcome,
    'Flights': Flights,
    'GridFins': GridFins,
    'Reused': Reused,
    'Legs': Legs,
    'LandingPad': LandingPad,
    'Block': Block,
    'ReusedCount': ReusedCount,
    'Serial': Serial,
    'Longitude': Longitude,
    'Latitude': Latitude
}

# Call created functions
getBoosterVersion(data)
getPayloadData(data)
getCoreData(data)
getLaunchSite(data)

# Creating a new DataFrame from the launch_dict
data = pd.DataFrame(launch_dict)

# Displaying the DataFrame
data.head()

# Showing summary statistics of the DataFrame
data.describe()

# Filtering for Falcon 9 rockets and resetting the flight number column
data_falcon9 = data[data['BoosterVersion']!='Falcon 1']
data_falcon9.loc[:,'FlightNumber'] = list(range(1, data_falcon9.shape[0]+1))

# Inputting info for PayloadMass using mean
data_falcon9['PayloadMass'].fillna(int(data_falcon9['PayloadMass'].mean()), inplace=True)
data_falcon9.isnull().sum()

# Saving data to CSV
data_falcon9.to_csv('dataset_part_1.csv', index=False)


200


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_falcon9['PayloadMass'].fillna(int(data_falcon9['PayloadMass'].mean()), inplace=True)
