In [28]:
# Objectives
# In this lab, you will make a get request to the SpaceX API. You will also do some basic data wrangling and formating.

# Request to the SpaceX API
# Clean the requested data

# Requests allows us to make HTTP requests which we will use to get data from an API
import requests
# Pandas is a software library written for the Python programming language for data manipulation and analysis.
import pandas as pd
# NumPy is a library for the Python programming language, adding support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays
import numpy as np
# Datetime is a library that allows us to represent dates
import datetime

# Setting this option will print all collumns of a dataframe
pd.set_option('display.max_columns', None)
# Setting this option will print all of the data in a feature
pd.set_option('display.max_colwidth', None)

# Takes the dataset and uses the rocket column to call the API and append the data to the list
def getBoosterVersion(data):
    for x in data['rocket']:
       if x:
        response = requests.get("https://api.spacexdata.com/v4/rockets/"+str(x)).json()
        BoosterVersion.append(response['name'])

# Takes the dataset and uses the launchpad column to call the API and append the data to the list
def getLaunchSite(data):
    for x in data['launchpad']:
       if x:
         response = requests.get("https://api.spacexdata.com/v4/launchpads/"+str(x)).json()
         Longitude.append(response['longitude'])
         Latitude.append(response['latitude'])
         LaunchSite.append(response['name'])
        
# Takes the dataset and uses the payloads column to call the API and append the data to the lists
def getPayloadData(data):
    for load in data['payloads']:
       if load:
        response = requests.get("https://api.spacexdata.com/v4/payloads/"+load).json()
        PayloadMass.append(response['mass_kg'])
        Orbit.append(response['orbit'])
        
# Takes the dataset and uses the cores column to call the API and append the data to the lists
def getCoreData(data):
    for core in data['cores']:
            if core['core'] != None:
                response = requests.get("https://api.spacexdata.com/v4/cores/"+core['core']).json()
                Block.append(response['block'])
                ReusedCount.append(response['reuse_count'])
                Serial.append(response['serial'])
            else:
                Block.append(None)
                ReusedCount.append(None)
                Serial.append(None)
            Outcome.append(str(core['landing_success'])+' '+str(core['landing_type']))
            Flights.append(core['flight'])
            GridFins.append(core['gridfins'])
            Reused.append(core['reused'])
            Legs.append(core['legs'])
            LandingPad.append(core['landpad'])

#Now let's start requesting rocket launch data from SpaceX API with the following URL:

spacex_url="https://api.spacexdata.com/v4/launches/past"
response = requests.get(spacex_url)
#print(response)

In [29]:
#Task 1: Request and parse the SpaceX launch data using the GET request

#To make the requested JSON results more consistent, we will use the following static response object for this project:
#static_json_url='https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/API_call_spacex_api.json'

##We should see that the request was successfull with the 200 status response code
#response.status_code

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Decode the JSON response
    data_json = response.json()

    # Convert JSON data to Pandas DataFrame using json_normalize
    data = pd.json_normalize(data_json)

    # Display the DataFrame
    #print(df.head(5))

#     # Lets take a subset of our dataframe keeping only the features we want and the flight number, and date_utc.
    data = data[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]

#     # We will remove rows with multiple cores because those are falcon rockets with 2 extra rocket boosters and rows that have multiple payloads in a single rocket.
    data = data[data['cores'].map(len)==1]
    data = data[data['payloads'].map(len)==1]

#     # Since payloads and cores are lists of size 1 we will also extract the single value in the list and replace the feature.
    data['cores'] = data['cores'].map(lambda x : x[0])
    data['payloads'] = data['payloads'].map(lambda x : x[0])

#     # We also want to convert the date_utc to a datetime datatype and then extracting the date leaving the time
    data['date'] = pd.to_datetime(data['date_utc']).dt.date

#     # Using the date we will restrict the dates of the launches
    data = data[data['date'] <= datetime.date(2020, 11, 13)]

    #The data from these requests will be stored in lists and will be used to create a new dataframe.
    #Global variables 
    BoosterVersion = []
    PayloadMass = []
    Orbit = []
    LaunchSite = []
    Outcome = []
    Flights = []
    GridFins = []
    Reused = []
    Legs = []
    LandingPad = []
    Block = []
    ReusedCount = []
    Serial = []
    Longitude = []
    Latitude = []

    BoosterVersion

    # Call getBoosterVersion
    getBoosterVersion(data)

    BoosterVersion[0:5]
    #we can apply the rest of the functions here:

    # Call getLaunchSite
    getLaunchSite(data)
    # Call getPayloadData
    getPayloadData(data)
    # Call getCoreData
    getCoreData(data)

    #Finally lets construct our dataset using the data we have obtained. We we combine the columns into a dictionary.

    launch_dict = {'FlightNumber': list(data['flight_number']),
    'Date': list(data['date']),
    'BoosterVersion':BoosterVersion,
    'PayloadMass':PayloadMass,
    'Orbit':Orbit,
    'LaunchSite':LaunchSite,
    'Outcome':Outcome,
    'Flights':Flights,
    'GridFins':GridFins,
    'Reused':Reused,
    'Legs':Legs,
    'LandingPad':LandingPad,
    'Block':Block,
    'ReusedCount':ReusedCount,
    'Serial':Serial,
    'Longitude': Longitude,
    'Latitude': Latitude}

    # Create a data from launch_dict
    launch_df = pd.DataFrame(launch_dict)
    
    #Show the summary of the dataframe
    print(launch_df.head())

else:
    print("Error:", response.status_code)


   FlightNumber        Date BoosterVersion  PayloadMass Orbit  \
0             1  2006-03-24       Falcon 1         20.0   LEO   
1             2  2007-03-21       Falcon 1          NaN   LEO   
2             4  2008-09-28       Falcon 1        165.0   LEO   
3             5  2009-07-13       Falcon 1        200.0   LEO   
4             6  2010-06-04       Falcon 9          NaN   LEO   

        LaunchSite    Outcome  Flights  GridFins  Reused   Legs LandingPad  \
0  Kwajalein Atoll  None None        1     False   False  False       None   
1  Kwajalein Atoll  None None        1     False   False  False       None   
2  Kwajalein Atoll  None None        1     False   False  False       None   
3  Kwajalein Atoll  None None        1     False   False  False       None   
4     CCSFS SLC 40  None None        1     False   False  False       None   

   Block  ReusedCount    Serial   Longitude   Latitude  
0    NaN            0  Merlin1A  167.743129   9.047721  
1    NaN            0  Mer

In [30]:
#Task 2: Filter the dataframe to only include Falcon 9 launches
#Finally we will remove the Falcon 1 launches keeping only the Falcon 9 launches. Filter the data dataframe using the BoosterVersion column to only keep the Falcon 9 launches. Save the filtered data to a new dataframe called data_falcon9

# Filter the DataFrame to only include Falcon 9 launches
data_falcon9 = launch_df[launch_df['BoosterVersion'].str.contains('Falcon 9')]

# Reset the FlightNumber column after filtering
data_falcon9.reset_index(drop=True, inplace=True)

# Print the head of the filtered DataFrame to verify the results
print(data_falcon9.head())

#Data Wrangling
#We can see below that some of the rows are missing values in our dataset
data_falcon9.isnull().sum()

   FlightNumber        Date BoosterVersion  PayloadMass Orbit    LaunchSite  \
0             6  2010-06-04       Falcon 9          NaN   LEO  CCSFS SLC 40   
1             8  2012-05-22       Falcon 9        525.0   LEO  CCSFS SLC 40   
2            10  2013-03-01       Falcon 9        677.0   ISS  CCSFS SLC 40   
3            11  2013-09-29       Falcon 9        500.0    PO   VAFB SLC 4E   
4            12  2013-12-03       Falcon 9       3170.0   GTO  CCSFS SLC 40   

       Outcome  Flights  GridFins  Reused   Legs LandingPad  Block  \
0    None None        1     False   False  False       None    1.0   
1    None None        1     False   False  False       None    1.0   
2    None None        1     False   False  False       None    1.0   
3  False Ocean        1     False   False  False       None    1.0   
4    None None        1     False   False  False       None    1.0   

   ReusedCount Serial   Longitude   Latitude  
0            0  B0003  -80.577366  28.561857  
1         

FlightNumber       0
Date               0
BoosterVersion     0
PayloadMass        5
Orbit              0
LaunchSite         0
Outcome            0
Flights            0
GridFins           0
Reused             0
Legs               0
LandingPad        26
Block              0
ReusedCount        0
Serial             0
Longitude          0
Latitude           0
dtype: int64

In [31]:
# Task 3: Dealing with Missing Values
# Calculate below the mean for the PayloadMass using the .mean(). Then use the mean and the .replace() function to replace np.nan values in the data with the mean you calculated.

# Calculate the mean for the PayloadMass column
payload_mean = data_falcon9['PayloadMass'].mean()

# Replace NaN values in the PayloadMass column with the mean
data_falcon9['PayloadMass'].replace(np.nan, payload_mean, inplace=True)

# Print the DataFrame to verify the replacement
print(data_falcon9)

print(data_falcon9['static_fire_date_utc'])

#data_falcon9.to_csv('dataset_part_1.csv', index=False)

    FlightNumber        Date BoosterVersion   PayloadMass Orbit    LaunchSite  \
0              6  2010-06-04       Falcon 9   6123.547647   LEO  CCSFS SLC 40   
1              8  2012-05-22       Falcon 9    525.000000   LEO  CCSFS SLC 40   
2             10  2013-03-01       Falcon 9    677.000000   ISS  CCSFS SLC 40   
3             11  2013-09-29       Falcon 9    500.000000    PO   VAFB SLC 4E   
4             12  2013-12-03       Falcon 9   3170.000000   GTO  CCSFS SLC 40   
..           ...         ...            ...           ...   ...           ...   
85           102  2020-09-03       Falcon 9  15600.000000  VLEO    KSC LC 39A   
86           103  2020-10-06       Falcon 9  15600.000000  VLEO    KSC LC 39A   
87           104  2020-10-18       Falcon 9  15600.000000  VLEO    KSC LC 39A   
88           105  2020-10-24       Falcon 9  15600.000000  VLEO  CCSFS SLC 40   
89           106  2020-11-05       Falcon 9   3681.000000   MEO  CCSFS SLC 40   

        Outcome  Flights  G

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)


KeyError: 'static_fire_date_utc'

In [32]:
# Filter for Falcon 9 launches
data_falcon9 = data[data['rocket'] == "Falcon 9"]

# Count Falcon 9 launches
number_of_falcon9_launches = data_falcon9.shape[0]

print(f"Number of Falcon 9 launches: {number_of_falcon9_launches}")


Number of Falcon 9 launches: 0


In [33]:
number_of_missing_landingpad_values = data['landingPad'].isnull().sum()

print(f"Number of missing values in 'landingPad' column: {number_of_missing_landingpad_values}")


KeyError: 'landingPad'