# Analyzing SpaceX Falcon 9 Launch Data



![](https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/api/Images/landing_1.gif)

![](https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/api/Images/crash.gif)



# Objectives :

### - Retrieve and process SpaceX launch data from the SpaceX API using Python's requests library.
### - Extract relevant information such as booster version, payload mass, launch site, landing outcome, etc., from the retrieved data.
### - Clean and preprocess the data, handling missing values and formatting issues.
### - Filter the dataset to include only Falcon 9 launches and reset the flight numbers accordingly.
### - Export the processed data to a CSV file for further analysis.






#### We will predict if the Falcon 9 first stage will land successfully. SpaceX advertises Falcon 9 rocket launches on its website at a cost of 62 million dollars; other providers cost upward of 165 million dollars each. Much of the savings comes from SpaceX being able to reuse the first stage. Therefore, if we can determine whether the first stage will land, we can determine the cost of a launch. This information can be used if an alternate company wants to bid against SpaceX for a rocket launch. In this lab, you will collect data from an API and make sure it is in the correct format. The animations above show an example of a successful landing and of a crash.

In [34]:
# Requests allows us to make HTTP requests which we will use to get data from an API
import requests
# Pandas is a software library written for the Python programming language for data manipulation and analysis.
import pandas as pd
# NumPy is a library for the Python programming language, adding support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays
import numpy as np
# Datetime is a library that allows us to represent dates
import datetime

# Setting this option will print all columns of a dataframe
pd.set_option('display.max_columns', None)
# Setting this option will print all of the data in a feature
pd.set_option('display.max_colwidth', None)

In [35]:

# Takes the dataset and uses the rocket column to call the API and append the data to the list
def getBoosterVersion(data):
    """Append the rocket name of every launch to the global ``BoosterVersion`` list.

    Args:
        data: Mapping or DataFrame whose ``'rocket'`` entry iterates over
            SpaceX rocket IDs.

    Side effects:
        Appends exactly one item per rocket ID to ``BoosterVersion``: the
        ``'name'`` field of the ``/v4/rockets/<id>`` response, or ``None``
        when the ID is falsy.
    """
    for rocket_id in data['rocket']:
        if not rocket_id:
            # Keep the list aligned row-for-row with ``data`` (as getCoreData
            # does); skipping the row would shift every later value.
            BoosterVersion.append(None)
            continue
        response = requests.get("https://api.spacexdata.com/v4/rockets/"+str(rocket_id)).json()
        BoosterVersion.append(response['name'])


In [36]:
# Takes the dataset and uses the launchpad column to call the API and append the data to the list
def getLaunchSite(data):
    """Append launch-site details of every launch to the global site lists.

    Args:
        data: Mapping or DataFrame whose ``'launchpad'`` entry iterates over
            SpaceX launchpad IDs.

    Side effects:
        Appends exactly one item per launchpad ID to each of ``Longitude``,
        ``Latitude`` and ``LaunchSite``: the ``'longitude'``, ``'latitude'``
        and ``'name'`` fields of the ``/v4/launchpads/<id>`` response, or
        ``None`` when the ID is falsy.
    """
    for launchpad_id in data['launchpad']:
        if not launchpad_id:
            # Keep the lists aligned row-for-row with ``data`` (as
            # getCoreData does) instead of silently dropping the row.
            Longitude.append(None)
            Latitude.append(None)
            LaunchSite.append(None)
            continue
        response = requests.get("https://api.spacexdata.com/v4/launchpads/"+str(launchpad_id)).json()
        Longitude.append(response['longitude'])
        Latitude.append(response['latitude'])
        LaunchSite.append(response['name'])


In [37]:
# Takes the dataset and uses the payloads column to call the API and append the data to the lists
def getPayloadData(data):
    """Append payload mass and orbit of every launch to the global payload lists.

    Args:
        data: Mapping or DataFrame whose ``'payloads'`` entry iterates over
            single payload ID strings (not lists of IDs).

    Side effects:
        Appends exactly one item per payload ID to each of ``PayloadMass``
        and ``Orbit``: the ``'mass_kg'`` and ``'orbit'`` fields of the
        ``/v4/payloads/<id>`` response, or ``None`` when the ID is falsy.

    Raises:
        TypeError: If a truthy payload value is not a string (for example a
            list of IDs that was never unwrapped).
    """
    for payload_id in data['payloads']:
        if not payload_id:
            # Keep the lists aligned row-for-row with ``data`` (as
            # getCoreData does) instead of silently dropping the row.
            PayloadMass.append(None)
            Orbit.append(None)
            continue
        # Plain concatenation is deliberate: a non-string value fails loudly
        # here rather than being turned into a bogus URL.
        response = requests.get("https://api.spacexdata.com/v4/payloads/"+payload_id).json()
        PayloadMass.append(response['mass_kg'])
        Orbit.append(response['orbit'])


In [38]:
# Takes the dataset and uses the cores column to call the API and append the data to the lists
def getCoreData(data):
    """Append core and landing details of every launch to the global core lists.

    Args:
        data: Mapping or DataFrame whose ``'cores'`` entry iterates over
            dicts with the keys ``'core'``, ``'landing_success'``,
            ``'landing_type'``, ``'flight'``, ``'gridfins'``, ``'reused'``,
            ``'legs'`` and ``'landpad'``.

    Side effects:
        Appends one item per core dict to each of ``Block``, ``ReusedCount``,
        ``Serial``, ``Outcome``, ``Flights``, ``GridFins``, ``Reused``,
        ``Legs`` and ``LandingPad``. ``Block``, ``ReusedCount`` and
        ``Serial`` come from the ``/v4/cores/<id>`` response and are ``None``
        when the core ID is ``None``.
    """
    for core in data['cores']:
        core_id = core['core']
        if core_id is not None:
            response = requests.get("https://api.spacexdata.com/v4/cores/"+core_id).json()
            Block.append(response['block'])
            ReusedCount.append(response['reuse_count'])
            Serial.append(response['serial'])
        else:
            # No core record to look up: keep the lists aligned with None.
            Block.append(None)
            ReusedCount.append(None)
            Serial.append(None)
        # Outcome is "<landing_success> <landing_type>", e.g. "True ASDS".
        Outcome.append(f"{core['landing_success']} {core['landing_type']}")
        Flights.append(core['flight'])
        GridFins.append(core['gridfins'])
        Reused.append(core['reused'])
        Legs.append(core['legs'])
        LandingPad.append(core['landpad'])


In [39]:
# Now let's start requesting rocket launch data from the SpaceX API with the following URL:
spacex_url="https://api.spacexdata.com/v4/launches/past"

# Send the GET request; the result is kept in `response` for the following cells
response = requests.get(spacex_url)

In [40]:
# To make the requested JSON results more consistent, we use the following static
# snapshot of the API response for this project instead of the live endpoint:
static_json_url='https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/API_call_spacex_api.json'

# Actually request the static snapshot. Without this call `response` would still
# hold the live API result and static_json_url would never be used.
response = requests.get(static_json_url)

# We should see that the request was successful with the 200 status response code
response.status_code


200

In [41]:
# Now we decode the response content as JSON using .json() and turn it into a Pandas dataframe using json_normalize()
# Use the json_normalize method to convert the JSON result into a dataframe
from pandas import json_normalize
json_data = response.json()
data = json_normalize(json_data)

In [None]:
# Keep only the features we want, plus the flight number and date_utc.
data = data[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]

# Remove rows with multiple cores (Falcon rockets with 2 extra rocket boosters)
# and rows that have multiple payloads in a single rocket.
data = data[data['cores'].map(len) == 1]
# .copy() detaches the filtered rows from the frame they were selected from, so
# the column assignments below write to a real DataFrame rather than a slice
# (avoids pandas' SettingWithCopyWarning).
data = data[data['payloads'].map(len) == 1].copy()

# payloads and cores are now lists of size 1: replace each list with its single element.
data['cores'] = data['cores'].map(lambda x: x[0])
data['payloads'] = data['payloads'].map(lambda x: x[0])

# Convert date_utc to a datetime and keep only the date part, dropping the time.
data['date'] = pd.to_datetime(data['date_utc']).dt.date

# Restrict the launches to those on or before 2020-11-13.
data = data[data['date'] <= datetime.date(2020, 11, 13)]


In [44]:

# Global accumulators: each getter function appends one entry per launch.

# Filled by getBoosterVersion
BoosterVersion = []

# Filled by getPayloadData
PayloadMass, Orbit = [], []

# Filled by getLaunchSite
LaunchSite, Longitude, Latitude = [], [], []

# Filled by getCoreData
Outcome, Flights, GridFins = [], [], []
Reused, Legs, LandingPad = [], [], []
Block, ReusedCount, Serial = [], [], []


In [46]:
# Call getBoosterVersion to fill the global BoosterVersion list from the rocket IDs in data
getBoosterVersion(data)

In [47]:
# The list has now been updated
BoosterVersion

['Falcon 1',
 'Falcon 1',
 'Falcon 1',
 'Falcon 1',
 'Falcon 1',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon Heavy',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon 9',
 'Falcon

In [48]:
# We can apply the rest of the functions here:
# Call getLaunchSite to fill the global LaunchSite, Longitude and Latitude lists
getLaunchSite(data)

In [49]:
# Display the launch site names collected by getLaunchSite
LaunchSite

['Kwajalein Atoll',
 'Kwajalein Atoll',
 'Kwajalein Atoll',
 'Kwajalein Atoll',
 'Kwajalein Atoll',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'VAFB SLC 4E',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'VAFB SLC 4E',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'VAFB SLC 4E',
 'KSC LC 39A',
 'KSC LC 39A',
 'KSC LC 39A',
 'KSC LC 39A',
 'KSC LC 39A',
 'KSC LC 39A',
 'KSC LC 39A',
 'VAFB SLC 4E',
 'KSC LC 39A',
 'KSC LC 39A',
 'VAFB SLC 4E',
 'KSC LC 39A',
 'VAFB SLC 4E',
 'KSC LC 39A',
 'KSC LC 39A',
 'CCSFS SLC 40',
 'VAFB SLC 4E',
 'CCSFS SLC 40',
 'CCSFS SLC 40',
 'KSC LC 39A',
 'VAFB SLC 4E',
 'CCSFS SLC 40',
 'VAFB SLC 4E',
 'CCSFS SLC 40',
 'CCSFS SLC 40',


In [50]:
# Call getPayloadData
# NOTE: getPayloadData concatenates each 'payloads' value onto the request URL, so every
# value must be a single payload ID string. If the preprocessing cell that unwraps the
# one-element lists has not been run first, this call raises
# "TypeError: can only concatenate str (not "list") to str".
getPayloadData(data)

# Call getCoreData
getCoreData(data)


TypeError: can only concatenate str (not "list") to str

In [None]:
# Finally, let's construct our dataset from the data we have obtained.
# The columns are combined into a dictionary, one entry per output column.
launch_dict = {
    'FlightNumber': list(data['flight_number']),
    'Date': list(data['date']),
    'BoosterVersion': BoosterVersion,
    'PayloadMass': PayloadMass,
    'Orbit': Orbit,
    'LaunchSite': LaunchSite,
    'Outcome': Outcome,
    'Flights': Flights,
    'GridFins': GridFins,
    'Reused': Reused,
    'Legs': Legs,
    'LandingPad': LandingPad,
    'Block': Block,
    'ReusedCount': ReusedCount,
    'Serial': Serial,
    'Longitude': Longitude,
    'Latitude': Latitude,
}

# Then create a Pandas dataframe from the dictionary launch_dict.
launch_dict_data = pd.DataFrame(data=launch_dict)
launch_dict_data


In [None]:
# Finally we remove the non-Falcon 9 launches. Filter launch_dict_data on the
# BoosterVersion column to keep only the Falcon 9 launches and save the result
# to a new dataframe called data_falcon9.
# na=False makes rows with a missing BoosterVersion count as "not Falcon 9"
# instead of putting NaN into the boolean mask (which boolean indexing rejects).
is_falcon9 = launch_dict_data['BoosterVersion'].str.startswith('Falcon 9', na=False)
# .copy() gives an independent frame, so later column assignments on
# data_falcon9 do not trigger SettingWithCopyWarning.
data_falcon9 = launch_dict_data[is_falcon9].copy()
data_falcon9

In [None]:
# Rows were removed by the Falcon 9 filter, so renumber the FlightNumber
# column consecutively starting from 1.
n_falcon9_launches = len(data_falcon9)
data_falcon9.loc[:, 'FlightNumber'] = list(range(1, n_falcon9_launches + 1))
data_falcon9

In [None]:
# Count the missing values per column; some of the rows are missing values in our dataset.
data_falcon9.isnull().sum()

In [None]:
# Calculate the mean value of the PayloadMass column
payload_mass_mean = data_falcon9['PayloadMass'].mean()

# Replace the missing PayloadMass values with the mean.
# Calling fillna(inplace=True) on the result of .loc[...] can operate on a
# temporary object and leave data_falcon9 unchanged, so fill through the
# DataFrame (only the PayloadMass column is named) and rebind the result.
data_falcon9 = data_falcon9.fillna({'PayloadMass': payload_mass_mean})

# ** CREATED BY MOHAMED MAHMOUD **