In [1]:
# Requests allows us to make HTTP requests which we will use to get data from an API
import requests
# Pandas is a software library written for the Python programming language for data manipulation and analysis.
import pandas as pd
# NumPy is a library for the Python programming language, adding support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays
import numpy as np
# Datetime is a library that allows us to represent dates
import datetime

In [2]:
# Takes the dataset and uses the rocket column to call the API and append the data to the list
def getBoosterVersion(data):
    """Append one booster name per row of ``data`` to ``BoosterVersion``.

    Every entry of ``data['rocket']`` is looked up at the SpaceX v4
    ``rockets`` endpoint and the ``name`` field of the JSON reply is appended
    to the module-level ``BoosterVersion`` list.

    Parameters
    ----------
    data : mapping or DataFrame
        Must expose a ``'rocket'`` entry that iterates over rocket ids.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    KeyError
        If the JSON reply has no ``'name'`` field.
    """
    # Many launches share a rocket id, so each distinct id is fetched once.
    names_by_id = {}
    for rocket_id in data['rocket']:
        if not rocket_id:
            # Still append a placeholder: the list must keep one entry per
            # row, otherwise it no longer lines up with the other columns.
            BoosterVersion.append(None)
            continue
        key = str(rocket_id)
        if key not in names_by_id:
            reply = requests.get("https://api.spacexdata.com/v4/rockets/" + key, timeout=30)
            reply.raise_for_status()
            names_by_id[key] = reply.json()['name']
        BoosterVersion.append(names_by_id[key])

In [3]:
# Takes the dataset and uses the payloads column to call the API and append the data to the lists
def getPayloadData(data):
    """Append payload mass and orbit for every row of ``data``.

    Every entry of ``data['payloads']`` is looked up at the SpaceX v4
    ``payloads`` endpoint; the ``mass_kg`` and ``orbit`` fields of the JSON
    reply are appended to the module-level ``PayloadMass`` and ``Orbit`` lists.

    Parameters
    ----------
    data : mapping or DataFrame
        Must expose a ``'payloads'`` entry that iterates over payload ids
        (strings). A ``None`` id yields ``None`` in both lists.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    KeyError
        If the JSON reply lacks ``'mass_kg'`` or ``'orbit'``.
    """
    for payload_id in data['payloads']:
        if payload_id is None:
            # Same convention as getCoreData: keep one entry per row.
            PayloadMass.append(None)
            Orbit.append(None)
            continue
        reply = requests.get("https://api.spacexdata.com/v4/payloads/" + payload_id, timeout=30)
        reply.raise_for_status()
        payload = reply.json()
        PayloadMass.append(payload['mass_kg'])
        Orbit.append(payload['orbit'])

In [4]:
# Takes the dataset and uses the cores column to call the API and append the data to the lists
def getCoreData(data):
    """Append core and landing details for every row of ``data``.

    For each entry of ``data['cores']`` (a dict describing one core):

    * when its ``'core'`` id is not ``None``, the SpaceX v4 ``cores`` endpoint
      is queried and ``block``, ``reuse_count`` and ``serial`` from the JSON
      reply are appended to ``Block``, ``ReusedCount`` and ``Serial``;
      otherwise ``None`` is appended to each of those lists;
    * the remaining values come straight from the dict and are appended to
      ``Outcome``, ``Flights``, ``GridFins``, ``Reused``, ``Legs`` and
      ``LandingPad``.

    Parameters
    ----------
    data : mapping or DataFrame
        Must expose a ``'cores'`` entry that iterates over dicts with the keys
        ``core``, ``landing_success``, ``landing_type``, ``flight``,
        ``gridfins``, ``reused``, ``legs`` and ``landpad``.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    for core in data['cores']:
        core_id = core['core']
        if core_id is not None:
            reply = requests.get("https://api.spacexdata.com/v4/cores/" + core_id, timeout=30)
            reply.raise_for_status()
            details = reply.json()
            Block.append(details['block'])
            ReusedCount.append(details['reuse_count'])
            Serial.append(details['serial'])
        else:
            # No core id to look up: keep the three lists aligned with the rows.
            Block.append(None)
            ReusedCount.append(None)
            Serial.append(None)
        # Outcome is the text "<landing_success> <landing_type>".
        Outcome.append(str(core['landing_success']) + ' ' + str(core['landing_type']))
        Flights.append(core['flight'])
        GridFins.append(core['gridfins'])
        Reused.append(core['reused'])
        Legs.append(core['legs'])
        LandingPad.append(core['landpad'])

In [5]:
# We will use a static URL for the launch data instead
static_json_url='https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/API_call_spacex_api.json'
response = requests.get(static_json_url, timeout=30)
# Actually check the response status: raise on anything that is not a success
# code instead of evaluating status_code and ignoring the result.
response.raise_for_status()

# Use the json_normalize method to convert the JSON result into a flat dataframe
data = pd.json_normalize(response.json())


In [6]:
# NOTE: this cell overwrites `data`. Re-run the loading cell before re-running
# it, because afterwards `cores`/`payloads` no longer hold lists.

# Keep only the features we want, plus the flight number and date_utc.
data = data[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]

# Remove rows with multiple cores (Falcon rockets with 2 extra boosters) and
# rows with multiple payloads in a single rocket. `.map(len) == 1` builds a
# boolean mask that is True where the list in that row has exactly one element.
data = data[data['cores'].map(len) == 1]
# .copy() makes the filtered result an independent frame, so the column
# assignments below do not raise SettingWithCopyWarning.
data = data[data['payloads'].map(len) == 1].copy()

# payloads and cores are now lists of size 1: replace each list with the
# single value it contains.
data['cores'] = data['cores'].map(lambda x: x[0])
data['payloads'] = data['payloads'].map(lambda x: x[0])

# Parse date_utc as a datetime and keep only the calendar date (no time) in a
# new 'date' column.
data['date'] = pd.to_datetime(data['date_utc']).dt.date

# Restrict the launches to those dated on or before 2020-11-13.
data = data[data['date'] <= datetime.date(2020, 11, 13)]

In [7]:
# Global accumulator lists, one per output column. The helper functions
# append to them by name, so each one must exist before the helpers are called.

# appended to by getBoosterVersion
BoosterVersion = []

# appended to by getPayloadData
PayloadMass, Orbit = [], []

# appended to by getCoreData
Block, ReusedCount, Serial = [], [], []
Outcome, Flights, GridFins = [], [], []
Reused, Legs, LandingPad = [], [], []

# no helper visible in this part of the notebook appends to these
LaunchSite, Longitude, Latitude = [], [], []

In [8]:
# Empty the accumulators the three helpers append to, so that re-running this
# cell does not add a second copy of every value (which would make the lists
# longer than `data`).
for _accumulator in (BoosterVersion, PayloadMass, Orbit,
                     Block, ReusedCount, Serial,
                     Outcome, Flights, GridFins, Reused, Legs, LandingPad):
    _accumulator.clear()

# Call getBoosterVersion
getBoosterVersion(data)

# Call getPayloadData
getPayloadData(data)

# Call getCoreData
getCoreData(data)

In [9]:
# Finally, let's construct our dataset from the data we have obtained.
# We combine the columns into a dictionary: column name -> list of values.
launch_dict = dict(
    FlightNumber=list(data['flight_number']),
    Date=list(data['date']),
    PayloadMass=PayloadMass,
    BoosterVersion=BoosterVersion,
)

In [10]:
# Turn the dictionary into a dataframe and print its first five rows.
# Despite its name, data_falcon9 still contains every booster version at this
# point; the Falcon 9 filter is applied in a later cell.
data_falcon9 = pd.DataFrame(data=launch_dict)
print(data_falcon9.head(5))

   FlightNumber        Date  PayloadMass BoosterVersion
0             1  2006-03-24         20.0       Falcon 1
1             2  2007-03-21          NaN       Falcon 1
2             4  2008-09-28        165.0       Falcon 1
3             5  2009-07-13        200.0       Falcon 1
4             6  2010-06-04          NaN       Falcon 9


In [11]:
# Keep only the rows whose BoosterVersion is exactly "Falcon 9"
# (the original index labels are preserved).
newdf = data_falcon9.loc[data_falcon9['BoosterVersion'] == "Falcon 9"]
print(newdf.head(5))

   FlightNumber        Date  PayloadMass BoosterVersion
4             6  2010-06-04          NaN       Falcon 9
5             8  2012-05-22        525.0       Falcon 9
6            10  2013-03-01        677.0       Falcon 9
7            11  2013-09-29        500.0       Falcon 9
8            12  2013-12-03       3170.0       Falcon 9


In [12]:
# Work on an independent copy so the filtered frame `newdf` stays untouched.
newdf2 = newdf.copy()
# Renumber FlightNumber consecutively from 1 to the number of rows.
newdf2.loc[:, 'FlightNumber'] = list(range(1, len(newdf) + 1))
print(newdf2.head(5))

   FlightNumber        Date  PayloadMass BoosterVersion
4             1  2010-06-04          NaN       Falcon 9
5             2  2012-05-22        525.0       Falcon 9
6             3  2013-03-01        677.0       Falcon 9
7             4  2013-09-29        500.0       Falcon 9
8             5  2013-12-03       3170.0       Falcon 9


In [13]:
# Replace null values in column 'PayloadMass' with 0.
# The filled column is assigned back rather than calling
# fillna(..., inplace=True) on the selected column: the in-place form operates
# on an intermediate object, pandas warns about it, and with Copy-on-Write
# enabled it leaves newdf2 unchanged.
newdf2['PayloadMass'] = newdf2['PayloadMass'].fillna(0)
print('DataFrame with null values in column A replaced with 0:\n', newdf2.head())

DataFrame with null values in column A replaced with 0:
    FlightNumber        Date  PayloadMass BoosterVersion
4             1  2010-06-04          0.0       Falcon 9
5             2  2012-05-22        525.0       Falcon 9
6             3  2013-03-01        677.0       Falcon 9
7             4  2013-09-29        500.0       Falcon 9
8             5  2013-12-03       3170.0       Falcon 9


In [14]:
# Mean of the PayloadMass column, printed rounded to two decimals.
# Rows whose missing mass was replaced with 0 earlier are included in the mean.
meanPayloadMass = newdf2.loc[:, 'PayloadMass'].mean()
print('Average of column PayloadMass:', round(meanPayloadMass, 2))

Average of column PayloadMass: 5783.35
