In [1]:
# Import necessary libraries
import requests
import pandas as pd
import numpy as np
import datetime


In [2]:

# Setting display options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# Define functions to make API calls and populate lists


In [3]:

def getBoosterVersion(data):
    for x in data['rocket']:
        if x:
            response = requests.get(f"https://api.spacexdata.com/v4/rockets/{x}").json()
            BoosterVersion.append(response.get('name', 'Unknown'))  # Using .get() to avoid key errors

def getLaunchSite(data):
    for x in data['launchpad']:
        if x:
            response = requests.get(f"https://api.spacexdata.com/v4/launchpads/{x}").json()
            Longitude.append(response.get('longitude', None))
            Latitude.append(response.get('latitude', None))
            LaunchSite.append(response.get('name', 'Unknown'))

def getPayloadData(data):
    for load in data['payloads']:
        if load:
            response = requests.get(f"https://api.spacexdata.com/v4/payloads/{load}").json()
            PayloadMass.append(response.get('mass_kg', None))
            Orbit.append(response.get('orbit', 'Unknown'))

def getCoreData(data):
    for core in data['cores']:
        if core['core'] != None:
            response = requests.get(f"https://api.spacexdata.com/v4/cores/{core['core']}").json()
            Block.append(response.get('block', None))
            ReusedCount.append(response.get('reuse_count', None))
            Serial.append(response.get('serial', 'Unknown'))
        else:
            Block.append(None)
            ReusedCount.append(None)
            Serial.append(None)
        Outcome.append(str(core.get('landing_success', 'None')) + ' ' + str(core.get('landing_type', 'None')))
        Flights.append(core.get('flight', None))
        GridFins.append(core.get('gridfins', None))
        Reused.append(core.get('reused', None))
        Legs.append(core.get('legs', None))
        LandingPad.append(core.get('landpad', 'Unknown'))


In [4]:

# Fetch the SpaceX launches data
spacex_url = "https://api.spacexdata.com/v4/launches/past"
response = requests.get(spacex_url)
data = response.json()


In [5]:

# Normalize the JSON data into a DataFrame
df = pd.json_normalize(data)

# Subset the dataframe keeping only the desired columns
df = df[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]

# Filter out rows with multiple cores or payloads
df = df[df['cores'].map(len) == 1]
df = df[df['payloads'].map(len) == 1]

# Extract the single values in the list and replace the feature
df['cores'] = df['cores'].map(lambda x: x[0])
df['payloads'] = df['payloads'].map(lambda x: x[0])

# Convert the date_utc to a datetime datatype and extract the date
df['date'] = pd.to_datetime(df['date_utc']).dt.date

# Restrict the dates of the launches
df = df[df['date'] <= datetime.date(2020, 11, 13)]


In [6]:

# Initialize lists to store extracted data
BoosterVersion = []
PayloadMass = []
Orbit = []
LaunchSite = []
Outcome = []
Flights = []
GridFins = []
Reused = []
Legs = []
LandingPad = []
Block = []
ReusedCount = []
Serial = []
Longitude = []
Latitude = []


In [7]:
# Call functions to populate the lists
getBoosterVersion(df)
getLaunchSite(df)
getPayloadData(df)
getCoreData(df)


In [8]:
# Check the first 5 booster versions (as an example)
print(BoosterVersion[0:5])


['Falcon 1', 'Falcon 1', 'Falcon 1', 'Falcon 1', 'Falcon 9']


In [9]:
# Create a dictionary with the extracted data
launch_dict = {
    'FlightNumber': list(df['flight_number']),
    'Date': list(df['date']),
    'BoosterVersion': BoosterVersion,
    'PayloadMass': PayloadMass,
    'Orbit': Orbit,
    'LaunchSite': LaunchSite,
    'Outcome': Outcome,
    'Flights': Flights,
    'GridFins': GridFins,
    'Reused': Reused,
    'Legs': Legs,
    'LandingPad': LandingPad,
    'Block': Block,
    'ReusedCount': ReusedCount,
    'Serial': Serial,
    'Longitude': Longitude,
    'Latitude': Latitude
}

In [10]:
# Convert the dictionary into a DataFrame for easier analysis
launch_df = pd.DataFrame(launch_dict)
launch_df.head()  # Display the first few rows of the final DataFrame


Unnamed: 0,FlightNumber,Date,BoosterVersion,PayloadMass,Orbit,LaunchSite,Outcome,Flights,GridFins,Reused,Legs,LandingPad,Block,ReusedCount,Serial,Longitude,Latitude
0,1,2006-03-24,Falcon 1,20.0,LEO,Kwajalein Atoll,None None,1,False,False,False,,,0,Merlin1A,167.743129,9.047721
1,2,2007-03-21,Falcon 1,,LEO,Kwajalein Atoll,None None,1,False,False,False,,,0,Merlin2A,167.743129,9.047721
2,4,2008-09-28,Falcon 1,165.0,LEO,Kwajalein Atoll,None None,1,False,False,False,,,0,Merlin2C,167.743129,9.047721
3,5,2009-07-13,Falcon 1,200.0,LEO,Kwajalein Atoll,None None,1,False,False,False,,,0,Merlin3C,167.743129,9.047721
4,6,2010-06-04,Falcon 9,,LEO,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B0003,-80.577366,28.561857


In [11]:
# Convert the launch_dict into a DataFrame
launch_df = pd.DataFrame(launch_dict)

# Display the first few rows of the DataFrame to verify the data
print(launch_df.head())


   FlightNumber        Date BoosterVersion  PayloadMass Orbit  \
0             1  2006-03-24       Falcon 1         20.0   LEO   
1             2  2007-03-21       Falcon 1          NaN   LEO   
2             4  2008-09-28       Falcon 1        165.0   LEO   
3             5  2009-07-13       Falcon 1        200.0   LEO   
4             6  2010-06-04       Falcon 9          NaN   LEO   

        LaunchSite    Outcome  Flights  GridFins  Reused   Legs LandingPad  \
0  Kwajalein Atoll  None None        1     False   False  False       None   
1  Kwajalein Atoll  None None        1     False   False  False       None   
2  Kwajalein Atoll  None None        1     False   False  False       None   
3  Kwajalein Atoll  None None        1     False   False  False       None   
4     CCSFS SLC 40  None None        1     False   False  False       None   

   Block  ReusedCount    Serial   Longitude   Latitude  
0    NaN            0  Merlin1A  167.743129   9.047721  
1    NaN            0  Mer

In [12]:
# Create the DataFrame from launch_dict
launch_df = pd.DataFrame(launch_dict)

# Show the first few rows of the DataFrame
print(launch_df.head())


   FlightNumber        Date BoosterVersion  PayloadMass Orbit  \
0             1  2006-03-24       Falcon 1         20.0   LEO   
1             2  2007-03-21       Falcon 1          NaN   LEO   
2             4  2008-09-28       Falcon 1        165.0   LEO   
3             5  2009-07-13       Falcon 1        200.0   LEO   
4             6  2010-06-04       Falcon 9          NaN   LEO   

        LaunchSite    Outcome  Flights  GridFins  Reused   Legs LandingPad  \
0  Kwajalein Atoll  None None        1     False   False  False       None   
1  Kwajalein Atoll  None None        1     False   False  False       None   
2  Kwajalein Atoll  None None        1     False   False  False       None   
3  Kwajalein Atoll  None None        1     False   False  False       None   
4     CCSFS SLC 40  None None        1     False   False  False       None   

   Block  ReusedCount    Serial   Longitude   Latitude  
0    NaN            0  Merlin1A  167.743129   9.047721  
1    NaN            0  Mer

In [13]:
# Filter the DataFrame to keep only Falcon 9 launches
data_falcon9 = launch_df[launch_df['BoosterVersion'] == 'Falcon 9']

# Display the first few rows of the filtered DataFrame to verify
print(data_falcon9.head())


   FlightNumber        Date BoosterVersion  PayloadMass Orbit    LaunchSite  \
4             6  2010-06-04       Falcon 9          NaN   LEO  CCSFS SLC 40   
5             8  2012-05-22       Falcon 9        525.0   LEO  CCSFS SLC 40   
6            10  2013-03-01       Falcon 9        677.0   ISS  CCSFS SLC 40   
7            11  2013-09-29       Falcon 9        500.0    PO   VAFB SLC 4E   
8            12  2013-12-03       Falcon 9       3170.0   GTO  CCSFS SLC 40   

       Outcome  Flights  GridFins  Reused   Legs LandingPad  Block  \
4    None None        1     False   False  False       None    1.0   
5    None None        1     False   False  False       None    1.0   
6    None None        1     False   False  False       None    1.0   
7  False Ocean        1     False   False  False       None    1.0   
8    None None        1     False   False  False       None    1.0   

   ReusedCount Serial   Longitude   Latitude  
4            0  B0003  -80.577366  28.561857  
5         

In [14]:
data_falcon9.loc[:,'FlightNumber'] = list(range(1, data_falcon9.shape[0]+1))
data_falcon9

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


Unnamed: 0,FlightNumber,Date,BoosterVersion,PayloadMass,Orbit,LaunchSite,Outcome,Flights,GridFins,Reused,Legs,LandingPad,Block,ReusedCount,Serial,Longitude,Latitude
4,1,2010-06-04,Falcon 9,,LEO,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B0003,-80.577366,28.561857
5,2,2012-05-22,Falcon 9,525.0,LEO,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B0005,-80.577366,28.561857
6,3,2013-03-01,Falcon 9,677.0,ISS,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B0007,-80.577366,28.561857
7,4,2013-09-29,Falcon 9,500.0,PO,VAFB SLC 4E,False Ocean,1,False,False,False,,1.0,0,B1003,-120.610829,34.632093
8,5,2013-12-03,Falcon 9,3170.0,GTO,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B1004,-80.577366,28.561857
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,86,2020-09-03,Falcon 9,15600.0,VLEO,KSC LC 39A,True ASDS,2,True,True,True,5e9e3032383ecb6bb234e7ca,5.0,12,B1060,-80.603956,28.608058
90,87,2020-10-06,Falcon 9,15600.0,VLEO,KSC LC 39A,True ASDS,3,True,True,True,5e9e3032383ecb6bb234e7ca,5.0,13,B1058,-80.603956,28.608058
91,88,2020-10-18,Falcon 9,15600.0,VLEO,KSC LC 39A,True ASDS,6,True,True,True,5e9e3032383ecb6bb234e7ca,5.0,12,B1051,-80.603956,28.608058
92,89,2020-10-24,Falcon 9,15600.0,VLEO,CCSFS SLC 40,True ASDS,3,True,True,True,5e9e3033383ecbb9e534e7cc,5.0,12,B1060,-80.577366,28.561857


In [15]:
#Data Wrangling
data_falcon9.isnull().sum()

FlightNumber       0
Date               0
BoosterVersion     0
PayloadMass        5
Orbit              0
LaunchSite         0
Outcome            0
Flights            0
GridFins           0
Reused             0
Legs               0
LandingPad        26
Block              0
ReusedCount        0
Serial             0
Longitude          0
Latitude           0
dtype: int64

In [16]:
# Import NumPy if not already imported
import numpy as np

# Calculate the mean of the PayloadMass column
mean_payload_mass = data_falcon9['PayloadMass'].mean()

# Print the calculated mean
print("Mean Payload Mass:", mean_payload_mass)

# Replace np.nan values in the PayloadMass column with the calculated mean
data_falcon9['PayloadMass'] = data_falcon9['PayloadMass'].replace(np.nan, mean_payload_mass)

# Check if there are any remaining missing values in the PayloadMass column
missing_values_count = data_falcon9['PayloadMass'].isna().sum()
print("Remaining missing values in PayloadMass:", missing_values_count)


Mean Payload Mass: 6123.547647058824
Remaining missing values in PayloadMass: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


In [17]:
# Import NumPy if not already imported
import numpy as np

# Calculate the mean value of the PayloadMass column
mean_payload_mass = data_falcon9['PayloadMass'].mean()

# Print the calculated mean value
print("Mean Payload Mass:", mean_payload_mass)

# Replace np.nan values in the PayloadMass column with the calculated mean value
data_falcon9['PayloadMass'].fillna(mean_payload_mass, inplace=True)

# Verify that there are no more missing values in the PayloadMass column
remaining_missing_values = data_falcon9['PayloadMass'].isna().sum()
print("Remaining missing values in PayloadMass:", remaining_missing_values)


Mean Payload Mass: 6123.547647058824
Remaining missing values in PayloadMass: 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)
