In [4]:
# Import necessary libraries
import requests
import pandas as pd
import numpy as np
import datetime


In [5]:

# Lift pandas' display limits so wide frames and long cell values are shown in full
for _option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(_option, None)

# Define functions to make API calls and populate lists


In [6]:

def getBoosterVersion(data, timeout=30):
    """Append one rocket name per launch to the global ``BoosterVersion`` list.

    Args:
        data: Mapping or DataFrame whose ``'rocket'`` entry iterates over
            rocket ids (assumed hashable, e.g. strings, since they are used
            as lookup keys).
        timeout: Seconds to wait for each HTTP request (default 30).

    Exactly one value is appended per row so the list stays aligned with the
    rows of ``data``: a missing (falsy) id yields ``None`` instead of being
    skipped, and a response without a ``'name'`` key yields ``'Unknown'``.
    """
    # Many launches share the same rocket, so look each id up only once.
    names_by_id = {}
    for rocket_id in data['rocket']:
        if not rocket_id:
            # Keep the list aligned with the rows even when the id is missing.
            BoosterVersion.append(None)
            continue
        if rocket_id not in names_by_id:
            response = requests.get(
                f"https://api.spacexdata.com/v4/rockets/{rocket_id}",
                timeout=timeout,
            ).json()
            names_by_id[rocket_id] = response.get('name', 'Unknown')
        BoosterVersion.append(names_by_id[rocket_id])

def getLaunchSite(data, timeout=30):
    """Append launchpad details per launch to ``Longitude``, ``Latitude`` and ``LaunchSite``.

    Args:
        data: Mapping or DataFrame whose ``'launchpad'`` entry iterates over
            launchpad ids (assumed hashable, e.g. strings, since they are used
            as lookup keys).
        timeout: Seconds to wait for each HTTP request (default 30).

    Exactly one value is appended to each of the three global lists per row,
    so they stay aligned with the rows of ``data``: a missing (falsy) id
    yields ``None`` in all three lists instead of being skipped.
    """
    # Launchpads repeat across launches, so fetch each id only once.
    site_by_id = {}
    for launchpad_id in data['launchpad']:
        if not launchpad_id:
            # Keep the lists aligned with the rows even when the id is missing.
            Longitude.append(None)
            Latitude.append(None)
            LaunchSite.append(None)
            continue
        if launchpad_id not in site_by_id:
            response = requests.get(
                f"https://api.spacexdata.com/v4/launchpads/{launchpad_id}",
                timeout=timeout,
            ).json()
            site_by_id[launchpad_id] = (
                response.get('longitude', None),
                response.get('latitude', None),
                response.get('name', 'Unknown'),
            )
        longitude, latitude, site_name = site_by_id[launchpad_id]
        Longitude.append(longitude)
        Latitude.append(latitude)
        LaunchSite.append(site_name)

def getPayloadData(data, timeout=30):
    """Append payload mass and orbit per launch to ``PayloadMass`` and ``Orbit``.

    Args:
        data: Mapping or DataFrame whose ``'payloads'`` entry iterates over
            payload ids.
        timeout: Seconds to wait for each HTTP request (default 30).

    Exactly one value is appended to each global list per row, so they stay
    aligned with the rows of ``data``: a missing (falsy) id yields ``None`` in
    both lists instead of being skipped.
    """
    for payload_id in data['payloads']:
        if not payload_id:
            # Keep the lists aligned with the rows even when the id is missing.
            PayloadMass.append(None)
            Orbit.append(None)
            continue
        response = requests.get(
            f"https://api.spacexdata.com/v4/payloads/{payload_id}",
            timeout=timeout,
        ).json()
        PayloadMass.append(response.get('mass_kg', None))
        Orbit.append(response.get('orbit', 'Unknown'))

def getCoreData(data, timeout=30):
    """Append per-launch core details to the global core-related lists.

    Args:
        data: Mapping or DataFrame whose ``'cores'`` entry iterates over
            dict-like core records. Each record must have a ``'core'`` key
            (the core id, or ``None``); the other fields are read with
            ``.get`` and may be absent.
        timeout: Seconds to wait for each HTTP request (default 30).

    For every record one value is appended to each of ``Block``,
    ``ReusedCount``, ``Serial``, ``Outcome``, ``Flights``, ``GridFins``,
    ``Reused``, ``Legs`` and ``LandingPad``. When the core id is ``None`` the
    three API-derived lists receive ``None``.
    """
    # The same core can appear in several records, so fetch each id only once.
    details_by_id = {}
    for core in data['cores']:
        core_id = core['core']
        if core_id is not None:
            if core_id not in details_by_id:
                response = requests.get(
                    f"https://api.spacexdata.com/v4/cores/{core_id}",
                    timeout=timeout,
                ).json()
                details_by_id[core_id] = (
                    response.get('block', None),
                    response.get('reuse_count', None),
                    response.get('serial', 'Unknown'),
                )
            block, reuse_count, serial = details_by_id[core_id]
        else:
            block = reuse_count = serial = None
        Block.append(block)
        ReusedCount.append(reuse_count)
        Serial.append(serial)
        # Outcome is "<landing_success> <landing_type>", e.g. "True ASDS" or "None None".
        landing_success = core.get('landing_success', 'None')
        landing_type = core.get('landing_type', 'None')
        Outcome.append(f"{landing_success!s} {landing_type!s}")
        Flights.append(core.get('flight', None))
        GridFins.append(core.get('gridfins', None))
        Reused.append(core.get('reused', None))
        Legs.append(core.get('legs', None))
        LandingPad.append(core.get('landpad', 'Unknown'))


In [7]:

# Fetch the SpaceX launches data
spacex_url = "https://api.spacexdata.com/v4/launches/past"
# A timeout keeps the cell from hanging indefinitely if the API is unreachable.
response = requests.get(spacex_url, timeout=30)
# Fail here on an HTTP error rather than normalizing an error payload further down.
response.raise_for_status()
data = response.json()


In [8]:

# Normalize the JSON data into a DataFrame
df = pd.json_normalize(data)

# Subset the dataframe keeping only the desired columns
df = df[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]

# Keep only launches with exactly one core and exactly one payload.
# .copy() makes df an independent frame, so the column assignments below
# do not write into a slice of the normalized data (SettingWithCopyWarning).
single_core = df['cores'].map(len) == 1
single_payload = df['payloads'].map(len) == 1
df = df[single_core & single_payload].copy()

# Extract the single values in the list and replace the feature
df['cores'] = df['cores'].map(lambda x: x[0])
df['payloads'] = df['payloads'].map(lambda x: x[0])

# Convert the date_utc to a datetime datatype and extract the date
df['date'] = pd.to_datetime(df['date_utc']).dt.date

# Restrict the dates of the launches (inclusive cutoff: 2020-11-13)
df = df[df['date'] <= datetime.date(2020, 11, 13)]


In [9]:

# Module-level accumulators that the API helper functions append to.

# Filled by getBoosterVersion
BoosterVersion = []

# Filled by getPayloadData
PayloadMass, Orbit = [], []

# Filled by getLaunchSite
LaunchSite, Longitude, Latitude = [], [], []

# Filled by getCoreData
Outcome, Flights, GridFins = [], [], []
Reused, Legs, LandingPad = [], [], []
Block, ReusedCount, Serial = [], [], []


In [None]:
# Call functions to populate the lists.
# The helpers only append, so empty every list first: re-running this cell
# then rebuilds the columns instead of doubling them.
for _values in (BoosterVersion, PayloadMass, Orbit, LaunchSite, Outcome,
                Flights, GridFins, Reused, Legs, LandingPad, Block,
                ReusedCount, Serial, Longitude, Latitude):
    _values.clear()

getBoosterVersion(df)
getLaunchSite(df)
getPayloadData(df)
getCoreData(df)


In [None]:
# Spot-check: show the five leading entries collected in BoosterVersion
print(BoosterVersion[:5])


In [None]:
# Gather every collected column under its final column name.
# FlightNumber and Date come from df; the rest are the populated lists.
launch_dict = dict(
    FlightNumber=list(df['flight_number']),
    Date=list(df['date']),
    BoosterVersion=BoosterVersion,
    PayloadMass=PayloadMass,
    Orbit=Orbit,
    LaunchSite=LaunchSite,
    Outcome=Outcome,
    Flights=Flights,
    GridFins=GridFins,
    Reused=Reused,
    Legs=Legs,
    LandingPad=LandingPad,
    Block=Block,
    ReusedCount=ReusedCount,
    Serial=Serial,
    Longitude=Longitude,
    Latitude=Latitude,
)

In [None]:
# Turn the column dictionary into a DataFrame for analysis
launch_df = pd.DataFrame(data=launch_dict)
launch_df.head(n=5)  # preview the leading rows of the assembled table


In [None]:
# Build launch_df from launch_dict.
# NOTE(review): the preceding cell already performs this same construction;
# this cell looks redundant and could probably be removed.
launch_df = pd.DataFrame(data=launch_dict)

# Plain-text preview of the leading rows as a sanity check
print(launch_df.head(n=5))


In [None]:
# Construct launch_df out of the launch_dict columns.
# NOTE(review): launch_df is already built this way in earlier cells;
# this repeat appears unnecessary.
launch_df = pd.DataFrame(data=launch_dict)

# Print the top five rows
print(launch_df.head(n=5))


In [None]:
# Filter the DataFrame to keep only Falcon 9 launches.
# .copy() gives data_falcon9 its own data, so the later cells that modify it
# do not write into a slice of launch_df (SettingWithCopyWarning).
data_falcon9 = launch_df[launch_df['BoosterVersion'] == 'Falcon 9'].copy()

# Display the first few rows of the filtered DataFrame to verify
data_falcon9.head()


In [None]:
# Renumber the remaining launches 1..N so FlightNumber is consecutive after filtering.
# assign() returns a new frame, so nothing is written into a slice of launch_df
# and re-running the cell gives the same result.
data_falcon9 = data_falcon9.assign(FlightNumber=range(1, len(data_falcon9) + 1))

# Preview the first rows only instead of dumping the whole frame
data_falcon9.head()

In [None]:
# Data wrangling: number of missing values in each column
data_falcon9.isna().sum()

In [None]:
# Calculate the mean of the PayloadMass column (Series.mean() skips missing values by default)
mean_payload_mass = data_falcon9['PayloadMass'].mean()

# Print the calculated mean
print("Mean Payload Mass:", mean_payload_mass)

# Replace missing values in the PayloadMass column with the calculated mean.
# assign() builds a new frame rather than writing into a possible slice of
# launch_df, which avoids SettingWithCopyWarning.
data_falcon9 = data_falcon9.assign(
    PayloadMass=data_falcon9['PayloadMass'].fillna(mean_payload_mass)
)

# Check if there are any remaining missing values in the PayloadMass column
missing_values_count = data_falcon9['PayloadMass'].isna().sum()
print("Remaining missing values in PayloadMass:", missing_values_count)


In [None]:
# Calculate the mean value of the PayloadMass column (Series.mean() skips missing values by default)
mean_payload_mass = data_falcon9['PayloadMass'].mean()

# Print the calculated mean value
print("Mean Payload Mass:", mean_payload_mass)

# Replace missing values in the PayloadMass column with the calculated mean value.
# fillna(..., inplace=True) on a selected column is chained assignment: it warns
# in pandas 2.x and leaves the frame unchanged under Copy-on-Write, so the filled
# column is assigned back explicitly.
data_falcon9 = data_falcon9.assign(
    PayloadMass=data_falcon9['PayloadMass'].fillna(mean_payload_mass)
)

# Verify that there are no more missing values in the PayloadMass column
remaining_missing_values = data_falcon9['PayloadMass'].isna().sum()
print("Remaining missing values in PayloadMass:", remaining_missing_values)
