In [24]:
import requests
import pandas as pd
import datetime

# Initialize global variables to store data
BoosterVersion = []
PayloadMass = []
Orbit = []
LaunchSite = []
Outcome = []
Flights = []
GridFins = []
Reused = []
Legs = []
LandingPad = []
Block = []
ReusedCount = []
Serial = []
Longitude = []
Latitude = []

# Functions to fetch data from the SpaceX API

def getBoosterVersion(data):
    """Populate BoosterVersion list with rocket names."""
    for x in data['rocket']:
        if x:
            response = requests.get(f"https://api.spacexdata.com/v4/rockets/{str(x)}").json()
            BoosterVersion.append(response['name'])

def getLaunchSite(data):
    """Populate LaunchSite, Longitude, and Latitude lists."""
    for x in data['launchpad']:
        if x:
            response = requests.get(f"https://api.spacexdata.com/v4/launchpads/{str(x)}").json()
            Longitude.append(response['longitude'])
            Latitude.append(response['latitude'])
            LaunchSite.append(response['name'])

def getPayloadData(data):
    """Populate PayloadMass and Orbit lists."""
    for load in data['payloads']:
        if load:
            response = requests.get(f"https://api.spacexdata.com/v4/payloads/{load}").json()
            PayloadMass.append(response['mass_kg'])
            Orbit.append(response['orbit'])

def getCoreData(data):
    """Populate lists related to core data like Block, ReusedCount, Serial, Outcome, Flights, etc."""
    for core in data['cores']:
        if core['core'] is not None:
            response = requests.get(f"https://api.spacexdata.com/v4/cores/{core['core']}").json()
            Block.append(response['block'])
            ReusedCount.append(response['reuse_count'])
            Serial.append(response['serial'])
        else:
            Block.append(None)
            ReusedCount.append(None)
            Serial.append(None)

        Outcome.append(f"{core['landing_success']} {core['landing_type']}")
        Flights.append(core['flight'])
        GridFins.append(core['gridfins'])
        Reused.append(core['reused'])
        Legs.append(core['legs'])
        LandingPad.append(core['landpad'])

# Request data from SpaceX API
spacex_url = "https://api.spacexdata.com/v4/launches/past"
response = requests.get(spacex_url)

if response.status_code == 200:
    # Decode the response JSON and load it into a DataFrame
    json_data = response.json()
    df = pd.json_normalize(json_data)
    print("Initial Data:")
    print(df.head())

    # Selecting necessary columns for further processing
    df = df[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]

    # Handling cases where 'cores' and 'payloads' are lists, selecting the first item if present
    df['cores'] = df['cores'].map(lambda x: x[0] if isinstance(x, list) else x)
    df['payloads'] = df['payloads'].map(lambda x: x[0] if isinstance(x, list) else x)

    # Convert the 'date_utc' column to a datetime type and extract only the date
    df['date'] = pd.to_datetime(df['date_utc']).dt.date

    # Filter the DataFrame by date (up to 2020-11-13)
    df = df[df['date'] <= datetime.date(2020, 11, 13)]

    # Check columns of the DataFrame to confirm that necessary columns are present
    print("Columns in the DataFrame:")
    print(df.columns)

    # Now, let's handle missing values for each relevant column
    if 'PayloadMass' in df.columns:
        df['PayloadMass'].fillna(0, inplace=True)
    else:
        print("'PayloadMass' column is missing")

    if 'Orbit' in df.columns:
        df['Orbit'].fillna('Unknown', inplace=True)
    else:
        print("'Orbit' column is missing")

    if 'LaunchSite' in df.columns:
        df['LaunchSite'].fillna(df['LaunchSite'].mode()[0], inplace=True)
    else:
        print("'LaunchSite' column is missing")

    # Check missing values after handling
    print("\nMissing values after handling:")
    print(df.isnull().sum())

    # Call functions to populate the global variables
    getBoosterVersion(df)
    getLaunchSite(df)
    getPayloadData(df)
    getCoreData(df)

    # Check if the lists have been populated correctly
    print("BoosterVersion:", BoosterVersion[:5])  # Display first 5 items in BoosterVersion list
    print("PayloadMass:", PayloadMass[:5])  # Display first 5 items in PayloadMass list

    # Construct the final dictionary to create the DataFrame
    launch_dict = {
        'FlightNumber': list(df['flight_number']),
        'Date': list(df['date']),
        'BoosterVersion': BoosterVersion,
        'PayloadMass': PayloadMass,
        'Orbit': Orbit,
        'LaunchSite': LaunchSite,
        'Outcome': Outcome,
        'Flights': Flights,
        'GridFins': GridFins,
        'Reused': Reused,
        'Legs': Legs,
        'LandingPad': LandingPad,
        'Block': Block,
        'ReusedCount': ReusedCount,
        'Serial': Serial,
        'Longitude': Longitude,
        'Latitude': Latitude
    }

    # Create the final DataFrame
    launch_df = pd.DataFrame(launch_dict)

    # Display the first few rows of the final DataFrame
    print("\nFinal DataFrame:")
    print(launch_df.head())

    # Clean missing values in the final DataFrame
    launch_df['Outcome'].fillna('Unknown', inplace=True)
    launch_df['LaunchSite'].fillna('Unknown', inplace=True)
    launch_df.dropna(subset=['FlightNumber', 'Date'], inplace=True)

    print("\nCleaned DataFrame:")
    print(launch_df.head())

else:
    print(f"Failed to fetch data, Status Code: {response.status_code}")


Initial Data:
       static_fire_date_utc  static_fire_date_unix    net  window  \
0  2006-03-17T00:00:00.000Z           1.142554e+09  False     0.0   
1                      None                    NaN  False     0.0   
2                      None                    NaN  False     0.0   
3  2008-09-20T00:00:00.000Z           1.221869e+09  False     0.0   
4                      None                    NaN  False     0.0   

                     rocket success  \
0  5e9d0d95eda69955f709d1eb   False   
1  5e9d0d95eda69955f709d1eb   False   
2  5e9d0d95eda69955f709d1eb   False   
3  5e9d0d95eda69955f709d1eb    True   
4  5e9d0d95eda69955f709d1eb    True   

                                                                                                            failures  \
0                                                [{'time': 33, 'altitude': None, 'reason': 'merlin engine failure'}]   
1            [{'time': 301, 'altitude': 289, 'reason': 'harmonic oscillation leading to prematur

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  launch_df['Outcome'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  launch_df['LaunchSite'].fillna('Unknown', inplace=True)
