In [67]:
import requests                         # to make HTTP request; gets Data from URL

import numpy as np, pandas as pd        # managing the Data

from datetime import datetime                         # Lets us represent Dates


### Sources for Data:
Rocket Data: https://docs.spacexdata.com/#16c58b5e-44de-4183-b858-0fae51d242a5

SpaceX V4 Data: https://github.com/r-spacex/SpaceX-API/blob/master/docs/rockets/v4/query.md

In [69]:
# to extract Launch data between certain Start and End Date:



def get_launch_data_between_dates(start_input, end_input):
    """Download the SpaceX launch list and keep launches dated within a range.

    Parameters
    ----------
    start_input, end_input
        Range bounds, passed to ``pd.to_datetime``. Both bounds are inclusive.

    Returns
    -------
    pandas.DataFrame or None
        The flattened launch records whose ``date_utc`` lies between the two
        bounds, returned as an independent copy so callers can modify it
        freely. ``None`` is returned (after printing a message) when the API
        response status is not 200.

    Raises
    ------
    requests.exceptions.RequestException
        If the request cannot be completed, including when it times out.
    """
    launch_url = "https://api.spacexdata.com/v4/launches"

    # Without a timeout a stalled connection would block the notebook forever.
    api_response = requests.get(launch_url, timeout=30)

    if api_response.status_code != 200:
        print("Occured Error in API response...")
        return None

    launch_data = pd.json_normalize(api_response.json())
    # Keep only the first 10 characters of each timestamp string before
    # parsing, so every launch is compared by its date alone.
    launch_data['date_utc'] = pd.to_datetime(launch_data['date_utc'].str[:10])

    start_date = pd.to_datetime(start_input)
    end_date = pd.to_datetime(end_input)
    print(f" start____ {start_date}  |   end_____{end_date}")

    in_range = (launch_data['date_utc'] >= start_date) & (launch_data['date_utc'] <= end_date)
    # .copy() detaches the result from the full table, so later column
    # assignments on it do not trigger SettingWithCopyWarning.
    return launch_data[in_range].copy()

        


# Fetch the launches dated 2000-01-01 through 2023-01-01 and report the table size.
date_window = {"start_input": "2000-01-01", "end_input": "2023-01-01"}
launch_data = get_launch_data_between_dates(**date_window)
print(launch_data.shape)

 start____ 2000-01-01 00:00:00  |   end_____2023-01-01 00:00:00
(205, 43)


In [70]:
# Keep only the id columns (plus flight number and date) used by the later look-ups.
columns_to_keep = ['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']
launch_data = launch_data[columns_to_keep]

launch_data.head(n=1)

Unnamed: 0,rocket,payloads,launchpad,cores,flight_number,date_utc
0,5e9d0d95eda69955f709d1eb,[5eb0e4b5b6c3bb0006eeb1e1],5e9e4502f5090995de566f86,"[{'core': '5e9e289df35918033d3b2623', 'flight'...",1,2006-03-24


In [71]:
# Cleaning the data:
#   -> keep only launches whose 'cores' list has exactly one entry
#   -> keep only launches whose 'payloads' list has exactly one entry
#   -> unwrap those one-element lists so each cell holds the entry itself

launch_data = launch_data[launch_data['cores'].map(len) == 1]
# .copy() makes the filtered table independent of the frame it was sliced
# from, so the column assignments below do not raise SettingWithCopyWarning.
launch_data = launch_data[launch_data['payloads'].map(len) == 1].copy()

launch_data['cores'] = launch_data['cores'].map(lambda x: x[0])
launch_data['payloads'] = launch_data['payloads'].map(lambda x: x[0])


launch_data.head(n=3)

Unnamed: 0,rocket,payloads,launchpad,cores,flight_number,date_utc
0,5e9d0d95eda69955f709d1eb,5eb0e4b5b6c3bb0006eeb1e1,5e9e4502f5090995de566f86,"{'core': '5e9e289df35918033d3b2623', 'flight':...",1,2006-03-24
1,5e9d0d95eda69955f709d1eb,5eb0e4b6b6c3bb0006eeb1e2,5e9e4502f5090995de566f86,"{'core': '5e9e289ef35918416a3b2624', 'flight':...",2,2007-03-21
3,5e9d0d95eda69955f709d1eb,5eb0e4b7b6c3bb0006eeb1e5,5e9e4502f5090995de566f86,"{'core': '5e9e289ef3591855dc3b2626', 'flight':...",4,2008-09-28


Let's uncover the data hidden behind the IDs in this table.

from <code>Rocket</code>, we can know about: boosterversion, <br>
from <code>Payload</code>, we can know about: payload_mass, orbit, <br>
from <code>launchpad</code>, we can know about: launchsite, longitude, latitude <br>
from <code>cores</code>, we can get data like: flights, gridfins, reused, legs, landingpad, blocks etc


In [None]:
# setting up global variables



In [79]:

BoosterVersion = []

def getBoosterVersion(data):
    """Append the rocket name for every id in ``data['rocket']`` to ``BoosterVersion``.

    One name is appended per row, in row order. Each distinct rocket id is
    requested from the API only once; repeated ids reuse the first answer.

    Parameters
    ----------
    data
        Mapping or DataFrame whose ``'rocket'`` entry iterates over rocket ids.

    Returns
    -------
    None
        Results are written to the module-level ``BoosterVersion`` list.

    Raises
    ------
    requests.exceptions.RequestException
        If a request fails, times out, or returns an HTTP error status.
    """
    names_by_id = {}
    for rocket_id in data['rocket']:
        rocket_key = str(rocket_id)
        if rocket_key not in names_by_id:
            response = requests.get(
                "https://api.spacexdata.com/v4/rockets/" + rocket_key, timeout=30
            )
            # Fail with a clear HTTP error instead of a KeyError on an error payload.
            response.raise_for_status()
            names_by_id[rocket_key] = response.json()['name']
        BoosterVersion.append(names_by_id[rocket_key])

    return


# Fill BoosterVersion with the rocket names for the ids in launch_data['rocket'].
getBoosterVersion(launch_data)



In [81]:
Latitude = []
Longitude = []
LaunchSite = []

def getlaunchData(data):
    """Append latitude, longitude and site name for every id in ``data['launchpad']``.

    Exactly one value is appended to each of ``Latitude``, ``Longitude`` and
    ``LaunchSite`` per row, in row order. A missing (falsy) launchpad id
    contributes ``None`` to all three lists, so they stay the same length as
    the input and line up with the other per-launch columns. Each distinct
    launchpad id is requested from the API only once.

    Parameters
    ----------
    data
        Mapping or DataFrame whose ``'launchpad'`` entry iterates over launchpad ids.

    Raises
    ------
    requests.exceptions.RequestException
        If a request fails, times out, or returns an HTTP error status.
    """
    pads_by_id = {}
    for launchpad_id in data['launchpad']:
        if not launchpad_id:
            # Keep the lists aligned with the input rows.
            Latitude.append(None)
            Longitude.append(None)
            LaunchSite.append(None)
            continue

        pad_key = str(launchpad_id)
        if pad_key not in pads_by_id:
            response = requests.get(
                "https://api.spacexdata.com/v4/launchpads/" + pad_key, timeout=30
            )
            # Fail with a clear HTTP error instead of a KeyError on an error payload.
            response.raise_for_status()
            pads_by_id[pad_key] = response.json()

        pad = pads_by_id[pad_key]
        Latitude.append(pad['latitude'])
        Longitude.append(pad['longitude'])
        LaunchSite.append(pad['name'])

# Fill Latitude, Longitude and LaunchSite from the ids in launch_data['launchpad'].
getlaunchData(launch_data)

In [87]:
Orbit = []
PayloadMass = []

def getPayloadData(data):
    """Append orbit and mass (kg) for every id in ``data['payloads']``.

    Exactly one value is appended to each of ``Orbit`` and ``PayloadMass``
    per row, in row order. A missing (falsy) payload id contributes ``None``
    to both lists, so they stay the same length as the input and line up
    with the other per-launch columns.

    Parameters
    ----------
    data
        Mapping or DataFrame whose ``'payloads'`` entry iterates over payload ids.

    Raises
    ------
    requests.exceptions.RequestException
        If a request fails, times out, or returns an HTTP error status.
    """
    for payload_id in data['payloads']:
        if not payload_id:
            # Keep the lists aligned with the input rows.
            Orbit.append(None)
            PayloadMass.append(None)
            continue

        response = requests.get(
            "https://api.spacexdata.com/v4/payloads/" + str(payload_id), timeout=30
        )
        # Fail with a clear HTTP error instead of a KeyError on an error payload.
        response.raise_for_status()
        payload = response.json()
        Orbit.append(payload['orbit'])
        PayloadMass.append(payload['mass_kg'])


# Fill Orbit and PayloadMass from the ids in launch_data['payloads'].
getPayloadData(launch_data)


 177     177


In [92]:
Outcome = []
Flights = []
GridFins = []
Reused = []
Legs = []
LandingPad = []
Block = []
ReusedCount = []
Serial = []



def getCoreData(data):
    """Append per-launch core details for every record in ``data['cores']``.

    Each record is a dict read for the keys ``core``, ``landing_success``,
    ``landing_type``, ``flight``, ``gridfins``, ``reused``, ``legs`` and
    ``landpad``. When ``core`` holds an id, that core is looked up through
    the API to obtain ``block``, ``reuse_count`` and ``serial``; when it is
    ``None``, ``None`` is stored for those three. Every list receives
    exactly one value per record, in order. Each distinct core id is
    requested only once.

    Parameters
    ----------
    data
        Mapping or DataFrame whose ``'cores'`` entry iterates over core records.

    Raises
    ------
    requests.exceptions.RequestException
        If a request fails, times out, or returns an HTTP error status.
    """
    cores_by_id = {}
    for core in data['cores']:
        if core['core'] is not None:
            core_key = str(core['core'])
            if core_key not in cores_by_id:
                response = requests.get(
                    "https://api.spacexdata.com/v4/cores/" + core_key, timeout=30
                )
                # Fail with a clear HTTP error instead of a KeyError on an error payload.
                response.raise_for_status()
                cores_by_id[core_key] = response.json()
            details = cores_by_id[core_key]
            Block.append(details['block'])
            ReusedCount.append(details['reuse_count'])
            Serial.append(details['serial'])
        else:
            Block.append(None)
            ReusedCount.append(None)
            Serial.append(None)

        # Outcome is "<landing_success> <landing_type>", e.g. "False Ocean".
        Outcome.append(str(core['landing_success']) + ' ' + str(core['landing_type']))
        Flights.append(core['flight'])
        GridFins.append(core['gridfins'])
        Reused.append(core['reused'])
        Legs.append(core['legs'])
        LandingPad.append(core['landpad'])



# Fill the core-related lists (Outcome, Flights, ..., Serial) from launch_data['cores'].
getCoreData(launch_data)
            

In [112]:
# Gather every per-launch column under the name it will carry in the final table.
launch_data_dict = dict(
    FlightNumber=list(launch_data['flight_number']),
    date=list(launch_data['date_utc']),
    BoosterVersion=BoosterVersion,
    PayloadMass=PayloadMass,
    LaunchSite=LaunchSite,
    Latitude=Latitude,
    Longitude=Longitude,
    Orbit=Orbit,
    Flights=Flights,
    Reused=Reused,
    GridFins=GridFins,
    Legs=Legs,
    LandingPad=LandingPad,
    ReusedCount=ReusedCount,
    Serial=Serial,
    Block=Block,
    Outcome=Outcome,
)

In [113]:
# Turn the collected columns into one table and preview its first rows.
new_launch_data = pd.DataFrame(data=launch_data_dict)


new_launch_data.head(3)

Unnamed: 0,FlightNumber,date,BoosterVersion,PayloadMass,LaunchSite,Latitude,Longitude,Orbit,Flights,Reused,GridFins,Legs,LandingPad,ReusedCount,Serial,Block,Outcome
0,1,2006-03-24,Falcon 1,20.0,Kwajalein Atoll,9.047721,167.743129,LEO,1.0,False,False,False,,0.0,Merlin1A,,None None
1,2,2007-03-21,Falcon 1,,Kwajalein Atoll,9.047721,167.743129,LEO,1.0,False,False,False,,0.0,Merlin2A,,None None
2,4,2008-09-28,Falcon 1,165.0,Kwajalein Atoll,9.047721,167.743129,LEO,1.0,False,False,False,,0.0,Merlin2C,,None None


Filter the data to keep only the launches where the booster is Falcon 9.


In [114]:
is_falcon_9 = new_launch_data['BoosterVersion'] == 'Falcon 9'
# .copy() makes falcon_9_data an independent table, so later cells can modify
# its columns without writing into a slice of new_launch_data
# (which raises SettingWithCopyWarning).
falcon_9_data = new_launch_data[is_falcon_9].copy()
falcon_9_data.shape

(173, 17)

In [115]:
# Renumber the flights 1..N now that only Falcon 9 rows remain. assign() builds
# a new frame, which avoids setting values on a filtered slice
# (SettingWithCopyWarning).
falcon_9_data = falcon_9_data.assign(
    FlightNumber=list(range(1, falcon_9_data.shape[0] + 1))
)
falcon_9_data.head(n=6)

Unnamed: 0,FlightNumber,date,BoosterVersion,PayloadMass,LaunchSite,Latitude,Longitude,Orbit,Flights,Reused,GridFins,Legs,LandingPad,ReusedCount,Serial,Block,Outcome
4,1,2010-06-04,Falcon 9,,CCSFS SLC 40,28.561857,-80.577366,LEO,1.0,False,False,False,,0.0,B0003,1.0,None None
5,2,2012-05-22,Falcon 9,525.0,CCSFS SLC 40,28.561857,-80.577366,LEO,1.0,False,False,False,,0.0,B0005,1.0,None None
6,3,2013-03-01,Falcon 9,677.0,CCSFS SLC 40,28.561857,-80.577366,ISS,1.0,False,False,False,,0.0,B0007,1.0,None None
7,4,2013-09-29,Falcon 9,500.0,VAFB SLC 4E,34.632093,-120.610829,PO,1.0,False,False,False,,0.0,B1003,1.0,False Ocean
8,5,2013-12-03,Falcon 9,3170.0,CCSFS SLC 40,28.561857,-80.577366,GTO,1.0,False,False,False,,0.0,B1004,1.0,None None
9,6,2014-01-06,Falcon 9,3325.0,CCSFS SLC 40,28.561857,-80.577366,GTO,1.0,False,False,False,,0.0,B1005,1.0,None None


Handling the Missing Values:

In [119]:
# Count the missing values in each column of falcon_9_data.
falcon_9_data.isnull().sum()

FlightNumber       0
date               0
BoosterVersion     0
PayloadMass        0
LaunchSite         0
Latitude           0
Longitude          0
Orbit              1
Flights            5
Reused             5
GridFins           5
Legs               5
LandingPad        31
ReusedCount        5
Serial             5
Block              5
Outcome            0
dtype: int64

In [117]:
# Show the mean of the PayloadMass column.
print(falcon_9_data['PayloadMass'].mean())

8183.607718120806


In [None]:
# Replace missing payload masses with the mean of the known ones.
mean_payload_weight = falcon_9_data['PayloadMass'].mean()

# assign() returns a new frame instead of setting a column on a filtered
# slice, which would raise SettingWithCopyWarning.
falcon_9_data = falcon_9_data.assign(
    PayloadMass=falcon_9_data['PayloadMass'].fillna(mean_payload_weight)
)

print(falcon_9_data['PayloadMass'].mean())

In [120]:
# Drop rows with a missing value in any column except LandingPad, which may stay empty.
required_columns = [name for name in falcon_9_data.columns if name != 'LandingPad']
falcon_9_data_cleaned = falcon_9_data.dropna(subset=required_columns)

In [124]:
# Count the missing values left in each column after the dropna step.
falcon_9_data_cleaned.isnull().sum()

FlightNumber       0
date               0
BoosterVersion     0
PayloadMass        0
LaunchSite         0
Latitude           0
Longitude          0
Orbit              0
Flights            0
Reused             0
GridFins           0
Legs               0
LandingPad        26
ReusedCount        0
Serial             0
Block              0
Outcome            0
dtype: int64

In [123]:
# Write the cleaned table to a CSV file, leaving out the DataFrame index.
falcon_9_data_cleaned.to_csv("falcon_9_cleaned_data.csv", index = False)