In [None]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Every auxiliary dataset is served from the same versioned API root,
# so the URLs are assembled from the root plus the endpoint name.
API_ROOT = "https://api.spacexdata.com/v4"
ENDPOINT_NAMES = (
    "capsules",
    "cores",
    "landpads",
    "launchpads",
    "payloads",
    "ships",
    "rockets",
)
URLs = [f"{API_ROOT}/{endpoint}" for endpoint in ENDPOINT_NAMES]

## Requesting Space X APIs

In [None]:
# requests applies no timeout unless one is passed explicitly, so an
# unresponsive endpoint would otherwise block the notebook forever.
REQUEST_TIMEOUT_SECONDS = 30

# Maps the endpoint name (last URL path segment, e.g. "rockets") to the
# flattened DataFrame built from that endpoint's JSON response.
dataframes = {}
for url in URLs:
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    if response.status_code == 200:  # only parse the body of a successful response
        df = pd.json_normalize(response.json())
        endpoint_name = url.split("/")[-1]
        dataframes[endpoint_name] = df
    else:
        print(f"Failed to retrieve data from {url}. Status code: {response.status_code}")

## Requesting Past Launches Data

In [None]:
# Only launches that have already happened are requested.
url = "https://api.spacexdata.com/v4/launches/past"
response = requests.get(url, timeout=30)  # seconds; requests never times out by default
# Fail loudly on an HTTP error instead of flattening an error payload
# into a misleading DataFrame.
response.raise_for_status()
launches_data = pd.json_normalize(response.json())
launches_data

Unnamed: 0,static_fire_date_utc,static_fire_date_unix,net,window,rocket,success,failures,details,crew,ships,...,links.reddit.media,links.reddit.recovery,links.flickr.small,links.flickr.original,links.presskit,links.webcast,links.youtube_id,links.article,links.wikipedia,fairings
0,2006-03-17T00:00:00.000Z,1.142554e+09,False,0.0,5e9d0d95eda69955f709d1eb,False,"[{'time': 33, 'altitude': None, 'reason': 'mer...",Engine failure at 33 seconds and loss of vehicle,[],[],...,,,[],[],,https://www.youtube.com/watch?v=0a_00nJ_Y88,0a_00nJ_Y88,https://www.space.com/2196-spacex-inaugural-fa...,https://en.wikipedia.org/wiki/DemoSat,
1,,,False,0.0,5e9d0d95eda69955f709d1eb,False,"[{'time': 301, 'altitude': 289, 'reason': 'har...",Successful first stage burn and transition to ...,[],[],...,,,[],[],,https://www.youtube.com/watch?v=Lk4zQ2wP-Nc,Lk4zQ2wP-Nc,https://www.space.com/3590-spacex-falcon-1-roc...,https://en.wikipedia.org/wiki/DemoSat,
2,,,False,0.0,5e9d0d95eda69955f709d1eb,False,"[{'time': 140, 'altitude': 35, 'reason': 'resi...",Residual stage 1 thrust led to collision betwe...,[],[],...,,,[],[],,https://www.youtube.com/watch?v=v0w9p3U8860,v0w9p3U8860,http://www.spacex.com/news/2013/02/11/falcon-1...,https://en.wikipedia.org/wiki/Trailblazer_(sat...,
3,2008-09-20T00:00:00.000Z,1.221869e+09,False,0.0,5e9d0d95eda69955f709d1eb,True,[],Ratsat was carried to orbit on the first succe...,[],[],...,,,[],[],,https://www.youtube.com/watch?v=dLQ2tZEH6G0,dLQ2tZEH6G0,https://en.wikipedia.org/wiki/Ratsat,https://en.wikipedia.org/wiki/Ratsat,
4,,,False,0.0,5e9d0d95eda69955f709d1eb,True,[],,[],[],...,,,[],[],http://www.spacex.com/press/2012/12/19/spacexs...,https://www.youtube.com/watch?v=yTaIDooc8Og,yTaIDooc8Og,http://www.spacex.com/news/2013/02/12/falcon-1...,https://en.wikipedia.org/wiki/RazakSAT,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,,,False,,5e9d0d95eda69973a809d1ec,True,[],,[],[],...,,https://www.reddit.com/r/spacex/comments/k2ts1...,[],[],,https://youtu.be/NONM-xsKMSs,NONM-xsKMSs,,,
183,,,False,,5e9d0d95eda69973a809d1ec,True,[],,[],[],...,,https://www.reddit.com/r/spacex/comments/k2ts1...,[],[],,,,,,
184,,,False,,5e9d0d95eda69973a809d1ec,True,[],,[],[],...,,https://www.reddit.com/r/spacex/comments/k2ts1...,[],[],,https://youtu.be/ZlQHF_yBkMQ,ZlQHF_yBkMQ,,,
185,,,False,,5e9d0d95eda69973a809d1ec,True,[],,[],[],...,,https://www.reddit.com/r/spacex/comments/k2ts1...,[],[],,https://youtu.be/VVu2bSJJhgI,VVu2bSJJhgI,,,


In [None]:
launches_data.shape

(187, 43)

In [None]:
launches_data.columns

Index(['static_fire_date_utc', 'static_fire_date_unix', 'net', 'window',
       'rocket', 'success', 'failures', 'details', 'crew', 'ships', 'capsules',
       'payloads', 'launchpad', 'flight_number', 'name', 'date_utc',
       'date_unix', 'date_local', 'date_precision', 'upcoming', 'cores',
       'auto_update', 'tbd', 'launch_library_id', 'id', 'fairings.reused',
       'fairings.recovery_attempt', 'fairings.recovered', 'fairings.ships',
       'links.patch.small', 'links.patch.large', 'links.reddit.campaign',
       'links.reddit.launch', 'links.reddit.media', 'links.reddit.recovery',
       'links.flickr.small', 'links.flickr.original', 'links.presskit',
       'links.webcast', 'links.youtube_id', 'links.article', 'links.wikipedia',
       'fairings'],
      dtype='object')

## What do these attributes mean?

<ul style="color: #cfa21b;">
    <li><strong>static_fire_date_utc:</strong> Date (UTC) of the static fire test, in which the engines are fired for a few seconds before the launch to verify that the engines and the other systems work correctly.</li>
    <li><strong>static_fire_date_unix:</strong> Another way to represent the date in terms of seconds since January 1, 1970.</li>
    <li><strong>net:</strong> Boolean flag indicating whether the launch date is only a NET ("No Earlier Than") estimate rather than a confirmed date.</li>
    <li><strong>window:</strong> The timeframe within which to launch.</li>
    <li><strong>rocket:</strong> Rocket type doing the launch.</li>
    <li><strong>success:</strong> Indicates if the launch is successful.</li>
    <li><strong>failures:</strong> List of failures, each with its time, altitude, and reason.</li>
    <li><strong>details:</strong> Details about the launch, if there are any.</li>
    <li><strong>crew:</strong> List of crew members for the launch.</li>
    <li><strong>ships:</strong> List of ships involved in the recovery.</li>
    <li><strong>capsules:</strong> List of capsules used in the mission.</li>
    <li><strong>payloads:</strong> List of payloads being launched.</li>
    <li><strong>launchpad:</strong> Identifier for the launch pad used.</li>
    <li><strong>flight_number:</strong> Sequential number of the launch (1 for the first flight).</li>
    <li><strong>name:</strong> Name of the launch.</li>
    <li><strong>date_utc:</strong> Launch date and time in UTC.</li>
    <li><strong>date_unix:</strong> Unix timestamp for the launch date.</li>
    <li><strong>date_local:</strong> Local date and time of the launch.</li>
    <li><strong>date_precision:</strong> Precision of the launch date (exact, TBD, etc.).</li>
    <li><strong>upcoming:</strong> Boolean indicating if the launch is upcoming.</li>
    <li><strong>cores:</strong> List of rocket cores used in the launch.</li>
    <li><strong>auto_update:</strong> Boolean indicating if the launch information is automatically updated.</li>
    <li><strong>tbd:</strong> Boolean indicating if the launch date is still to be determined.</li>
    <li><strong>launch_library_id:</strong> Identifier for the launch in the Launch Library database.</li>
    <li><strong>id:</strong> Unique identifier for the launch.</li>
    <li><strong>fairings.reused:</strong> Boolean indicating if fairings were reused.</li>
    <li><strong>fairings.recovery_attempt:</strong> Boolean indicating if there was an attempt to recover the fairings.</li>
    <li><strong>fairings.recovered:</strong> Boolean indicating if the fairings were successfully recovered.</li>
    <li><strong>fairings.ships:</strong> List of ships involved in the fairing recovery.</li>
    <li><strong>fairings:</strong> Object containing information about fairings (protective covers for payloads).</li>
</ul>


<p >
    Based on our understanding of the attributes, we will:
</p>
<ol style="color: #80bf13;">
    <li>Remove unnecessary columns.</li>
    <li>Split some columns that have values in the form of lists.</li>
    <li>Sort the dataframe using the launch id.</li>
</ol>

In [None]:
launches_data["net"].unique() # no need for it

array([False])

In [None]:
launches_data["rocket"].value_counts() # there are 3 kinds of rocket that are used

Unnamed: 0_level_0,count
rocket,Unnamed: 1_level_1
5e9d0d95eda69973a809d1ec,179
5e9d0d95eda69955f709d1eb,5
5e9d0d95eda69974db09d1ed,3


In [None]:
launches_data["failures"][0] # one of the columns that would need to be split into 3 columns (time, altitude, reason)

[{'time': 33, 'altitude': None, 'reason': 'merlin engine failure'}]

In [None]:
launches_data["date_precision"].value_counts()

Unnamed: 0_level_0,count
date_precision,Unnamed: 1_level_1
hour,186
day,1


In [None]:
launches_data["upcoming"].value_counts() # no need for it

Unnamed: 0_level_0,count
upcoming,Unnamed: 1_level_1
False,187


In [None]:
launches_data["fairings"].isnull().sum() # not needed: every value in this column is NaN

187

<p style="color: #f7310a;">
    Now we can do the following:
</p>
<ol style="color: #0af761;">
    <li>Remove unnecessary columns: <strong>['crew',
    "net",
    'date_unix',
    'static_fire_date_unix',
    'upcoming',
    'auto_update',
    'tbd',
    'links.patch.small',
    'links.patch.large',
    'links.reddit.campaign',
    'links.reddit.launch',
    'links.reddit.media',
    'links.reddit.recovery',
    'links.flickr.small',
    'links.flickr.original',
    'links.presskit',
    'links.webcast',
    'links.youtube_id',
    'links.article',
    'links.wikipedia',
    'fairings',
    'details',
    "launch_library_id",
    "date_precision","fairings.reused","fairings.recovery_attempt","fairings.recovered","static_fire_date_utc","capsules","fairings.ships","window","date_local"]</strong>.</li>
    <li>Split columns: <strong>[cores]</strong>.</li>
    <li>Sort the dataframe using launch ID.</li>
    <li>Since we won't deal with upcoming launches, we can request only past launches from the API.</li>
</ol>

In [None]:
# Columns judged irrelevant for the analysis: link/media fields, duplicate
# date representations, constant flags and fairing/capsule details.
unneeded_columns = [
    "static_fire_date_utc",
    "crew",
    "net",
    "date_unix",
    "static_fire_date_unix",
    "upcoming",
    "auto_update",
    "tbd",
    "links.patch.small",
    "links.patch.large",
    "links.reddit.campaign",
    "links.reddit.launch",
    "links.reddit.media",
    "links.reddit.recovery",
    "links.flickr.small",
    "links.flickr.original",
    "links.presskit",
    "links.webcast",
    "links.youtube_id",
    "links.article",
    "links.wikipedia",
    "fairings",
    "details",
    "launch_library_id",
    "date_precision",
    "fairings.reused",
    "fairings.recovery_attempt",
    "fairings.recovered",
    "capsules",
    "fairings.ships",
    "window",
    "date_local",
]
launches_data = launches_data.drop(columns=unneeded_columns)

In [None]:
launches_data.shape

(187, 11)

In [None]:
launches_data.head()

Unnamed: 0,rocket,success,failures,ships,payloads,launchpad,flight_number,name,date_utc,cores,id
0,5e9d0d95eda69955f709d1eb,False,"[{'time': 33, 'altitude': None, 'reason': 'mer...",[],[5eb0e4b5b6c3bb0006eeb1e1],5e9e4502f5090995de566f86,1,FalconSat,2006-03-24T22:30:00.000Z,"[{'core': '5e9e289df35918033d3b2623', 'flight'...",5eb87cd9ffd86e000604b32a
1,5e9d0d95eda69955f709d1eb,False,"[{'time': 301, 'altitude': 289, 'reason': 'har...",[],[5eb0e4b6b6c3bb0006eeb1e2],5e9e4502f5090995de566f86,2,DemoSat,2007-03-21T01:10:00.000Z,"[{'core': '5e9e289ef35918416a3b2624', 'flight'...",5eb87cdaffd86e000604b32b
2,5e9d0d95eda69955f709d1eb,False,"[{'time': 140, 'altitude': 35, 'reason': 'resi...",[],"[5eb0e4b6b6c3bb0006eeb1e3, 5eb0e4b6b6c3bb0006e...",5e9e4502f5090995de566f86,3,Trailblazer,2008-08-03T03:34:00.000Z,"[{'core': '5e9e289ef3591814873b2625', 'flight'...",5eb87cdbffd86e000604b32c
3,5e9d0d95eda69955f709d1eb,True,[],[],[5eb0e4b7b6c3bb0006eeb1e5],5e9e4502f5090995de566f86,4,RatSat,2008-09-28T23:15:00.000Z,"[{'core': '5e9e289ef3591855dc3b2626', 'flight'...",5eb87cdbffd86e000604b32d
4,5e9d0d95eda69955f709d1eb,True,[],[],[5eb0e4b7b6c3bb0006eeb1e6],5e9e4502f5090995de566f86,5,RazakSat,2009-07-13T03:35:00.000Z,"[{'core': '5e9e289ef359184f103b2627', 'flight'...",5eb87cdcffd86e000604b32e


In [None]:
launches_data.failures

Unnamed: 0,failures
0,"[{'time': 33, 'altitude': None, 'reason': 'mer..."
1,"[{'time': 301, 'altitude': 289, 'reason': 'har..."
2,"[{'time': 140, 'altitude': 35, 'reason': 'resi..."
3,[]
4,[]
...,...
182,[]
183,[]
184,[]
185,[]


In [None]:
launches_data[launches_data["success"]==True]["failures"].value_counts()

Unnamed: 0_level_0,count
failures,Unnamed: 1_level_1
[],181


Since there are only 5 failures, we can drop this column.

In [None]:
# The per-launch failure details are not used further on.
launches_data = launches_data.drop(columns="failures")

In [None]:
launches_data.shape

(187, 10)

In [None]:
launches_data.columns

Index(['rocket', 'success', 'ships', 'payloads', 'launchpad', 'flight_number',
       'name', 'date_utc', 'cores', 'id'],
      dtype='object')

In [None]:
launches_data.head()

Unnamed: 0,rocket,success,ships,payloads,launchpad,flight_number,name,date_utc,cores,id
0,5e9d0d95eda69955f709d1eb,False,[],[5eb0e4b5b6c3bb0006eeb1e1],5e9e4502f5090995de566f86,1,FalconSat,2006-03-24T22:30:00.000Z,"[{'core': '5e9e289df35918033d3b2623', 'flight'...",5eb87cd9ffd86e000604b32a
1,5e9d0d95eda69955f709d1eb,False,[],[5eb0e4b6b6c3bb0006eeb1e2],5e9e4502f5090995de566f86,2,DemoSat,2007-03-21T01:10:00.000Z,"[{'core': '5e9e289ef35918416a3b2624', 'flight'...",5eb87cdaffd86e000604b32b
2,5e9d0d95eda69955f709d1eb,False,[],"[5eb0e4b6b6c3bb0006eeb1e3, 5eb0e4b6b6c3bb0006e...",5e9e4502f5090995de566f86,3,Trailblazer,2008-08-03T03:34:00.000Z,"[{'core': '5e9e289ef3591814873b2625', 'flight'...",5eb87cdbffd86e000604b32c
3,5e9d0d95eda69955f709d1eb,True,[],[5eb0e4b7b6c3bb0006eeb1e5],5e9e4502f5090995de566f86,4,RatSat,2008-09-28T23:15:00.000Z,"[{'core': '5e9e289ef3591855dc3b2626', 'flight'...",5eb87cdbffd86e000604b32d
4,5e9d0d95eda69955f709d1eb,True,[],[5eb0e4b7b6c3bb0006eeb1e6],5e9e4502f5090995de566f86,5,RazakSat,2009-07-13T03:35:00.000Z,"[{'core': '5e9e289ef359184f103b2627', 'flight'...",5eb87cdcffd86e000604b32e


In [None]:
launches_data["cores"][1]

[{'core': '5e9e289ef35918416a3b2624',
  'flight': 1,
  'gridfins': False,
  'legs': False,
  'reused': False,
  'landing_attempt': False,
  'landing_success': None,
  'landing_type': None,
  'landpad': None}]

In [None]:
def _first_core_field(data, key):
    """Return ``data[0][key]``, or ``None`` when there is no core record.

    ``data`` is one cell of the ``cores`` column: a list of per-core dicts.
    Only the first dict is inspected; any further entries are ignored.
    Anything that is not a non-empty list/tuple (``[]``, ``None``, ``NaN``)
    yields ``None`` instead of raising.
    A missing ``key`` still raises ``KeyError`` so that an unexpected record
    layout is not silently hidden.
    """
    if not isinstance(data, (list, tuple)) or len(data) == 0:
        return None
    return data[0][key]


def get_core_id(data):
    """Return the ``core`` id of the first core record, or ``None``."""
    return _first_core_field(data, "core")


def get_core_flight(data):
    """Return the ``flight`` value of the first core record, or ``None``."""
    return _first_core_field(data, "flight")


def get_core_gridfins(data):
    """Return the ``gridfins`` flag of the first core record, or ``None``."""
    return _first_core_field(data, "gridfins")


def get_core_legs(data):
    """Return the ``legs`` flag of the first core record, or ``None``."""
    return _first_core_field(data, "legs")


def get_core_reused(data):
    """Return the ``reused`` flag of the first core record, or ``None``."""
    return _first_core_field(data, "reused")


def get_core_landing_attempt(data):
    """Return the ``landing_attempt`` flag of the first core record, or ``None``."""
    return _first_core_field(data, "landing_attempt")


def get_core_landing_success(data):
    """Return the ``landing_success`` value of the first core record, or ``None``."""
    return _first_core_field(data, "landing_success")


def get_core_landing_type(data):
    """Return the ``landing_type`` of the first core record, or ``None``."""
    return _first_core_field(data, "landing_type")


def get_core_landpad(data):
    """Return the ``landpad`` id of the first core record, or ``None``."""
    return _first_core_field(data, "landpad")

In [None]:
# New column name -> function extracting that field from a `cores` cell.
core_field_extractors = {
    'cores.core_id': get_core_id,
    'cores.flight': get_core_flight,
    'cores.gridfins': get_core_gridfins,
    'cores.legs': get_core_legs,
    'cores.reused': get_core_reused,
    'cores.landing_attempt': get_core_landing_attempt,
    'cores.landing_success': get_core_landing_success,
    'cores.landing_type': get_core_landing_type,
    'cores.landpad': get_core_landpad,
}
# Dicts keep insertion order, so the columns are appended in the order listed above.
for column_name, extract_field in core_field_extractors.items():
    launches_data[column_name] = launches_data['cores'].apply(extract_field)

In [None]:
# The nested list is redundant once its fields live in their own `cores.*` columns.
launches_data = launches_data.drop(columns="cores")

In [None]:
launches_data.columns

Index(['rocket', 'success', 'ships', 'payloads', 'launchpad', 'flight_number',
       'name', 'date_utc', 'id', 'cores.core_id', 'cores.flight',
       'cores.gridfins', 'cores.legs', 'cores.reused', 'cores.landing_attempt',
       'cores.landing_success', 'cores.landing_type', 'cores.landpad'],
      dtype='object')

In [None]:
launches_data.head()

Unnamed: 0,rocket,success,ships,payloads,launchpad,flight_number,name,date_utc,id,cores.core_id,cores.flight,cores.gridfins,cores.legs,cores.reused,cores.landing_attempt,cores.landing_success,cores.landing_type,cores.landpad
0,5e9d0d95eda69955f709d1eb,False,[],[5eb0e4b5b6c3bb0006eeb1e1],5e9e4502f5090995de566f86,1,FalconSat,2006-03-24T22:30:00.000Z,5eb87cd9ffd86e000604b32a,5e9e289df35918033d3b2623,1,False,False,False,False,,,
1,5e9d0d95eda69955f709d1eb,False,[],[5eb0e4b6b6c3bb0006eeb1e2],5e9e4502f5090995de566f86,2,DemoSat,2007-03-21T01:10:00.000Z,5eb87cdaffd86e000604b32b,5e9e289ef35918416a3b2624,1,False,False,False,False,,,
2,5e9d0d95eda69955f709d1eb,False,[],"[5eb0e4b6b6c3bb0006eeb1e3, 5eb0e4b6b6c3bb0006e...",5e9e4502f5090995de566f86,3,Trailblazer,2008-08-03T03:34:00.000Z,5eb87cdbffd86e000604b32c,5e9e289ef3591814873b2625,1,False,False,False,False,,,
3,5e9d0d95eda69955f709d1eb,True,[],[5eb0e4b7b6c3bb0006eeb1e5],5e9e4502f5090995de566f86,4,RatSat,2008-09-28T23:15:00.000Z,5eb87cdbffd86e000604b32d,5e9e289ef3591855dc3b2626,1,False,False,False,False,,,
4,5e9d0d95eda69955f709d1eb,True,[],[5eb0e4b7b6c3bb0006eeb1e6],5e9e4502f5090995de566f86,5,RazakSat,2009-07-13T03:35:00.000Z,5eb87cdcffd86e000604b32e,5e9e289ef359184f103b2627,1,False,False,False,False,,,


In [None]:
launches_data["cores.flight"].value_counts()

Unnamed: 0_level_0,count
cores.flight,Unnamed: 1_level_1
1,72
2,29
3,15
4,15
5,11
6,10
7,8
8,6
9,6
10,5


In [None]:
launches_data["cores.landing_success"].value_counts()

Unnamed: 0_level_0,count
cores.landing_success,Unnamed: 1_level_1
True,143
False,13


## We will get the booster version from the rocket Data

In [None]:
rockets_data = dataframes["rockets"]

In [None]:
rockets_data

Unnamed: 0,payload_weights,flickr_images,name,type,active,stages,boosters,cost_per_launch,success_rate_pct,first_flight,...,engines.number,engines.type,engines.version,engines.layout,engines.engine_loss_max,engines.propellant_1,engines.propellant_2,engines.thrust_to_weight,landing_legs.number,landing_legs.material
0,"[{'id': 'leo', 'name': 'Low Earth Orbit', 'kg'...","[https://imgur.com/DaCfMsj.jpg, https://imgur....",Falcon 1,rocket,False,2,0,6700000,40,2006-03-24,...,1,merlin,1C,single,0.0,liquid oxygen,RP-1 kerosene,96.0,0,
1,"[{'id': 'leo', 'name': 'Low Earth Orbit', 'kg'...",[https://farm1.staticflickr.com/929/2878733830...,Falcon 9,rocket,True,2,0,50000000,98,2010-06-04,...,9,merlin,1D+,octaweb,2.0,liquid oxygen,RP-1 kerosene,180.1,4,carbon fiber
2,"[{'id': 'leo', 'name': 'Low Earth Orbit', 'kg'...",[https://farm5.staticflickr.com/4599/385838292...,Falcon Heavy,rocket,True,2,2,90000000,100,2018-02-06,...,27,merlin,1D+,octaweb,6.0,liquid oxygen,RP-1 kerosene,180.1,12,carbon fiber
3,"[{'id': 'leo', 'name': 'Low Earth Orbit', 'kg'...",[https://live.staticflickr.com/65535/489541389...,Starship,rocket,False,2,0,7000000,0,2021-12-01,...,37,raptor,,,,liquid oxygen,liquid methane,107.0,6,stainless steel


In [None]:
rockets_data.columns

Index(['payload_weights', 'flickr_images', 'name', 'type', 'active', 'stages',
       'boosters', 'cost_per_launch', 'success_rate_pct', 'first_flight',
       'country', 'company', 'wikipedia', 'description', 'id', 'height.meters',
       'height.feet', 'diameter.meters', 'diameter.feet', 'mass.kg', 'mass.lb',
       'first_stage.thrust_sea_level.kN', 'first_stage.thrust_sea_level.lbf',
       'first_stage.thrust_vacuum.kN', 'first_stage.thrust_vacuum.lbf',
       'first_stage.reusable', 'first_stage.engines',
       'first_stage.fuel_amount_tons', 'first_stage.burn_time_sec',
       'second_stage.thrust.kN', 'second_stage.thrust.lbf',
       'second_stage.payloads.composite_fairing.height.meters',
       'second_stage.payloads.composite_fairing.height.feet',
       'second_stage.payloads.composite_fairing.diameter.meters',
       'second_stage.payloads.composite_fairing.diameter.feet',
       'second_stage.payloads.option_1', 'second_stage.reusable',
       'second_stage.engines', 's

In [None]:
def get_booster_version(launches=None, rockets=None):
    """Attach the rocket name to every launch via a left merge.

    Parameters
    ----------
    launches : pandas.DataFrame, optional
        Launch table with a ``rocket`` column holding rocket ids.
        Defaults to the notebook-level ``launches_data``.
    rockets : pandas.DataFrame, optional
        Rocket table with ``id`` and ``name`` columns.
        Defaults to the notebook-level ``rockets_data``.

    Returns
    -------
    pandas.DataFrame
        ``launches`` plus the rocket ``id`` and ``name`` columns. Column
        names present on both sides receive pandas' ``_x`` (launch) and
        ``_y`` (rocket) suffixes, e.g. ``name_x`` / ``name_y``.

    Raises
    ------
    pandas.errors.MergeError
        If a rocket id occurs more than once in ``rockets``; without this
        check such duplicates would silently multiply launch rows.
    """
    # Fall back to the notebook globals so existing zero-argument calls keep working.
    if launches is None:
        launches = launches_data
    if rockets is None:
        rockets = rockets_data

    rocket_names = rockets[['id', 'name']]
    modified_data = launches.merge(
        rocket_names,
        how='left',
        left_on='rocket',
        right_on='id',
        validate='many_to_one',
    )
    return modified_data



In [None]:
launches_data = get_booster_version()

In [None]:
launches_data.head()

Unnamed: 0,rocket,success,ships,payloads,launchpad,flight_number,name_x,date_utc,id_x,cores.core_id,cores.flight,cores.gridfins,cores.legs,cores.reused,cores.landing_attempt,cores.landing_success,cores.landing_type,cores.landpad,id_y,name_y
0,5e9d0d95eda69955f709d1eb,False,[],[5eb0e4b5b6c3bb0006eeb1e1],5e9e4502f5090995de566f86,1,FalconSat,2006-03-24T22:30:00.000Z,5eb87cd9ffd86e000604b32a,5e9e289df35918033d3b2623,1,False,False,False,False,,,,5e9d0d95eda69955f709d1eb,Falcon 1
1,5e9d0d95eda69955f709d1eb,False,[],[5eb0e4b6b6c3bb0006eeb1e2],5e9e4502f5090995de566f86,2,DemoSat,2007-03-21T01:10:00.000Z,5eb87cdaffd86e000604b32b,5e9e289ef35918416a3b2624,1,False,False,False,False,,,,5e9d0d95eda69955f709d1eb,Falcon 1
2,5e9d0d95eda69955f709d1eb,False,[],"[5eb0e4b6b6c3bb0006eeb1e3, 5eb0e4b6b6c3bb0006e...",5e9e4502f5090995de566f86,3,Trailblazer,2008-08-03T03:34:00.000Z,5eb87cdbffd86e000604b32c,5e9e289ef3591814873b2625,1,False,False,False,False,,,,5e9d0d95eda69955f709d1eb,Falcon 1
3,5e9d0d95eda69955f709d1eb,True,[],[5eb0e4b7b6c3bb0006eeb1e5],5e9e4502f5090995de566f86,4,RatSat,2008-09-28T23:15:00.000Z,5eb87cdbffd86e000604b32d,5e9e289ef3591855dc3b2626,1,False,False,False,False,,,,5e9d0d95eda69955f709d1eb,Falcon 1
4,5e9d0d95eda69955f709d1eb,True,[],[5eb0e4b7b6c3bb0006eeb1e6],5e9e4502f5090995de566f86,5,RazakSat,2009-07-13T03:35:00.000Z,5eb87cdcffd86e000604b32e,5e9e289ef359184f103b2627,1,False,False,False,False,,,,5e9d0d95eda69955f709d1eb,Falcon 1


In [None]:
launches_data.shape

(187, 20)

In [None]:
# Remove the rocket id columns and the launch name column left after the merge.
merge_leftovers = ["id_y", "rocket", "name_x"]
launches_data = launches_data.drop(columns=merge_leftovers)

In [None]:
# Give the rocket name a descriptive label and restore the launch id's original name.
booster_column_names = {"name_y": "BoosterVersion", "id_x": "id"}
launches_data = launches_data.rename(columns=booster_column_names)

In [None]:
launches_data.head()

Unnamed: 0,success,ships,payloads,launchpad,flight_number,date_utc,id,cores.core_id,cores.flight,cores.gridfins,cores.legs,cores.reused,cores.landing_attempt,cores.landing_success,cores.landing_type,cores.landpad,BoosterVersion
0,False,[],[5eb0e4b5b6c3bb0006eeb1e1],5e9e4502f5090995de566f86,1,2006-03-24T22:30:00.000Z,5eb87cd9ffd86e000604b32a,5e9e289df35918033d3b2623,1,False,False,False,False,,,,Falcon 1
1,False,[],[5eb0e4b6b6c3bb0006eeb1e2],5e9e4502f5090995de566f86,2,2007-03-21T01:10:00.000Z,5eb87cdaffd86e000604b32b,5e9e289ef35918416a3b2624,1,False,False,False,False,,,,Falcon 1
2,False,[],"[5eb0e4b6b6c3bb0006eeb1e3, 5eb0e4b6b6c3bb0006e...",5e9e4502f5090995de566f86,3,2008-08-03T03:34:00.000Z,5eb87cdbffd86e000604b32c,5e9e289ef3591814873b2625,1,False,False,False,False,,,,Falcon 1
3,True,[],[5eb0e4b7b6c3bb0006eeb1e5],5e9e4502f5090995de566f86,4,2008-09-28T23:15:00.000Z,5eb87cdbffd86e000604b32d,5e9e289ef3591855dc3b2626,1,False,False,False,False,,,,Falcon 1
4,True,[],[5eb0e4b7b6c3bb0006eeb1e6],5e9e4502f5090995de566f86,5,2009-07-13T03:35:00.000Z,5eb87cdcffd86e000604b32e,5e9e289ef359184f103b2627,1,False,False,False,False,,,,Falcon 1


## We will get longitude, latitude and LaunchSite from the launchpad dataset

In [None]:
launchsite_data = dataframes["launchpads"]

In [None]:
launchsite_data.head()

Unnamed: 0,name,full_name,locality,region,latitude,longitude,launch_attempts,launch_successes,rockets,timezone,launches,status,details,id,images.large
0,VAFB SLC 3W,Vandenberg Space Force Base Space Launch Compl...,Vandenberg Space Force Base,California,34.64409,-120.593144,0,0,[5e9d0d95eda69955f709d1eb],America/Los_Angeles,[],retired,SpaceX's original west coast launch pad for Fa...,5e9e4501f5090910d4566f83,[https://i.imgur.com/7uXe1Kv.png]
1,CCSFS SLC 40,Cape Canaveral Space Force Station Space Launc...,Cape Canaveral,Florida,28.561857,-80.577366,99,97,[5e9d0d95eda69973a809d1ec],America/New_York,"[5eb87cddffd86e000604b32f, 5eb87cdeffd86e00060...",active,"SpaceX's primary Falcon 9 pad, where all east ...",5e9e4501f509094ba4566f84,[https://i.imgur.com/9oEMXwa.png]
2,STLS,SpaceX South Texas Launch Site,Boca Chica Village,Texas,25.997264,-97.156085,0,0,[],America/Chicago,[],under construction,SpaceX's new private launch site currently und...,5e9e4502f5090927f8566f85,[https://i.imgur.com/ZzTTC5p.png]
3,Kwajalein Atoll,Kwajalein Atoll Omelek Island,Omelek Island,Marshall Islands,9.047721,167.743129,5,2,[5e9d0d95eda69955f709d1eb],Pacific/Kwajalein,"[5eb87cd9ffd86e000604b32a, 5eb87cdaffd86e00060...",retired,"SpaceX's original pad, where all of the Falcon...",5e9e4502f5090995de566f86,[https://i.imgur.com/GGPgsVs.png]
4,VAFB SLC 4E,Vandenberg Space Force Base Space Launch Compl...,Vandenberg Space Force Base,California,34.632093,-120.610829,28,27,[5e9d0d95eda69973a809d1ec],America/Los_Angeles,"[5eb87ce1ffd86e000604b334, 5eb87cf0ffd86e00060...",active,SpaceX's primary west coast launch pad for pol...,5e9e4502f509092b78566f87,[https://i.imgur.com/asp5L08.png]


In [None]:
launchsite_data.columns

Index(['name', 'full_name', 'locality', 'region', 'latitude', 'longitude',
       'launch_attempts', 'launch_successes', 'rockets', 'timezone',
       'launches', 'status', 'details', 'id', 'images.large'],
      dtype='object')

In [None]:
def get_launchsite(launches=None, launchpads=None):
    """Attach launch pad name and coordinates to every launch via a left merge.

    Parameters
    ----------
    launches : pandas.DataFrame, optional
        Launch table with a ``launchpad`` column holding launch pad ids.
        Defaults to the notebook-level ``launches_data``.
    launchpads : pandas.DataFrame, optional
        Launch pad table with ``id``, ``name``, ``longitude`` and
        ``latitude`` columns. Defaults to the notebook-level
        ``launchsite_data``.

    Returns
    -------
    pandas.DataFrame
        ``launches`` plus the pad ``id``, ``name``, ``longitude`` and
        ``latitude`` columns. Column names present on both sides receive
        pandas' ``_x`` (launch) and ``_y`` (pad) suffixes, e.g.
        ``id_x`` / ``id_y``.

    Raises
    ------
    pandas.errors.MergeError
        If a launch pad id occurs more than once in ``launchpads``; without
        this check such duplicates would silently multiply launch rows.
    """
    # Fall back to the notebook globals so existing zero-argument calls keep working.
    if launches is None:
        launches = launches_data
    if launchpads is None:
        launchpads = launchsite_data

    launchsite_info = launchpads[['id', 'name', 'longitude', 'latitude']]
    modified_data = launches.merge(
        launchsite_info,
        how='left',
        left_on='launchpad',
        right_on='id',
        validate='many_to_one',
    )
    return modified_data


In [None]:
launches_data = get_launchsite()

In [None]:
launches_data.head()

Unnamed: 0,success,ships,payloads,launchpad,flight_number,date_utc,id_x,cores.core_id,cores.flight,cores.gridfins,...,cores.reused,cores.landing_attempt,cores.landing_success,cores.landing_type,cores.landpad,BoosterVersion,id_y,name,longitude,latitude
0,False,[],[5eb0e4b5b6c3bb0006eeb1e1],5e9e4502f5090995de566f86,1,2006-03-24T22:30:00.000Z,5eb87cd9ffd86e000604b32a,5e9e289df35918033d3b2623,1,False,...,False,False,,,,Falcon 1,5e9e4502f5090995de566f86,Kwajalein Atoll,167.743129,9.047721
1,False,[],[5eb0e4b6b6c3bb0006eeb1e2],5e9e4502f5090995de566f86,2,2007-03-21T01:10:00.000Z,5eb87cdaffd86e000604b32b,5e9e289ef35918416a3b2624,1,False,...,False,False,,,,Falcon 1,5e9e4502f5090995de566f86,Kwajalein Atoll,167.743129,9.047721
2,False,[],"[5eb0e4b6b6c3bb0006eeb1e3, 5eb0e4b6b6c3bb0006e...",5e9e4502f5090995de566f86,3,2008-08-03T03:34:00.000Z,5eb87cdbffd86e000604b32c,5e9e289ef3591814873b2625,1,False,...,False,False,,,,Falcon 1,5e9e4502f5090995de566f86,Kwajalein Atoll,167.743129,9.047721
3,True,[],[5eb0e4b7b6c3bb0006eeb1e5],5e9e4502f5090995de566f86,4,2008-09-28T23:15:00.000Z,5eb87cdbffd86e000604b32d,5e9e289ef3591855dc3b2626,1,False,...,False,False,,,,Falcon 1,5e9e4502f5090995de566f86,Kwajalein Atoll,167.743129,9.047721
4,True,[],[5eb0e4b7b6c3bb0006eeb1e6],5e9e4502f5090995de566f86,5,2009-07-13T03:35:00.000Z,5eb87cdcffd86e000604b32e,5e9e289ef359184f103b2627,1,False,...,False,False,,,,Falcon 1,5e9e4502f5090995de566f86,Kwajalein Atoll,167.743129,9.047721


In [None]:
# Label the pad name as the launch site and restore the launch id's original name.
launchsite_column_names = {"name": "LaunchSite", "id_x": "id"}
launches_data = launches_data.rename(columns=launchsite_column_names)

In [None]:
launches_data

Unnamed: 0,success,ships,payloads,launchpad,flight_number,date_utc,id,cores.core_id,cores.flight,cores.gridfins,...,cores.reused,cores.landing_attempt,cores.landing_success,cores.landing_type,cores.landpad,BoosterVersion,id_y,LaunchSite,longitude,latitude
0,False,[],[5eb0e4b5b6c3bb0006eeb1e1],5e9e4502f5090995de566f86,1,2006-03-24T22:30:00.000Z,5eb87cd9ffd86e000604b32a,5e9e289df35918033d3b2623,1,False,...,False,False,,,,Falcon 1,5e9e4502f5090995de566f86,Kwajalein Atoll,167.743129,9.047721
1,False,[],[5eb0e4b6b6c3bb0006eeb1e2],5e9e4502f5090995de566f86,2,2007-03-21T01:10:00.000Z,5eb87cdaffd86e000604b32b,5e9e289ef35918416a3b2624,1,False,...,False,False,,,,Falcon 1,5e9e4502f5090995de566f86,Kwajalein Atoll,167.743129,9.047721
2,False,[],"[5eb0e4b6b6c3bb0006eeb1e3, 5eb0e4b6b6c3bb0006e...",5e9e4502f5090995de566f86,3,2008-08-03T03:34:00.000Z,5eb87cdbffd86e000604b32c,5e9e289ef3591814873b2625,1,False,...,False,False,,,,Falcon 1,5e9e4502f5090995de566f86,Kwajalein Atoll,167.743129,9.047721
3,True,[],[5eb0e4b7b6c3bb0006eeb1e5],5e9e4502f5090995de566f86,4,2008-09-28T23:15:00.000Z,5eb87cdbffd86e000604b32d,5e9e289ef3591855dc3b2626,1,False,...,False,False,,,,Falcon 1,5e9e4502f5090995de566f86,Kwajalein Atoll,167.743129,9.047721
4,True,[],[5eb0e4b7b6c3bb0006eeb1e6],5e9e4502f5090995de566f86,5,2009-07-13T03:35:00.000Z,5eb87cdcffd86e000604b32e,5e9e289ef359184f103b2627,1,False,...,False,False,,,,Falcon 1,5e9e4502f5090995de566f86,Kwajalein Atoll,167.743129,9.047721
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,True,[],"[631614e9ffc78f3b85670717, 631617fbffc78f3b856...",5e9e4501f509094ba4566f84,183,2022-09-05T02:09:00.000Z,62f3b5330f55c50e192a4e6e,5e9e28a6f359183c413b265d,7,True,...,True,True,True,ASDS,5e9e3033383ecbb9e534e7cc,Falcon 9,5e9e4501f509094ba4566f84,CCSFS SLC 40,-80.577366,28.561857
183,True,[],"[63161610ffc78f3b85670718, 63161872ffc78f3b856...",5e9e4502f509094188566f88,184,2022-09-11T01:10:00.000Z,62a9f89a20413d2695d8871a,5e9e28a7f3591817f23b2663,14,True,...,True,True,True,ASDS,5e9e3033383ecb075134e7cd,Falcon 9,5e9e4502f509094188566f88,KSC LC 39A,-80.603956,28.608058
184,True,[],[63161699ffc78f3b85670719],5e9e4501f509094ba4566f84,185,2022-09-17T01:05:00.000Z,63161329ffc78f3b8567070b,60b800111f83cc1e59f16438,6,True,...,True,True,True,ASDS,5e9e3033383ecbb9e534e7cc,Falcon 9,5e9e4501f509094ba4566f84,CCSFS SLC 40,-80.577366,28.561857
185,True,[],[631616a7ffc78f3b8567071a],5e9e4501f509094ba4566f84,186,2022-09-24T23:30:00.000Z,63161339ffc78f3b8567070c,627843d657b51b752c5c5a53,4,True,...,True,True,True,ASDS,5e9e3033383ecbb9e534e7cc,Falcon 9,5e9e4501f509094ba4566f84,CCSFS SLC 40,-80.577366,28.561857


In [None]:
# The pad id columns are redundant now that name and coordinates are attached.
pad_id_columns = ["id_y", "launchpad"]
launches_data = launches_data.drop(columns=pad_id_columns)

In [None]:
launches_data.head()

Unnamed: 0,success,ships,payloads,flight_number,date_utc,id,cores.core_id,cores.flight,cores.gridfins,cores.legs,cores.reused,cores.landing_attempt,cores.landing_success,cores.landing_type,cores.landpad,BoosterVersion,LaunchSite,longitude,latitude
0,False,[],[5eb0e4b5b6c3bb0006eeb1e1],1,2006-03-24T22:30:00.000Z,5eb87cd9ffd86e000604b32a,5e9e289df35918033d3b2623,1,False,False,False,False,,,,Falcon 1,Kwajalein Atoll,167.743129,9.047721
1,False,[],[5eb0e4b6b6c3bb0006eeb1e2],2,2007-03-21T01:10:00.000Z,5eb87cdaffd86e000604b32b,5e9e289ef35918416a3b2624,1,False,False,False,False,,,,Falcon 1,Kwajalein Atoll,167.743129,9.047721
2,False,[],"[5eb0e4b6b6c3bb0006eeb1e3, 5eb0e4b6b6c3bb0006e...",3,2008-08-03T03:34:00.000Z,5eb87cdbffd86e000604b32c,5e9e289ef3591814873b2625,1,False,False,False,False,,,,Falcon 1,Kwajalein Atoll,167.743129,9.047721
3,True,[],[5eb0e4b7b6c3bb0006eeb1e5],4,2008-09-28T23:15:00.000Z,5eb87cdbffd86e000604b32d,5e9e289ef3591855dc3b2626,1,False,False,False,False,,,,Falcon 1,Kwajalein Atoll,167.743129,9.047721
4,True,[],[5eb0e4b7b6c3bb0006eeb1e6],5,2009-07-13T03:35:00.000Z,5eb87cdcffd86e000604b32e,5e9e289ef359184f103b2627,1,False,False,False,False,,,,Falcon 1,Kwajalein Atoll,167.743129,9.047721


## We will get the payload mass and orbit from payload dataset

In [None]:
payloads_data = dataframes["payloads"]

In [None]:
payloads_data.head()

Unnamed: 0,name,type,reused,launch,customers,norad_ids,nationalities,manufacturers,mass_kg,mass_lbs,...,arg_of_pericenter,mean_anomaly,id,dragon.capsule,dragon.mass_returned_kg,dragon.mass_returned_lbs,dragon.flight_time_sec,dragon.manifest,dragon.water_landing,dragon.land_landing
0,FalconSAT-2,Satellite,False,5eb87cd9ffd86e000604b32a,[DARPA],[],[United States],[SSTL],20.0,43.0,...,,,5eb0e4b5b6c3bb0006eeb1e1,,,,,,,
1,DemoSAT,Satellite,False,5eb87cdaffd86e000604b32b,[DARPA],[],[United States],[SpaceX],,,...,,,5eb0e4b6b6c3bb0006eeb1e2,,,,,,,
2,Trailblazer,Satellite,False,5eb87cdbffd86e000604b32c,[NASA],[],[United States],[Space Dev],,,...,,,5eb0e4b6b6c3bb0006eeb1e3,,,,,,,
3,PRESat,Satellite,False,5eb87cdbffd86e000604b32c,[ORS],[],[United States],[],,,...,,,5eb0e4b6b6c3bb0006eeb1e4,,,,,,,
4,RatSat,Satellite,False,5eb87cdbffd86e000604b32d,[SpaceX],[33393],[United States],[SpaceX],165.0,363.0,...,331.2516,28.6966,5eb0e4b7b6c3bb0006eeb1e5,,,,,,,


In [None]:
payloads_data.columns

Index(['name', 'type', 'reused', 'launch', 'customers', 'norad_ids',
       'nationalities', 'manufacturers', 'mass_kg', 'mass_lbs', 'orbit',
       'reference_system', 'regime', 'longitude', 'semi_major_axis_km',
       'eccentricity', 'periapsis_km', 'apoapsis_km', 'inclination_deg',
       'period_min', 'lifespan_years', 'epoch', 'mean_motion', 'raan',
       'arg_of_pericenter', 'mean_anomaly', 'id', 'dragon.capsule',
       'dragon.mass_returned_kg', 'dragon.mass_returned_lbs',
       'dragon.flight_time_sec', 'dragon.manifest', 'dragon.water_landing',
       'dragon.land_landing'],
      dtype='object')

In [None]:
def getPayLoadData(launches=None, payloads=None):
    """Attach the total payload mass and the orbit to every launch.

    Parameters
    ----------
    launches : pandas.DataFrame, optional
        Launch records with an ``id`` column and a ``payloads`` column that
        holds a list of payload ids per launch. Defaults to the notebook-level
        ``launches_data``.
    payloads : pandas.DataFrame, optional
        Payload records with ``id``, ``mass_kg`` and ``orbit`` columns.
        Defaults to the notebook-level ``payloads_data``.

    Returns
    -------
    pandas.DataFrame
        A new frame with one row per launch and two extra columns:
        ``PayloadMass`` (sum of ``mass_kg`` over the launch's payloads) and
        ``Orbit`` (first non-null ``orbit`` among them).

    Notes
    -----
    The sum skips missing masses, so a launch whose payload masses are all
    missing gets ``PayloadMass == 0.0`` rather than NaN. The notebook treats
    these zeros as missing values further down.
    """
    if launches is None:
        launches = launches_data
    if payloads is None:
        payloads = payloads_data

    # Make the cell safe to re-run: without this, a second call on a frame that
    # already has these columns would produce PayloadMass_x / PayloadMass_y.
    launches = launches.drop(columns=['PayloadMass', 'Orbit'], errors='ignore')

    # Only the payload key, mass and orbit are needed; renaming the key avoids
    # the id_x / id_y suffixes in the intermediate merge.
    payload_info = payloads[['id', 'mass_kg', 'orbit']].rename(columns={'id': 'payload_id'})

    # One row per (launch, payload) pair; launches with an empty list keep a
    # single row with a missing payload id.
    per_payload = launches[['id', 'payloads']].explode('payloads')
    per_payload = per_payload.merge(
        payload_info, left_on='payloads', right_on='payload_id', how='left'
    )

    # Collapse back to one row per launch.
    per_launch = (
        per_payload.groupby('id', as_index=False)
        .agg({'mass_kg': 'sum', 'orbit': 'first'})
        .rename(columns={'mass_kg': 'PayloadMass', 'orbit': 'Orbit'})
    )

    # Left join keeps every launch, even one that matched no payload.
    return launches.merge(per_launch, on='id', how='left')

In [None]:
launches_data = getPayLoadData()

In [None]:
launches_data.head()

Unnamed: 0,success,ships,payloads,flight_number,date_utc,id,cores.core_id,cores.flight,cores.gridfins,cores.legs,...,cores.landing_attempt,cores.landing_success,cores.landing_type,cores.landpad,BoosterVersion,LaunchSite,longitude,latitude,PayloadMass,Orbit
0,False,[],[5eb0e4b5b6c3bb0006eeb1e1],1,2006-03-24T22:30:00.000Z,5eb87cd9ffd86e000604b32a,5e9e289df35918033d3b2623,1,False,False,...,False,,,,Falcon 1,Kwajalein Atoll,167.743129,9.047721,20.0,LEO
1,False,[],[5eb0e4b6b6c3bb0006eeb1e2],2,2007-03-21T01:10:00.000Z,5eb87cdaffd86e000604b32b,5e9e289ef35918416a3b2624,1,False,False,...,False,,,,Falcon 1,Kwajalein Atoll,167.743129,9.047721,0.0,LEO
2,False,[],"[5eb0e4b6b6c3bb0006eeb1e3, 5eb0e4b6b6c3bb0006e...",3,2008-08-03T03:34:00.000Z,5eb87cdbffd86e000604b32c,5e9e289ef3591814873b2625,1,False,False,...,False,,,,Falcon 1,Kwajalein Atoll,167.743129,9.047721,0.0,LEO
3,True,[],[5eb0e4b7b6c3bb0006eeb1e5],4,2008-09-28T23:15:00.000Z,5eb87cdbffd86e000604b32d,5e9e289ef3591855dc3b2626,1,False,False,...,False,,,,Falcon 1,Kwajalein Atoll,167.743129,9.047721,165.0,LEO
4,True,[],[5eb0e4b7b6c3bb0006eeb1e6],5,2009-07-13T03:35:00.000Z,5eb87cdcffd86e000604b32e,5e9e289ef359184f103b2627,1,False,False,...,False,,,,Falcon 1,Kwajalein Atoll,167.743129,9.047721,200.0,LEO


## Get the block and serial from the cores dataset

In [None]:
cores_data = dataframes["cores"]

In [None]:
cores_data.head()

Unnamed: 0,block,reuse_count,rtls_attempts,rtls_landings,asds_attempts,asds_landings,last_update,launches,serial,status,id
0,,0,0,0,0,0,Engine failure at T+33 seconds resulted in los...,[5eb87cd9ffd86e000604b32a],Merlin1A,lost,5e9e289df35918033d3b2623
1,,0,0,0,0,0,Successful first-stage burn and transition to ...,[5eb87cdaffd86e000604b32b],Merlin2A,lost,5e9e289ef35918416a3b2624
2,,0,0,0,0,0,Residual stage-1 thrust led to collision betwe...,[5eb87cdbffd86e000604b32c],Merlin1C,lost,5e9e289ef3591814873b2625
3,,0,0,0,0,0,"Initially scheduled for 23–25 Sep, carried dum...",[5eb87cdbffd86e000604b32d],Merlin2C,lost,5e9e289ef3591855dc3b2626
4,,0,0,0,0,0,,[5eb87cdcffd86e000604b32e],Merlin3C,lost,5e9e289ef359184f103b2627


In [None]:
def getCoreData(launches=None, cores=None):
    """Join every launch with the record of the core (booster) it flew on.

    Parameters
    ----------
    launches : pandas.DataFrame, optional
        Launch records with a ``cores.core_id`` column. Defaults to the
        notebook-level ``launches_data``.
    cores : pandas.DataFrame, optional
        Core records keyed by ``id``. Defaults to the notebook-level
        ``cores_data``.

    Returns
    -------
    pandas.DataFrame
        The launch columns followed by all core columns. Both inputs have an
        ``id`` column, so the result carries ``id_x`` (launch id) and ``id_y``
        (core id). This is an inner join: launches whose ``cores.core_id``
        matches no core are dropped.
    """
    if launches is None:
        launches = launches_data
    if cores is None:
        cores = cores_data

    # All core columns are merged on purpose: later cells drop the unneeded
    # ones by name, so narrowing the selection here would break them.
    return launches.merge(cores, how='inner', left_on='cores.core_id', right_on='id')

In [None]:
launches_data = getCoreData()

In [None]:
launches_data.columns

Index(['success', 'ships', 'payloads', 'flight_number', 'date_utc', 'id_x',
       'cores.core_id', 'cores.flight', 'cores.gridfins', 'cores.legs',
       'cores.reused', 'cores.landing_attempt', 'cores.landing_success',
       'cores.landing_type', 'cores.landpad', 'BoosterVersion', 'LaunchSite',
       'longitude', 'latitude', 'PayloadMass', 'Orbit', 'block', 'reuse_count',
       'rtls_attempts', 'rtls_landings', 'asds_attempts', 'asds_landings',
       'last_update', 'launches', 'serial', 'status', 'id_y'],
      dtype='object')

In [None]:
launches_data.head()

Unnamed: 0,success,ships,payloads,flight_number,date_utc,id_x,cores.core_id,cores.flight,cores.gridfins,cores.legs,...,reuse_count,rtls_attempts,rtls_landings,asds_attempts,asds_landings,last_update,launches,serial,status,id_y
0,False,[],[5eb0e4b5b6c3bb0006eeb1e1],1,2006-03-24T22:30:00.000Z,5eb87cd9ffd86e000604b32a,5e9e289df35918033d3b2623,1,False,False,...,0,0,0,0,0,Engine failure at T+33 seconds resulted in los...,[5eb87cd9ffd86e000604b32a],Merlin1A,lost,5e9e289df35918033d3b2623
1,False,[],[5eb0e4b6b6c3bb0006eeb1e2],2,2007-03-21T01:10:00.000Z,5eb87cdaffd86e000604b32b,5e9e289ef35918416a3b2624,1,False,False,...,0,0,0,0,0,Successful first-stage burn and transition to ...,[5eb87cdaffd86e000604b32b],Merlin2A,lost,5e9e289ef35918416a3b2624
2,False,[],"[5eb0e4b6b6c3bb0006eeb1e3, 5eb0e4b6b6c3bb0006e...",3,2008-08-03T03:34:00.000Z,5eb87cdbffd86e000604b32c,5e9e289ef3591814873b2625,1,False,False,...,0,0,0,0,0,Residual stage-1 thrust led to collision betwe...,[5eb87cdbffd86e000604b32c],Merlin1C,lost,5e9e289ef3591814873b2625
3,True,[],[5eb0e4b7b6c3bb0006eeb1e5],4,2008-09-28T23:15:00.000Z,5eb87cdbffd86e000604b32d,5e9e289ef3591855dc3b2626,1,False,False,...,0,0,0,0,0,"Initially scheduled for 23–25 Sep, carried dum...",[5eb87cdbffd86e000604b32d],Merlin2C,lost,5e9e289ef3591855dc3b2626
4,True,[],[5eb0e4b7b6c3bb0006eeb1e6],5,2009-07-13T03:35:00.000Z,5eb87cdcffd86e000604b32e,5e9e289ef359184f103b2627,1,False,False,...,0,0,0,0,0,,[5eb87cdcffd86e000604b32e],Merlin3C,lost,5e9e289ef359184f103b2627


In [None]:
# The core id is only a join key; drop both copies of it now that the merge is done.
launches_data = launches_data.drop(columns=["id_y", "cores.core_id"])

In [None]:
# Restore the launch id to its plain name after the merge suffixed it.
launches_data = launches_data.rename(columns={"id_x": "id"})

In [None]:
launches_data.head()

Unnamed: 0,success,ships,payloads,flight_number,date_utc,id,cores.flight,cores.gridfins,cores.legs,cores.reused,...,block,reuse_count,rtls_attempts,rtls_landings,asds_attempts,asds_landings,last_update,launches,serial,status
0,False,[],[5eb0e4b5b6c3bb0006eeb1e1],1,2006-03-24T22:30:00.000Z,5eb87cd9ffd86e000604b32a,1,False,False,False,...,,0,0,0,0,0,Engine failure at T+33 seconds resulted in los...,[5eb87cd9ffd86e000604b32a],Merlin1A,lost
1,False,[],[5eb0e4b6b6c3bb0006eeb1e2],2,2007-03-21T01:10:00.000Z,5eb87cdaffd86e000604b32b,1,False,False,False,...,,0,0,0,0,0,Successful first-stage burn and transition to ...,[5eb87cdaffd86e000604b32b],Merlin2A,lost
2,False,[],"[5eb0e4b6b6c3bb0006eeb1e3, 5eb0e4b6b6c3bb0006e...",3,2008-08-03T03:34:00.000Z,5eb87cdbffd86e000604b32c,1,False,False,False,...,,0,0,0,0,0,Residual stage-1 thrust led to collision betwe...,[5eb87cdbffd86e000604b32c],Merlin1C,lost
3,True,[],[5eb0e4b7b6c3bb0006eeb1e5],4,2008-09-28T23:15:00.000Z,5eb87cdbffd86e000604b32d,1,False,False,False,...,,0,0,0,0,0,"Initially scheduled for 23–25 Sep, carried dum...",[5eb87cdbffd86e000604b32d],Merlin2C,lost
4,True,[],[5eb0e4b7b6c3bb0006eeb1e6],5,2009-07-13T03:35:00.000Z,5eb87cdcffd86e000604b32e,1,False,False,False,...,,0,0,0,0,0,,[5eb87cdcffd86e000604b32e],Merlin3C,lost


## Create an `Outcome` column that describes the landing: whether it was successful and, if so, what the landing type was

In [None]:
launches_data["cores.landing_type"]

Unnamed: 0,cores.landing_type
0,
1,
2,
3,
4,
...,...
182,ASDS
183,ASDS
184,ASDS
185,ASDS


In [None]:
# Outcome is "<landing_success> - <landing_type>", e.g. "True - ASDS".
# Missing values are stringified as well, giving "None - None".
landing_success_text = launches_data['cores.landing_success'].astype(str)
landing_type_text = launches_data['cores.landing_type'].astype(str)
launches_data['Outcome'] = landing_success_text + ' - ' + landing_type_text

In [None]:
launches_data["Outcome"]

Unnamed: 0,Outcome
0,None - None
1,None - None
2,None - None
3,None - None
4,None - None
...,...
182,True - ASDS
183,True - ASDS
184,True - ASDS
185,True - ASDS


In [None]:
launches_data.head()

Unnamed: 0,success,ships,payloads,flight_number,date_utc,id,cores.flight,cores.gridfins,cores.legs,cores.reused,...,reuse_count,rtls_attempts,rtls_landings,asds_attempts,asds_landings,last_update,launches,serial,status,Outcome
0,False,[],[5eb0e4b5b6c3bb0006eeb1e1],1,2006-03-24T22:30:00.000Z,5eb87cd9ffd86e000604b32a,1,False,False,False,...,0,0,0,0,0,Engine failure at T+33 seconds resulted in los...,[5eb87cd9ffd86e000604b32a],Merlin1A,lost,None - None
1,False,[],[5eb0e4b6b6c3bb0006eeb1e2],2,2007-03-21T01:10:00.000Z,5eb87cdaffd86e000604b32b,1,False,False,False,...,0,0,0,0,0,Successful first-stage burn and transition to ...,[5eb87cdaffd86e000604b32b],Merlin2A,lost,None - None
2,False,[],"[5eb0e4b6b6c3bb0006eeb1e3, 5eb0e4b6b6c3bb0006e...",3,2008-08-03T03:34:00.000Z,5eb87cdbffd86e000604b32c,1,False,False,False,...,0,0,0,0,0,Residual stage-1 thrust led to collision betwe...,[5eb87cdbffd86e000604b32c],Merlin1C,lost,None - None
3,True,[],[5eb0e4b7b6c3bb0006eeb1e5],4,2008-09-28T23:15:00.000Z,5eb87cdbffd86e000604b32d,1,False,False,False,...,0,0,0,0,0,"Initially scheduled for 23–25 Sep, carried dum...",[5eb87cdbffd86e000604b32d],Merlin2C,lost,None - None
4,True,[],[5eb0e4b7b6c3bb0006eeb1e6],5,2009-07-13T03:35:00.000Z,5eb87cdcffd86e000604b32e,1,False,False,False,...,0,0,0,0,0,,[5eb87cdcffd86e000604b32e],Merlin3C,lost,None - None


In [None]:
# Columns that are not needed in the final table. The landing success/type
# columns are already folded into `Outcome`.
unused_columns = [
    "last_update",
    "rtls_attempts",
    "payloads",
    "rtls_landings",
    "id",
    "asds_attempts",
    "asds_landings",
    "launches",
    "status",
    "success",
    "ships",
    "cores.landing_attempt",
    "cores.landing_success",
    "cores.landing_type",
]
launches_data = launches_data.drop(columns=unused_columns)

In [None]:
launches_data

Unnamed: 0,flight_number,date_utc,cores.flight,cores.gridfins,cores.legs,cores.reused,cores.landpad,BoosterVersion,LaunchSite,longitude,latitude,PayloadMass,Orbit,block,reuse_count,serial,Outcome
0,1,2006-03-24T22:30:00.000Z,1,False,False,False,,Falcon 1,Kwajalein Atoll,167.743129,9.047721,20.0,LEO,,0,Merlin1A,None - None
1,2,2007-03-21T01:10:00.000Z,1,False,False,False,,Falcon 1,Kwajalein Atoll,167.743129,9.047721,0.0,LEO,,0,Merlin2A,None - None
2,3,2008-08-03T03:34:00.000Z,1,False,False,False,,Falcon 1,Kwajalein Atoll,167.743129,9.047721,0.0,LEO,,0,Merlin1C,None - None
3,4,2008-09-28T23:15:00.000Z,1,False,False,False,,Falcon 1,Kwajalein Atoll,167.743129,9.047721,165.0,LEO,,0,Merlin2C,None - None
4,5,2009-07-13T03:35:00.000Z,1,False,False,False,,Falcon 1,Kwajalein Atoll,167.743129,9.047721,200.0,LEO,,0,Merlin3C,None - None
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,183,2022-09-05T02:09:00.000Z,7,True,True,True,5e9e3033383ecbb9e534e7cc,Falcon 9,CCSFS SLC 40,-80.577366,28.561857,13440.0,VLEO,5.0,6,B1052,True - ASDS
183,184,2022-09-11T01:10:00.000Z,14,True,True,True,5e9e3033383ecb075134e7cd,Falcon 9,KSC LC 39A,-80.603956,28.608058,14760.0,VLEO,5.0,13,B1058,True - ASDS
184,185,2022-09-17T01:05:00.000Z,6,True,True,True,5e9e3033383ecbb9e534e7cc,Falcon 9,CCSFS SLC 40,-80.577366,28.561857,13260.0,VLEO,5.0,5,B1067,True - ASDS
185,186,2022-09-24T23:30:00.000Z,4,True,True,True,5e9e3033383ecbb9e534e7cc,Falcon 9,CCSFS SLC 40,-80.577366,28.561857,13260.0,VLEO,5.0,0,B1072,True - ASDS


## Replace the landing pad ID with its name

In [None]:
landpads_data = dataframes["landpads"]
landpads_data.head()

Unnamed: 0,name,full_name,status,type,locality,region,latitude,longitude,landing_attempts,landing_successes,wikipedia,details,launches,id,images.large
0,LZ-1,Landing Zone 1,active,RTLS,Cape Canaveral,Florida,28.485833,-80.544444,21,20,https://en.wikipedia.org/wiki/Landing_Zones_1_...,SpaceX's first east coast landing pad is Landi...,"[5eb87cefffd86e000604b342, 5eb87cf9ffd86e00060...",5e9e3032383ecb267a34e7c7,[https://i.imgur.com/KHBk6jO.png]
1,LZ-2,Landing Zone 2,active,RTLS,Cape Canaveral,Florida,28.485833,-80.544444,3,3,https://en.wikipedia.org/wiki/Landing_Zones_1_...,SpaceX's first east coast landing pad is Landi...,"[5eb87d13ffd86e000604b360, 5eb87d2dffd86e00060...",5e9e3032383ecb90a834e7c8,[https://i.imgur.com/YE8PMYX.png]
2,LZ-4,Landing Zone 4,active,RTLS,Vandenberg Air Force Base,California,34.632989,-120.615167,6,6,https://en.wikipedia.org/wiki/Vandenberg_AFB_S...,SpaceX's west coast landing pad. The pad is ad...,"[5eb87d23ffd86e000604b36e, 5eb87d31ffd86e00060...",5e9e3032383ecb554034e7c9,[https://i.imgur.com/647XbH6.png]
3,OCISLY,Of Course I Still Love You,active,ASDS,Port of Los Angeles,California,33.729186,-118.262015,61,54,https://en.wikipedia.org/wiki/Autonomous_space...,"The second ASDS barge, Of Course I Still Love ...","[5eb87ceeffd86e000604b341, 5eb87cf2ffd86e00060...",5e9e3032383ecb6bb234e7ca,[https://i.imgur.com/rbpkg01.png]
4,JRTI-1,Just Read The Instructions V1,retired,ASDS,Port Canaveral,Florida,28.4104,-80.6188,2,0,https://en.wikipedia.org/wiki/Autonomous_space...,The ASDS landing location for the first landin...,"[5eb87ce8ffd86e000604b33c, 5eb87cecffd86e00060...",5e9e3032383ecb761634e7cb,[https://i.imgur.com/gZUJ04V.png]


In [None]:
y =  landpads_data[["id","name"]]
y

Unnamed: 0,id,name
0,5e9e3032383ecb267a34e7c7,LZ-1
1,5e9e3032383ecb90a834e7c8,LZ-2
2,5e9e3032383ecb554034e7c9,LZ-4
3,5e9e3032383ecb6bb234e7ca,OCISLY
4,5e9e3032383ecb761634e7cb,JRTI-1
5,5e9e3033383ecbb9e534e7cc,JRTI
6,5e9e3033383ecb075134e7cd,ASOG


In [None]:
# Left join against the landing pad id -> name lookup so launches without a
# landing pad are kept; their "id" and "name" come out as NaN.
launches_data = launches_data.merge(y, how = "left",  right_on = "id", left_on = "cores.landpad")

In [None]:
launches_data.head()

Unnamed: 0,flight_number,date_utc,cores.flight,cores.gridfins,cores.legs,cores.reused,cores.landpad,BoosterVersion,LaunchSite,longitude,latitude,PayloadMass,Orbit,block,reuse_count,serial,Outcome,id,name
0,1,2006-03-24T22:30:00.000Z,1,False,False,False,,Falcon 1,Kwajalein Atoll,167.743129,9.047721,20.0,LEO,,0,Merlin1A,None - None,,
1,2,2007-03-21T01:10:00.000Z,1,False,False,False,,Falcon 1,Kwajalein Atoll,167.743129,9.047721,0.0,LEO,,0,Merlin2A,None - None,,
2,3,2008-08-03T03:34:00.000Z,1,False,False,False,,Falcon 1,Kwajalein Atoll,167.743129,9.047721,0.0,LEO,,0,Merlin1C,None - None,,
3,4,2008-09-28T23:15:00.000Z,1,False,False,False,,Falcon 1,Kwajalein Atoll,167.743129,9.047721,165.0,LEO,,0,Merlin2C,None - None,,
4,5,2009-07-13T03:35:00.000Z,1,False,False,False,,Falcon 1,Kwajalein Atoll,167.743129,9.047721,200.0,LEO,,0,Merlin3C,None - None,,


In [None]:
# Drop the landing pad id columns and let the pad name take over the
# "cores.landpad" label.
launches_data = (
    launches_data
    .drop(columns=["id", "cores.landpad"])
    .rename(columns={"name": "cores.landpad"})
)

In [None]:
launches_data.shape

(187, 17)

In [None]:
launches_data.head()

Unnamed: 0,flight_number,date_utc,cores.flight,cores.gridfins,cores.legs,cores.reused,BoosterVersion,LaunchSite,longitude,latitude,PayloadMass,Orbit,block,reuse_count,serial,Outcome,cores.landpad
0,1,2006-03-24T22:30:00.000Z,1,False,False,False,Falcon 1,Kwajalein Atoll,167.743129,9.047721,20.0,LEO,,0,Merlin1A,None - None,
1,2,2007-03-21T01:10:00.000Z,1,False,False,False,Falcon 1,Kwajalein Atoll,167.743129,9.047721,0.0,LEO,,0,Merlin2A,None - None,
2,3,2008-08-03T03:34:00.000Z,1,False,False,False,Falcon 1,Kwajalein Atoll,167.743129,9.047721,0.0,LEO,,0,Merlin1C,None - None,
3,4,2008-09-28T23:15:00.000Z,1,False,False,False,Falcon 1,Kwajalein Atoll,167.743129,9.047721,165.0,LEO,,0,Merlin2C,None - None,
4,5,2009-07-13T03:35:00.000Z,1,False,False,False,Falcon 1,Kwajalein Atoll,167.743129,9.047721,200.0,LEO,,0,Merlin3C,None - None,


In [None]:
# Map the API column names to the labels used in the final dataset.
final_column_labels = {
    "flight_number": "Flight_Number",
    "date_utc": "Date",
    "cores.flight": "Flights",
    "cores.gridfins": "GridFins",
    "cores.legs": "Legs",
    "cores.reused": "Reused",
    "cores.landpad": "LandingPad",
    "longitude": "Longitude",
    "latitude": "Latitude",
    "block": "Block",
    "serial": "Serial",
    "reuse_count": "ReusedCount",
}
launches_data = launches_data.rename(columns=final_column_labels)

In [None]:
launches_data.head()

Unnamed: 0,Flight_Number,Date,Flights,GridFins,Legs,Reused,BoosterVersion,LaunchSite,Longitude,Latitude,PayloadMass,Orbit,Block,ReusedCount,Serial,Outcome,LandingPad
0,1,2006-03-24T22:30:00.000Z,1,False,False,False,Falcon 1,Kwajalein Atoll,167.743129,9.047721,20.0,LEO,,0,Merlin1A,None - None,
1,2,2007-03-21T01:10:00.000Z,1,False,False,False,Falcon 1,Kwajalein Atoll,167.743129,9.047721,0.0,LEO,,0,Merlin2A,None - None,
2,3,2008-08-03T03:34:00.000Z,1,False,False,False,Falcon 1,Kwajalein Atoll,167.743129,9.047721,0.0,LEO,,0,Merlin1C,None - None,
3,4,2008-09-28T23:15:00.000Z,1,False,False,False,Falcon 1,Kwajalein Atoll,167.743129,9.047721,165.0,LEO,,0,Merlin2C,None - None,
4,5,2009-07-13T03:35:00.000Z,1,False,False,False,Falcon 1,Kwajalein Atoll,167.743129,9.047721,200.0,LEO,,0,Merlin3C,None - None,


In [None]:
# Final column order of the dataset; a missing name raises a KeyError.
reorder_columns = [
    "Flight_Number", "Date", "BoosterVersion", "PayloadMass", "Orbit",
    "LaunchSite", "Outcome", "Flights", "GridFins", "Reused", "Legs",
    "LandingPad", "Block", "ReusedCount", "Serial", "Longitude", "Latitude",
]
launches_data = launches_data.loc[:, reorder_columns]

In [None]:
launches_data.shape

(187, 17)

In [None]:
launches_data.head()

Unnamed: 0,Flight_Number,Date,BoosterVersion,PayloadMass,Orbit,LaunchSite,Outcome,Flights,GridFins,Reused,Legs,LandingPad,Block,ReusedCount,Serial,Longitude,Latitude
0,1,2006-03-24T22:30:00.000Z,Falcon 1,20.0,LEO,Kwajalein Atoll,None - None,1,False,False,False,,,0,Merlin1A,167.743129,9.047721
1,2,2007-03-21T01:10:00.000Z,Falcon 1,0.0,LEO,Kwajalein Atoll,None - None,1,False,False,False,,,0,Merlin2A,167.743129,9.047721
2,3,2008-08-03T03:34:00.000Z,Falcon 1,0.0,LEO,Kwajalein Atoll,None - None,1,False,False,False,,,0,Merlin1C,167.743129,9.047721
3,4,2008-09-28T23:15:00.000Z,Falcon 1,165.0,LEO,Kwajalein Atoll,None - None,1,False,False,False,,,0,Merlin2C,167.743129,9.047721
4,5,2009-07-13T03:35:00.000Z,Falcon 1,200.0,LEO,Kwajalein Atoll,None - None,1,False,False,False,,,0,Merlin3C,167.743129,9.047721


## Now let's get Falcon9 data

In [None]:
# Keep only Falcon 9 launches. The explicit .copy() makes the subset an
# independent frame, so the column assignments in the following cells modify
# it directly instead of raising SettingWithCopyWarning.
falcon9_data = launches_data[launches_data["BoosterVersion"] == "Falcon 9"].copy()
falcon9_data.head()

Unnamed: 0,Flight_Number,Date,BoosterVersion,PayloadMass,Orbit,LaunchSite,Outcome,Flights,GridFins,Reused,Legs,LandingPad,Block,ReusedCount,Serial,Longitude,Latitude
5,6,2010-06-04T18:45:00.000Z,Falcon 9,0.0,LEO,CCSFS SLC 40,None - None,1,False,False,False,,1.0,0,B0003,-80.577366,28.561857
6,7,2010-12-08T15:43:00.000Z,Falcon 9,0.0,LEO,CCSFS SLC 40,None - None,1,False,False,False,,1.0,0,B0004,-80.577366,28.561857
7,8,2012-05-22T07:44:00.000Z,Falcon 9,525.0,LEO,CCSFS SLC 40,None - None,1,False,False,False,,1.0,0,B0005,-80.577366,28.561857
8,9,2012-10-08T00:35:00.000Z,Falcon 9,800.0,ISS,CCSFS SLC 40,None - None,1,False,False,False,,1.0,0,B0006,-80.577366,28.561857
9,10,2013-03-01T19:10:00.000Z,Falcon 9,677.0,ISS,CCSFS SLC 40,None - None,1,False,False,False,,1.0,0,B0007,-80.577366,28.561857


In [None]:
# Renumber the flights 1..N within the Falcon 9 subset. `assign` builds a new
# frame, so nothing is written into a slice of `launches_data`
# (no SettingWithCopyWarning).
falcon9_data = falcon9_data.assign(Flight_Number=range(1, len(falcon9_data) + 1))
falcon9_data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  falcon9_data['Flight_Number'] = list(range(1, falcon9_data.shape[0]+1))


Unnamed: 0,Flight_Number,Date,BoosterVersion,PayloadMass,Orbit,LaunchSite,Outcome,Flights,GridFins,Reused,Legs,LandingPad,Block,ReusedCount,Serial,Longitude,Latitude
5,1,2010-06-04T18:45:00.000Z,Falcon 9,0.0,LEO,CCSFS SLC 40,None - None,1,False,False,False,,1.0,0,B0003,-80.577366,28.561857
6,2,2010-12-08T15:43:00.000Z,Falcon 9,0.0,LEO,CCSFS SLC 40,None - None,1,False,False,False,,1.0,0,B0004,-80.577366,28.561857
7,3,2012-05-22T07:44:00.000Z,Falcon 9,525.0,LEO,CCSFS SLC 40,None - None,1,False,False,False,,1.0,0,B0005,-80.577366,28.561857
8,4,2012-10-08T00:35:00.000Z,Falcon 9,800.0,ISS,CCSFS SLC 40,None - None,1,False,False,False,,1.0,0,B0006,-80.577366,28.561857
9,5,2013-03-01T19:10:00.000Z,Falcon 9,677.0,ISS,CCSFS SLC 40,None - None,1,False,False,False,,1.0,0,B0007,-80.577366,28.561857


## Convert the `Date` column from object (string) to a date type

In [None]:
# Parse the timestamp strings and keep only the calendar date. One `assign`
# replaces the two in-place writes that triggered SettingWithCopyWarning.
falcon9_data = falcon9_data.assign(
    Date=pd.to_datetime(falcon9_data["Date"]).dt.date
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  falcon9_data["Date"] = pd.to_datetime(falcon9_data["Date"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  falcon9_data["Date"]  = falcon9_data["Date"].dt.date


In [None]:
# Last launch date (inclusive) kept in the dataset.
CUTOFF_DATE = '2020-11-13'

# No `format=` argument: the values are already dates, and the previous
# '%d/%m/%Y' pattern did not describe them.
falcon9_data = (
    falcon9_data
    .assign(Date=pd.to_datetime(falcon9_data['Date']))
    .loc[lambda frame: frame['Date'] <= CUTOFF_DATE]
    .sort_values(by='Date')
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  falcon9_data['Date'] = pd.to_datetime(falcon9_data['Date'], format='%d/%m/%Y')


## A payload mass of 0 is not a real value: it appears when the masses being summed for a launch are all missing. We therefore treat 0 as NaN and fill it with the mean

In [None]:
# A mass of 0 (or NaN) marks a launch with no known payload mass.
payload_mass = falcon9_data['PayloadMass']
mass_is_missing = payload_mass.isna() | payload_mass.eq(0)

# Average over the known masses only, so the placeholder zeros do not pull the
# fill value down.
mean_payload_mass = payload_mass[~mass_is_missing].mean()

# Build the column explicitly instead of `df[col].replace(..., inplace=True)`,
# which pandas deprecates and which stops working in pandas 3.0.
falcon9_data = falcon9_data.assign(
    PayloadMass=payload_mass.mask(mass_is_missing, mean_payload_mass)
)
falcon9_data.head()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  falcon9_data['PayloadMass'].replace(0.0,x, inplace=True)


Unnamed: 0,Flight_Number,Date,BoosterVersion,PayloadMass,Orbit,LaunchSite,Outcome,Flights,GridFins,Reused,Legs,LandingPad,Block,ReusedCount,Serial,Longitude,Latitude
5,1,2010-06-04,Falcon 9,5694.158673,LEO,CCSFS SLC 40,None - None,1,False,False,False,,1.0,0,B0003,-80.577366,28.561857
6,2,2010-12-08,Falcon 9,5694.158673,LEO,CCSFS SLC 40,None - None,1,False,False,False,,1.0,0,B0004,-80.577366,28.561857
7,3,2012-05-22,Falcon 9,525.0,LEO,CCSFS SLC 40,None - None,1,False,False,False,,1.0,0,B0005,-80.577366,28.561857
8,4,2012-10-08,Falcon 9,800.0,ISS,CCSFS SLC 40,None - None,1,False,False,False,,1.0,0,B0006,-80.577366,28.561857
9,5,2013-03-01,Falcon 9,677.0,ISS,CCSFS SLC 40,None - None,1,False,False,False,,1.0,0,B0007,-80.577366,28.561857


In [None]:
falcon9_data.shape

(98, 17)

In [None]:
falcon9_data.isnull().sum()

Unnamed: 0,0
Flight_Number,0
Date,0
BoosterVersion,0
PayloadMass,0
Orbit,0
LaunchSite,0
Outcome,0
Flights,0
GridFins,0
Reused,0


In [None]:
# Renumber the flights 1..N after the date filter and sort above.
flight_count = len(falcon9_data)
falcon9_data["Flight_Number"] = range(1, flight_count + 1)

In [None]:
# Save the cleaned Falcon 9 table; index=False leaves the DataFrame index out of the file.
falcon9_data.to_csv("falcon9_data.csv",index=False)