## Fetch comprehensive launch data from the SpaceX-API

In [1]:
#cleaning and enriching the SpaceX launch data.


import requests
import pandas as pd

# Fetch launches
launches_url = "https://api.spacexdata.com/v4/launches"
launches = requests.get(launches_url).json()
df = pd.json_normalize(launches)

# Fetch rockets info
rockets_url = "https://api.spacexdata.com/v4/rockets"
rockets = requests.get(rockets_url).json()
rockets_df = pd.DataFrame(rockets)[['id', 'name']]

# Fetch launchpads info
launchpads_url = "https://api.spacexdata.com/v4/launchpads"
launchpads = requests.get(launchpads_url).json()
launchpads_df = pd.DataFrame(launchpads)[['id', 'name', 'locality', 'region']]

# Map rocket ID to rocket name
rocket_dict = rockets_df.set_index('id')['name'].to_dict()
df['rocket_name'] = df['rocket'].map(rocket_dict)

# Map launchpad ID to launchpad name and location
launchpad_dict = launchpads_df.set_index('id')['name'].to_dict()
launchpad_loc_dict = launchpads_df.set_index('id')['locality'].to_dict()
df['launchpad_name'] = df['launchpad'].map(launchpad_dict)
df['launchpad_locality'] = df['launchpad'].map(launchpad_loc_dict)

# Select relevant columns for analysis
df_selected = df[['name', 'date_utc', 'success', 'rocket_name', 'launchpad_name', 'launchpad_locality', 'payloads', 'cores']]

# Convert date to datetime type
df_selected['date_utc'] = pd.to_datetime(df_selected['date_utc'])

# Show cleaned sample
df_selected.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_selected['date_utc'] = pd.to_datetime(df_selected['date_utc'])


Unnamed: 0,name,date_utc,success,rocket_name,launchpad_name,launchpad_locality,payloads,cores
0,FalconSat,2006-03-24 22:30:00+00:00,False,Falcon 1,Kwajalein Atoll,Omelek Island,[5eb0e4b5b6c3bb0006eeb1e1],"[{'core': '5e9e289df35918033d3b2623', 'flight'..."
1,DemoSat,2007-03-21 01:10:00+00:00,False,Falcon 1,Kwajalein Atoll,Omelek Island,[5eb0e4b6b6c3bb0006eeb1e2],"[{'core': '5e9e289ef35918416a3b2624', 'flight'..."
2,Trailblazer,2008-08-03 03:34:00+00:00,False,Falcon 1,Kwajalein Atoll,Omelek Island,"[5eb0e4b6b6c3bb0006eeb1e3, 5eb0e4b6b6c3bb0006e...","[{'core': '5e9e289ef3591814873b2625', 'flight'..."
3,RatSat,2008-09-28 23:15:00+00:00,True,Falcon 1,Kwajalein Atoll,Omelek Island,[5eb0e4b7b6c3bb0006eeb1e5],"[{'core': '5e9e289ef3591855dc3b2626', 'flight'..."
4,RazakSat,2009-07-13 03:35:00+00:00,True,Falcon 1,Kwajalein Atoll,Omelek Island,[5eb0e4b7b6c3bb0006eeb1e6],"[{'core': '5e9e289ef359184f103b2627', 'flight'..."


In [2]:
# Define coordinates for known launch sites
launch_coords = {
    'ccafs_slc_40': (28.5618571, -80.577366),
    'vafb_slc_4e': (34.632093, -120.610829),
    'ksc_lc_39a': (28.608197, -80.604089),
    'stls': (25.9972, -97.1566)
}

# Extract lat/lon from launchpad column
df['lat'] = df['launchpad'].map(lambda x: launch_coords.get(x, (None, None))[0])
df['lon'] = df['launchpad'].map(lambda x: launch_coords.get(x, (None, None))[1])


## Scrape additional data

In [3]:
# Load the CSV files
spacex_df = pd.read_csv("spacex_latest_launch.csv")
weather_df = pd.read_csv("Global Weather Data.csv")

# Select only the required weather columns (first N rows, matching SpaceX data length)
weather_subset = weather_df[['temperature', 'humidity', 'wind_speed']].head(len(spacex_df))

# Concatenate the weather data directly with SpaceX launch data
merged_df = pd.concat([spacex_df.reset_index(drop=True), weather_subset.reset_index(drop=True)], axis=1)

# Save the enriched dataset
merged_df.to_csv("enriched_spacex_launches.csv", index=False)

print("✅ Weather data merged successfully and saved as 'enriched_spacex_launches.csv'")

✅ Weather data merged successfully and saved as 'enriched_spacex_launches.csv'


  weather_df = pd.read_csv("Global Weather Data.csv")


In [4]:
import sklearn
from sklearn.preprocessing import LabelEncoder

def preprocess_data():
    df = pd.read_csv("enriched_spacex_launches.csv")

    # Drop missing target labels
    df = df[df['success'].notna()]

    # Select relevant features
    df = df[[
        'name',
        'rocket',
        'date_utc',
        'success',
        'payloads',
        'launchpad',
        'temperature',
        'humidity',
        'wind_speed'
    ]]

    # Fill missing weather values
    df[['temperature', 'humidity', 'wind_speed']] = df[['temperature', 'humidity', 'wind_speed']].fillna(method='ffill')

    # Convert launch date to datetime features
    df['date_utc'] = pd.to_datetime(df['date_utc'])
    df['year'] = df['date_utc'].dt.year
    df['month'] = df['date_utc'].dt.month
    df['day'] = df['date_utc'].dt.day
    df['hour'] = df['date_utc'].dt.hour

    # Encode categorical variables
    le_rocket = LabelEncoder()
    le_launchpad = LabelEncoder()

    df['rocket_encoded'] = le_rocket.fit_transform(df['rocket'].astype(str))
    df['launchpad_encoded'] = le_launchpad.fit_transform(df['launchpad'].astype(str))

    # Final feature set
    df_final = df[[
        'rocket_encoded',
        'launchpad_encoded',
        'temperature',
        'humidity',
        'wind_speed',
        'year',
        'month',
        'day',
        'hour',
        'success'
    ]]

    df_final.to_csv("processed_spacex_data.csv", index=False)
    print("Processed data saved.")

if __name__ == "__main__":
    preprocess_data()

Processed data saved.


  df[['temperature', 'humidity', 'wind_speed']] = df[['temperature', 'humidity', 'wind_speed']].fillna(method='ffill')


In [10]:
df = pd.read_csv("processed_spacex_data.csv")  # or your merged DataFrame

In [11]:
df

Unnamed: 0,rocket_encoded,launchpad_encoded,temperature,humidity,wind_speed,year,month,day,hour,success
0,0,3,3.02,87,2.06,2006,3,24,22,False
1,0,3,3.59,40,6.38,2007,3,21,1,False
2,0,3,-2.82,68,2.06,2008,8,3,3,False
3,0,3,-0.64,79,5.97,2008,9,28,23,True
4,0,3,6.29,62,5.97,2009,7,13,3,True
...,...,...,...,...,...,...,...,...,...,...
181,1,0,24.10,28,2.91,2022,9,5,2,True
182,1,2,30.87,16,2.24,2022,9,11,1,True
183,1,0,27.09,52,5.52,2022,9,17,1,True
184,1,0,26.12,70,2.30,2022,9,24,23,True


In [5]:
print(df.columns)


Index(['static_fire_date_utc', 'static_fire_date_unix', 'net', 'window',
       'rocket', 'success', 'failures', 'details', 'crew', 'ships', 'capsules',
       'payloads', 'launchpad', 'flight_number', 'name', 'date_utc',
       'date_unix', 'date_local', 'date_precision', 'upcoming', 'cores',
       'auto_update', 'tbd', 'launch_library_id', 'id', 'fairings.reused',
       'fairings.recovery_attempt', 'fairings.recovered', 'fairings.ships',
       'links.patch.small', 'links.patch.large', 'links.reddit.campaign',
       'links.reddit.launch', 'links.reddit.media', 'links.reddit.recovery',
       'links.flickr.small', 'links.flickr.original', 'links.presskit',
       'links.webcast', 'links.youtube_id', 'links.article', 'links.wikipedia',
       'fairings', 'rocket_name', 'launchpad_name', 'launchpad_locality',
       'lat', 'lon'],
      dtype='object')
