1. Let's import libraries and data

In [2]:
# Install the third-party libraries this notebook imports
# (pandas, matplotlib, seaborn, statsmodels).
# `%pip` (rather than `!pip`) targets the environment of the running kernel.
%pip install pandas matplotlib seaborn statsmodels

Collecting statsmodels
  Downloading statsmodels-0.14.5-cp313-cp313-win_amd64.whl.metadata (9.8 kB)
Collecting scipy!=1.9.2,>=1.8 (from statsmodels)
  Using cached scipy-1.16.1-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting patsy>=0.5.6 (from statsmodels)
  Downloading patsy-1.0.1-py2.py3-none-any.whl.metadata (3.3 kB)
Downloading statsmodels-0.14.5-cp313-cp313-win_amd64.whl (9.6 MB)
   ---------------------------------------- 0.0/9.6 MB ? eta -:--:--
   --------- ------------------------------ 2.4/9.6 MB 18.6 MB/s eta 0:00:01
   ------------------------------------- -- 8.9/9.6 MB 27.4 MB/s eta 0:00:01
   ---------------------------------------- 9.6/9.6 MB 26.4 MB/s  0:00:00
Downloading patsy-1.0.1-py2.py3-none-any.whl (232 kB)
Using cached scipy-1.16.1-cp313-cp313-win_amd64.whl (38.5 MB)
Installing collected packages: scipy, patsy, statsmodels

   ---------------------------------------- 0/3 [scipy]
   ---------------------------------------- 0/3 [scipy]
   --------------------

In [3]:
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 

# All cleaned CSV files live in one directory. Building every path from a
# single constant keeps the separators consistent, so a per-file typo such as
# "../data7clean/..." (a "7" in place of "/") cannot creep in.
# NOTE(review): the path is relative to the kernel's working directory --
# confirm the notebook is launched from a folder that sits next to `data/`.
CLEAN_DATA_DIR = "../data/clean"

launches = pd.read_csv(f"{CLEAN_DATA_DIR}/launches.csv")
rockets = pd.read_csv(f"{CLEAN_DATA_DIR}/rockets.csv")
launchpads = pd.read_csv(f"{CLEAN_DATA_DIR}/launchpads.csv")
payloads = pd.read_csv(f"{CLEAN_DATA_DIR}/payloads.csv")
cores = pd.read_csv(f"{CLEAN_DATA_DIR}/cores.csv")

FileNotFoundError: [Errno 2] No such file or directory: '../data/clean/launches.csv'

2. Launches per Quarter + Cumulative Successes

In [None]:
# Parse the launch timestamp and bucket every launch into a calendar quarter.
# NOTE(review): assumes `date_lisbon` parses to one uniform datetime dtype --
# confirm the CSV does not mix UTC offsets.
launches["date_lisbon"] = pd.to_datetime(launches["date_lisbon"])
launches["quarter"] = launches["date_lisbon"].dt.to_period("Q")

# One row per quarter: number of launches and number of successful launches.
quarterly = launches.groupby("quarter").agg(
    launches=("id", "count"),
    successes=("success", "sum"),
).reset_index()

# Running total of successes, as announced by the section heading.
quarterly["cumulative_successes"] = quarterly["successes"].cumsum()

# `quarter` has Period dtype, which is neither numeric nor datetime and is not
# reliably accepted by seaborn as an axis variable; plot against the timestamp
# at which each quarter starts instead.
quarterly["quarter_start"] = quarterly["quarter"].dt.to_timestamp()

fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=quarterly, x="quarter_start", y="launches", label="Launches", ax=ax)
sns.lineplot(data=quarterly, x="quarter_start", y="successes", label="Successes", ax=ax)
ax.set_xlabel("Quarter")
ax.set_ylabel("Count per quarter")
ax.tick_params(axis="x", rotation=45)

# The running total grows far beyond the per-quarter counts, so it gets its
# own y-axis on the right-hand side.
ax_total = ax.twinx()
ax_total.plot(
    quarterly["quarter_start"],
    quarterly["cumulative_successes"],
    color="tab:green",
    linestyle="--",
    label="Cumulative successes",
)
ax_total.set_ylabel("Cumulative successes")

# Merge the entries of both axes into a single legend.
handles, labels = ax.get_legend_handles_labels()
total_handles, total_labels = ax_total.get_legend_handles_labels()
ax.legend(handles + total_handles, labels + total_labels, loc="upper left")

ax.set_title("Launches and Successes per Quarter")
plt.show()

3. Success rates by rocket family (regex-based classification)

In [None]:
import re

# Ordered (pattern, family) pairs; the first pattern found in the name wins.
_ROCKET_FAMILY_PATTERNS = (
    (re.compile(r"Falcon 9", re.I), "Falcon 9"),
    (re.compile(r"Falcon Heavy", re.I), "Falcon Heavy"),
    (re.compile(r"Starship", re.I), "Starship"),
)


def rocket_family(name):
    """Map a rocket name to its family label.

    The name is searched case-insensitively for "Falcon 9", "Falcon Heavy"
    and "Starship", in that order.

    Parameters
    ----------
    name : str
        Rocket name. Any non-string value (for example ``None`` or the NaN
        that pandas uses for a missing cell) is treated as unclassifiable.

    Returns
    -------
    str
        "Falcon 9", "Falcon Heavy", "Starship", or "Other" when no pattern
        matches or ``name`` is not a string.
    """
    # `re` raises TypeError on non-string input; missing names are "Other".
    if not isinstance(name, str):
        return "Other"
    for pattern, family in _ROCKET_FAMILY_PATTERNS:
        if pattern.search(name):
            return family
    return "Other"
    

# Label every rocket with its family.
rockets["family"] = rockets["name"].apply(rocket_family)

# Attach the family to each launch. The rocket key is renamed to `rocket`
# before merging so that `launches` keeps its own `id` column (merging on
# differently named keys would split it into `id_x` / `id_y`). Dropping a
# pre-existing `family` column makes the cell safe to re-run.
rocket_families = rockets[["id", "family"]].rename(columns={"id": "rocket"})
launches = (
    launches
    .drop(columns=["family"], errors="ignore")
    # validate: fail loudly if a rocket id is duplicated, rather than
    # silently duplicating launch rows.
    .merge(rocket_families, on="rocket", how="left", validate="many_to_one")
)

# Launch and success counts per family.
# NOTE(review): launches whose `success` is missing still count in the
# denominator -- confirm the cleaned data contains only completed launches.
family_rates = launches.groupby("family").agg(
    launches=("id", "count"),
    successes=("success", "sum"),
).reset_index()

family_rates["success_rate"] = family_rates["successes"] / family_rates["launches"] * 100

# Plot against the family name; otherwise the bars are labelled with the
# 0..n-1 positional index left behind by `reset_index`.
ax = family_rates.plot(x="family", y="success_rate", kind="bar", legend=False)
ax.set_title("Success Rates by Rocket Family")
ax.set_xlabel("Rocket Family")
ax.set_ylabel("Success Rate (%)")
plt.show()

4. Launchpad Reliability (Wilson 95% CI vs. Normal Approximation)

In [None]:
from statsmodels.stats.proportion import proportion_confint

# Launch and success counts per launchpad.
# The launch count uses the group size, so it does not depend on how the
# launch identifier column is named (the merge with `rockets` can leave it
# as `id_x` instead of `id`).
# NOTE(review): assumes the launches table stores the pad in a column named
# `launchpad` -- confirm against the CSV header.
pad_stats = (
    launches.groupby("launchpad")
    .agg(launches=("success", "size"), successes=("success", "sum"))
    .reset_index()
)
pad_stats["success_rate"] = pad_stats["successes"] / pad_stats["launches"]

# 95% confidence intervals for the success proportion, computed for all pads
# at once (proportion_confint accepts array-like counts).
for method in ("wilson", "normal"):
    ci_low, ci_upp = proportion_confint(
        pad_stats["successes"], pad_stats["launches"], alpha=0.05, method=method
    )
    pad_stats[f"{method}_low"] = ci_low
    pad_stats[f"{method}_upp"] = ci_upp

# Draw both intervals side by side for each pad so they can be compared.
positions = pad_stats.index.to_numpy()
offset = 0.15
fig, ax = plt.subplots(figsize=(12, 6))
for method, label, shift in (
    ("wilson", "Wilson 95% CI", -offset),
    ("normal", "Normal approx. 95% CI", offset),
):
    # errorbar rejects negative error lengths; floating-point rounding can
    # produce a tiny negative when a bound coincides with the observed rate.
    lower_err = (pad_stats["success_rate"] - pad_stats[f"{method}_low"]).clip(lower=0)
    upper_err = (pad_stats[f"{method}_upp"] - pad_stats["success_rate"]).clip(lower=0)
    ax.errorbar(
        positions + shift,
        pad_stats["success_rate"],
        yerr=[lower_err, upper_err],
        fmt="o",
        capsize=3,
        label=label,
    )

ax.set_xticks(positions)
ax.set_xticklabels(pad_stats["launchpad"], rotation=90)
ax.set_title("Launchpad Reliability (Wilson vs Normal 95% CI)")
ax.set_ylabel("Success Rate")
ax.legend()
plt.show()

5. Payload Mass vs Outcome

In [None]:
# Pair every payload with the outcome of the launch that carried it.
# The launch identifier in `launches` is `id`, or `id_x` once the merge with
# `rockets` (on differently named key columns) has run; accept either.
launch_id_col = "id" if "id" in launches.columns else "id_x"
launch_outcomes = launches[[launch_id_col, "success"]].rename(
    columns={launch_id_col: "launch"}
)
payload_outcomes = payloads.merge(launch_outcomes, on="launch", how="inner")

# `data` must be passed exactly once; repeating the keyword is a SyntaxError.
fig, ax = plt.subplots(figsize=(9, 6))
sns.boxplot(data=payload_outcomes, x="success", y="mass_kg", ax=ax)
ax.set_title("Payload Mass vs Launch Outcome")
ax.set_ylabel("Payload Mass (kg)")
ax.set_xlabel("Launch Outcome (0 = Failure, 1 = Success)")
plt.show()