In [1]:
import pandas as pd

df = pd.read_csv("data/spacex_era5_ksc_launches_2010_2024.csv")
df.head()

Unnamed: 0,id,name,date_utc,year,launchpad,rocket_name,launched_flag,weather_scrub_flag,details,date_hour,date_hour_utc_naive,time_hour,wind_speed_10m,t2m_C,tcc_frac,tp_mmhr,cp_mmhr,msl
0,5eb87cddffd86e000604b32f,Falcon 9 Test Flight,2010-06-04 18:45:00+00:00,2010,5e9e4501f509094ba4566f84,Falcon 9,1.0,False,,2010-06-04 18:00:00+00:00,2010-06-04 18:00:00,2010-06-04 18:00:00,1.9,27.712921,0.495819,0.0,0.0,101518.875
1,5eb87cdeffd86e000604b330,COTS 1,2010-12-08 15:43:00+00:00,2010,5e9e4501f509094ba4566f84,Falcon 9,1.0,False,,2010-12-08 15:00:00+00:00,2010-12-08 15:00:00,2010-12-08 15:00:00,2.469445,12.554108,0.012634,0.0,0.0,102057.875
2,5eb87cdfffd86e000604b331,COTS 2,2012-05-22 07:44:00+00:00,2012,5e9e4501f509094ba4566f84,Falcon 9,1.0,False,"Launch was scrubbed on first attempt, second l...",2012-05-22 07:00:00+00:00,2012-05-22 07:00:00,2012-05-22 07:00:00,3.432389,24.871124,0.0,0.0,0.0,101460.69
3,5eb87ce0ffd86e000604b332,CRS-1,2012-10-08 00:35:00+00:00,2012,5e9e4501f509094ba4566f84,Falcon 9,1.0,False,"CRS-1 successful, but the secondary payload wa...",2012-10-08 00:00:00+00:00,2012-10-08 00:00:00,2012-10-08 00:00:00,4.558444,26.949371,0.952881,0.005722,0.002384,101528.44
4,5eb87ce1ffd86e000604b333,CRS-2,2013-03-01 19:10:00+00:00,2013,5e9e4501f509094ba4566f84,Falcon 9,1.0,False,Last launch of the original Falcon 9 v1.0 laun...,2013-03-01 19:00:00+00:00,2013-03-01 19:00:00,2013-03-01 19:00:00,4.783648,15.626373,0.942108,0.0,0.0,101402.25


In [2]:
def flag_any_scrub_or_failure(details: str) -> bool:
    if not isinstance(details, str):
        return False
    text = details.lower()
    keywords = [
        "scrub", "scrubbed", "abort", "aborted",
        "delayed", "delay", "postpone", "postponed",
        "failure", "failed", "anomaly"
    ]
    return any(kw in text for kw in keywords)

df["text_scrub_like"] = df["details"].apply(flag_any_scrub_or_failure)


In [3]:
df["any_issue_flag"] = (
    (df["launched_flag"] == 0) | df["text_scrub_like"]
).astype(int)

df["any_issue_flag"].value_counts()


any_issue_flag
0    143
1     11
Name: count, dtype: int64

In [4]:
def classify_scrub_reason(details: str) -> str:
    if not isinstance(details, str):
        return "unknown"
    text = details.lower()

    weather_keywords = [
        "weather", "upper-level winds", "lightning", "storm",
        "cloud rule", "thick cloud", "rain", "precipitation",
        "conditions", "anvil"
    ]

    technical_keywords = [
        "engine", "merlin", "valve", "sensor", "pressur",
        "computer", "autosequence", "guidance", "leak",
        "fuel", "oxidizer", "fairing", "hardware"
    ]

    range_keywords = [
        "range violation", "range held", "boat in", "ship in",
        "keep-out zone", "airspace"
    ]

    if any(kw in text for kw in weather_keywords):
        return "weather"
    if any(kw in text for kw in technical_keywords):
        return "technical"
    if any(kw in text for kw in range_keywords):
        return "range"
    
    return "unknown"

df["scrub_reason"] = df["details"].apply(classify_scrub_reason)
df["scrub_reason"].value_counts()


scrub_reason
unknown      135
technical     14
weather        5
Name: count, dtype: int64

In [5]:
df["weather_issue_flag"] = (df["scrub_reason"] == "weather").astype(int)
df["weather_issue_flag"].value_counts()


weather_issue_flag
0    149
1      5
Name: count, dtype: int64

In [6]:
candidates = df[df["any_issue_flag"] == 1]
candidates.to_csv("data/scrub_candidates_for_manual_review.csv", index=False)


In [10]:
y = df["weather_issue_flag"]