In [129]:
#1. Imports
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

## Load Data

In [99]:
df = pd.read_csv("Space_Corrected.csv")
df.columns = df.columns.str.strip()

## Extracting features

In [133]:
#3. Feature: Mission Destination from 'Detail'
def infer_destination(detail):
    if not isinstance(detail, str):
        return "Unknown"
    d = detail.lower()
    if any(x in d for x in ["starlink", "skysat", "flock", "cosmos", "yaogan", "iridium", "eo", "earth"]):
        return "LEO"
    elif any(x in d for x in ["gps", "galileo", "beidou", "glonass"]):
        return "MEO"
    elif any(x in d for x in ["intelsat", "inmarsat", "eutelsat", "thor", "arabsat"]):
        return "GEO"
    elif any(x in d for x in ["luna", "lunar", "chandrayaan", "artemis", "smart"]):
        return "Moon"
    elif any(x in d for x in ["mars", "maven", "marsnik", "tianwen"]):
        return "Mars"
    else:
        return "Unknown"

df["Destination"] = df["Detail"].apply(infer_destination)

In [135]:
#4. Date and Year Extraction
df["Datum"] = pd.to_datetime(df["Datum"], errors="coerce", utc=True)
df["Datum"] = df["Datum"].dt.tz_localize(None)
df = df.dropna(subset=["Datum"])
df["Year"] = df["Datum"].dt.year

In [137]:
#5. Cost and Rocket Name
df["Cost"] = df["Rocket"].replace(r'[\$,]', '', regex=True).astype(float)
df["Rocket Name"] = df["Detail"].apply(lambda x: x.split("|")[0].strip() if isinstance(x, str) else "Unknown")

In [139]:
#6. Drop missing and prepare model_df
model_df = df.dropna(subset=["Rocket Name", "Year", "Cost", "Status Mission", "Location", "Destination"]).copy()

## Encode data

In [142]:
#7. Encode categorical columns
import numpy as np

le_rocket = LabelEncoder()
le_status = LabelEncoder()
le_location = LabelEncoder()
le_dest = LabelEncoder()
le_country = LabelEncoder()

model_df["Rocket_enc"] = le_rocket.fit_transform(model_df["Rocket Name"])
# ✅ Add 'Unknown' label to the encoder manually if needed
if "Unknown" not in le_rocket.classes_:
    all_rockets = list(le_rocket.classes_)
    all_rockets.append("Unknown")
    le_rocket.classes_ = np.array(all_rockets)

model_df["Status_enc"] = le_status.fit_transform(model_df["Status Mission"])
model_df["Location_enc"] = le_location.fit_transform(model_df["Location"])
model_df["Destination_enc"] = le_dest.fit_transform(model_df["Destination"])

In [148]:
#8. Feature: Country from Location
model_df["Country"] = model_df["Location"].apply(lambda x: x.split(",")[-1].strip())
model_df["Country_enc"] = le_country.fit_transform(model_df["Country"])

## Final extracted features

In [146]:
#9. Features and Target
X = model_df[["Rocket_enc", "Year", "Cost", "Status_enc", "Destination_enc"]]
y = model_df["Location_enc"]

In [151]:
#10. Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Model training

In [156]:
#11. Random Forest Training with Class Weights
rf = RandomForestClassifier(n_estimators=200, random_state=42, class_weight='balanced')
rf.fit(X_train, y_train)

## Model evaluating

In [159]:
#12. Evaluate Model
y_pred = rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("🚀 Final Random Forest Accuracy:", round(accuracy * 100, 2), "%")

🚀 Final Random Forest Accuracy: 80.21 %


## Example user input

In [123]:
from datetime import datetime
import pandas as pd

# === User Mission Input ===
rocket_name = "Falcon 9 Block 5"  # custom rocket
launch_date = "2027-09-14"        # full date input
cost = 40 * 1_000_000             # in USD
destination = "Mars"

# Extract year from date
year = datetime.strptime(launch_date, "%Y-%m-%d").year

# === Encode Inputs ===

# Rocket (use "Unknown" fallback)
rocket_enc = le_rocket.transform([rocket_name])[0] if rocket_name in le_rocket.classes_ else le_rocket.transform(["Unknown"])[0]

# Destination
destination_enc = le_dest.transform([destination])[0] if destination in le_dest.classes_ else le_dest.transform(["Unknown"])[0]

status_enc = le_status.transform(["Success"])[0]

# === Create Input Vector ===
input_df = pd.DataFrame([{
    "Rocket_enc": rocket_enc,
    "Year": year,
    "Cost": cost,
    "Status_enc": status_enc,
    "Destination_enc": destination_enc
}])

# === Predict Launch Site ===
predicted_index = rf.predict(input_df)[0]
predicted_location = le_location.inverse_transform([predicted_index])[0]

# === Top 3 with Probabilities ===
probas = rf.predict_proba(input_df)[0]
top_indexes = probas.argsort()[-3:][::-1]
top_sites = le_location.inverse_transform(top_indexes)
top_scores = [round(probas[i]*100, 2) for i in top_indexes]

# === Output Results ===
print("🚀 Recommended Launch Site:", predicted_location)
print("\n📊 Top 3 Launch Sites:")
for site, score in zip(top_sites, top_scores):
    country = site.split(",")[-1].strip()
    print(f" - {site} ({country}): {score}%")


🚀 Recommended Launch Site: Site 110/37, Baikonur Cosmodrome, Kazakhstan

📊 Top 3 Launch Sites:
 - Site 110/37, Baikonur Cosmodrome, Kazakhstan (Kazakhstan): 23.5%
 - LA-Y2, Tanegashima Space Center, Japan (Japan): 22.5%
 - SLC-40, Cape Canaveral AFS, Florida, USA (USA): 12.44%


In [None]:
## Export the model and encoded data int

In [127]:
import joblib

# Save RandomForest model
joblib.dump(rf, "launch_location_model.pkl")

# Save label encoders too
joblib.dump(le_rocket, "le_rocket.pkl")
joblib.dump(le_dest, "le_dest.pkl")
joblib.dump(le_location, "le_location.pkl")
joblib.dump(le_status, "le_status.pkl")
joblib.dump(le_country, "le_country.pkl")

['le_country.pkl']