In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


#Preprocessing: load the launches CSV, build a binary success label, derive LaunchYear,
#clean Price, drop incomplete rows, one-hot encode the categorical columns, then
#split (stratified 80/20) and standardise the features.
CATEGORICAL_COLS = ["Organisation", "Location", "Rocket_Status"]
NUMERIC_COLS = ["Price", "LaunchYear"]

df = pd.read_csv("../data/mission_launches.csv")


#drop irrelevant columns (errors="ignore" makes this a no-op when they are absent)
df = df.drop(columns=["Unnamed: 0", "Unnamed: 0.1"], errors="ignore")

#converting to binary ( success = 1, anything else = 0)
#a missing Mission_Status is kept as NaN instead of being labelled 0: astype(str) would
#otherwise turn it into the string "nan", which compares unequal to "success"
status = df["Mission_Status"]
df["y"] = (
    (status.astype(str).str.strip().str.lower() == "success")
    .astype(float)
    .where(status.notna())
)

#converting date and time to just the launch year
#utc=True puts tz-aware and tz-naive strings into one datetime dtype so .dt.year is always available
df["LaunchYear"] = pd.to_datetime(df["Date"], errors="coerce", utc=True).dt.year

#cleaning prices: strip "$" and thousands separators, anything unparseable becomes NaN
price_text = df["Price"].astype(str)
price_text = price_text.str.replace("$", "", regex=False)
price_text = price_text.str.replace(",", "", regex=False)
df["Price"] = pd.to_numeric(price_text, errors="coerce")

#setting features and making copies
X = df[CATEGORICAL_COLS + NUMERIC_COLS].copy()
y = df["y"].copy()

#joining x and y together and dropping rows that are missing any feature or the label
#categoricals are included on purpose: get_dummies encodes a missing category as an
#all-zero row, which would be indistinguishable from the dropped baseline category
data = pd.concat([X, y.rename("y")], axis=1).dropna()
X = data.drop(columns=["y"])
y = data["y"].astype(int)


#one-hot encoding (float dummies so every column handed to the scaler is numeric)
X_encoded = pd.get_dummies(X, columns=CATEGORICAL_COLS, drop_first=True, dtype=float)

#drop any remaining NaNs (none are expected after the step above; kept as a safety net
#and so that data2 stays defined in case later cells reference it)
data2 = pd.concat([X_encoded, y.rename("y")], axis=1).dropna()
X_clean = data2.drop(columns=["y"])
y_clean = data2["y"]



#train and test split (stratified so both parts keep the same success/failure ratio)
X_train, X_test, y_train, y_test = train_test_split(
    X_clean, y_clean, test_size=0.2, random_state=42, stratify=y_clean
    )

#fitting scaler on training data and transforming training and test data
#(fitting on the training part only keeps test-set statistics out of the model)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay, roc_auc_score

#Neural-network baseline: build, train and evaluate a small feed-forward binary classifier
#on the scaled features produced by the preprocessing cell.

#seeding Python, NumPy and TensorFlow so weight initialisation and batch shuffling
#repeat between runs; without this the reported metrics change on every execution
SEED = 42
tf.keras.utils.set_random_seed(SEED)

n_features = X_train_scaled.shape[1]

#two hidden ReLU layers and a single sigmoid unit that outputs P(success)
#an explicit Input object declares the feature dimension instead of passing
#input_shape to the first Dense layer
model = Sequential([
    tf.keras.Input(shape = (n_features,)),
    Dense(32, activation = "relu"),
    Dense(16, activation = "relu"),
    Dense(1, activation = "sigmoid")
])

model.compile(
    optimizer = "adam",
    loss = "binary_crossentropy",
    metrics = ["accuracy"]
)

model.summary()



#validation_split holds out the last 20% of the training rows for per-epoch monitoring;
#the test set is not touched until the evaluation below
history = model.fit(
    X_train_scaled,
    y_train,
    epochs = 20,
    batch_size = 32,
    validation_split = 0.2,
    verbose = 1
)


#predicted probabilities, thresholded at 0.5 to get class labels
y_prob_nn = model.predict(X_test_scaled).ravel()
y_pred_nn = (y_prob_nn >= 0.5).astype(int)

print("Accuracy:", accuracy_score(y_test, y_pred_nn))
print("ROC-AUC:", roc_auc_score(y_test, y_prob_nn))
print("\nClassification report:\n", classification_report(y_test, y_pred_nn))
#accuracy alone cannot show whether the minority class is ever predicted, so the raw
#counts are printed as well (rows = true class, columns = predicted class)
print("\nConfusion matrix:\n", confusion_matrix(y_test, y_pred_nn))



2025-12-16 20:26:34.300119: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-12-16 20:26:34.349508: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-12-16 20:26:35.986810: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-12-16 20:26:39.057992: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To tur

Epoch 1/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.8562 - loss: 0.4599 - val_accuracy: 0.9351 - val_loss: 0.3638
Epoch 2/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9493 - loss: 0.2805 - val_accuracy: 0.9351 - val_loss: 0.2803
Epoch 3/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9477 - loss: 0.2217 - val_accuracy: 0.9351 - val_loss: 0.2552
Epoch 4/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9477 - loss: 0.1969 - val_accuracy: 0.9351 - val_loss: 0.2457
Epoch 5/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9493 - loss: 0.1816 - val_accuracy: 0.9351 - val_loss: 0.2397
Epoch 6/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9493 - loss: 0.1709 - val_accuracy: 0.9351 - val_loss: 0.2359
Epoch 7/20
[1m20/20[0m [32m━━━━━━━━━

Conclusion 

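The neural network did not meaningfully outperform logistic regression. This is consistent with the relationship between the features and mission success being mostly
linear, but it does not establish it: the validation accuracy stays at 0.9351 in every logged epoch, which is also what a model that always predicts the majority class
would produce, so the comparison should be confirmed with class-aware metrics (ROC-AUC, per-class recall). On this evidence, the neural network adds complexity without improving the results.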