# Falcon 9 Landing Prediction Model

In [51]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline, make_pipeline
from category_encoders import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

In [8]:
#Import data
df = pd.read_csv("features.csv")
df.head()

Unnamed: 0,FlightNumber,PayloadMass,Orbit,LaunchSite,Flights,GridFins,Reused,Legs,LandingPad,Block,ReusedCount,Serial,Class
0,1,6123.547647,LEO,CCSFS SLC 40,1,False,False,False,,1.0,0,B0003,0
1,2,525.0,LEO,CCSFS SLC 40,1,False,False,False,,1.0,0,B0005,0
2,3,677.0,ISS,CCSFS SLC 40,1,False,False,False,,1.0,0,B0007,0
3,4,500.0,PO,VAFB SLC 4E,1,False,False,False,,1.0,0,B1003,0
4,5,3170.0,GTO,CCSFS SLC 40,1,False,False,False,,1.0,0,B1004,0


In [10]:
#Check for high cardinality and leaky columns
df.nunique()

FlightNumber    90
PayloadMass     68
Orbit           11
LaunchSite       3
Flights          6
GridFins         2
Reused           2
Legs             2
LandingPad       5
Block            5
ReusedCount     10
Serial          53
Class            2
dtype: int64

In [12]:
df_final = df.drop(columns=["FlightNumber","Serial", "LandingPad"])
df_final.head()

Unnamed: 0,PayloadMass,Orbit,LaunchSite,Flights,GridFins,Reused,Legs,Block,ReusedCount,Class
0,6123.547647,LEO,CCSFS SLC 40,1,False,False,False,1.0,0,0
1,525.0,LEO,CCSFS SLC 40,1,False,False,False,1.0,0,0
2,677.0,ISS,CCSFS SLC 40,1,False,False,False,1.0,0,0
3,500.0,PO,VAFB SLC 4E,1,False,False,False,1.0,0,0
4,3170.0,GTO,CCSFS SLC 40,1,False,False,False,1.0,0,0


In [69]:
le = LabelEncoder()
cat_columns =['Orbit','LaunchSite', 'GridFins','Reused', 'Legs']
for column in cat_columns:
    df_final[column] = le.fit_transform(df[column])

In [71]:
df_final

Unnamed: 0,PayloadMass,Orbit,LaunchSite,Flights,GridFins,Reused,Legs,Block,ReusedCount,Class
0,6123.547647,5,0,1,0,0,0,1.0,0,0
1,525.000000,5,0,1,0,0,0,1.0,0,0
2,677.000000,4,0,1,0,0,0,1.0,0,0
3,500.000000,7,2,1,0,0,0,1.0,0,0
4,3170.000000,2,0,1,0,0,0,1.0,0,0
...,...,...,...,...,...,...,...,...,...,...
85,15600.000000,10,1,2,1,1,1,5.0,12,1
86,15600.000000,10,1,3,1,1,1,5.0,13,1
87,15600.000000,10,1,6,1,1,1,5.0,12,1
88,15600.000000,10,0,3,1,1,1,5.0,12,1


## Split

In [75]:
target = "Class"
X = df_final.drop(columns=target)
y = df_final[target]

Divide data into test and train set

In [77]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 42)

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

X_train shape: (72, 9)
y_train shape: (72,)
X_test shape: (18, 9)
y_test shape: (18,)


## Build Model 

In [79]:
#logistic regression classifier
lr_model = LogisticRegression(max_iter=1000, random_state=42)
# Fit model to training data
lr_model.fit(X_train, y_train)

In [83]:
#Decision tree classifier
training_acc = []
validation_acc = []
depth = []
for d in range(2, 10):
    test_model = DecisionTreeClassifier(max_depth=d, random_state=42)
    test_model.fit(X_train, y_train)
    depth.append(d)
    training_acc.append(test_model.score(X_train,y_train))
    validation_acc.append(test_model.score(X_test,y_test))

best_depth = {"Depth":depth,
              "Training Accuracy Scores": training_acc,
              "Validation Accuracy Scores": validation_acc}

In [85]:
x = pd.DataFrame(best_depth)
x

Unnamed: 0,Depth,Training Accuracy Scores,Validation Accuracy Scores
0,2,0.875,0.888889
1,3,0.902778,0.888889
2,4,0.902778,0.888889
3,5,0.958333,0.888889
4,6,0.958333,0.888889
5,7,1.0,0.888889
6,8,1.0,0.888889
7,9,1.0,0.888889


In [103]:
tree_model = DecisionTreeClassifier(max_depth=6, random_state=42)
# Fit model to training data
tree_model.fit(X_train, y_train)

In [89]:
#XGBoost classifier
import xgboost as xgb

In [91]:
xgb_model = xgb.XGBClassifier(
    objective='binary:logistic',
    learning_rate=0.1,
    max_depth=3,
    random_state=42
)

In [93]:
xgb_model.fit(X_train, y_train)

### Evaluate

In [95]:
lr_acc_train = lr_model.score(X_train, y_train)
lr_acc_test = lr_model.score(X_test, y_test)

print("Logistic Regression Training Accuracy:", round(lr_acc_train, 2))
print("Logistic Regression Test Accuracy:", round(lr_acc_test, 2))

Logistic Regression Training Accuracy: 0.85
Logistic Regression Test Accuracy: 0.94


In [105]:
tree_acc_train = tree_model.score(X_train, y_train)
tree_acc_test = tree_model.score(X_test, y_test)

print("Decision Tree Training Accuracy:", round(tree_acc_train, 2))
print("Decision Tree Test Accuracy:", round(tree_acc_test, 2))

Decision Tree Training Accuracy: 0.96
Decision Tree Test Accuracy: 0.89


In [101]:
xgb_acc_train = xgb_model.score(X_train, y_train)
xgb_acc_test = xgb_model.score(X_test, y_test)

print("XGBoost Training Accuracy:", round(xgb_acc_train, 2))
print("XGBoost Test Accuracy:", round(xgb_acc_test, 2))

XGBoost Training Accuracy: 0.99
XGBoost Test Accuracy: 0.94
