# 06 — Modeling: Classification (landing success 'Class')

In [None]:
import pandas as pd, numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Load the full launch table, falling back to the bundled sample file when the
# full one is missing. Attempting the read and catching the miss avoids a
# separate path pre-check (and any dependency on the `os` module).
try:
    df = pd.read_csv('data/launches.csv')
except FileNotFoundError:
    df = pd.read_csv('data/sample_launches.csv')

# Target is the 'Class' column cast to int; features are every remaining
# column except 'Date' and 'Serial'.
y = df['Class'].astype(int)
X = df.drop(columns=['Class','Date','Serial'])

# Partition the feature columns by dtype so that EVERY column is routed
# somewhere: numeric dtypes of any width are passed through, and everything
# else (object, bool, category, ...) is one-hot encoded. Listing only specific
# dtypes would let unlisted ones fall through to ColumnTransformer's default
# remainder='drop' and silently vanish from the model.
num = X.select_dtypes(include='number').columns.tolist()
cat = X.select_dtypes(exclude='number').columns.tolist()

# NOTE(review): numeric features are passed through unscaled; LogisticRegression
# and the RBF SVC are scale-sensitive, so consider adding a scaler — confirm
# against the intended baseline before changing it.
pre = ColumnTransformer([
    ('num', 'passthrough', num),
    # handle_unknown='ignore': categories seen only in the test split encode
    # as all-zeros instead of raising at predict time.
    ('cat', OneHotEncoder(handle_unknown='ignore'), cat),
])

# Baseline classifiers compared on the same preprocessing and split.
models = {
    'LR': LogisticRegression(max_iter=1000),
    'SVM_rbf': SVC(kernel='rbf'),
    'DT': DecisionTreeClassifier(random_state=42),
}

# 80/20 hold-out split, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit each model behind the shared preprocessor and record test accuracy,
# keyed by model name.
results = {}
for name, model in models.items():
    pipe = Pipeline([('prep', pre), ('clf', model)])
    pipe.fit(X_train, y_train)
    acc = accuracy_score(y_test, pipe.predict(X_test))
    results[name] = acc

results


In [None]:
# Name of the model with the highest recorded test accuracy (the first one in
# insertion order wins a tie), displayed together with that accuracy.
best = max(results.keys(), key=lambda model_name: results[model_name])
best, results[best]


In [None]:
# Tune a decision tree with a 3-fold grid search over the training split, then
# report accuracy and the confusion matrix on the held-out test split.
pipe = Pipeline([('prep', pre), ('clf', DecisionTreeClassifier(random_state=42))])

# Keys use the 'clf__' prefix to address parameters of the pipeline's 'clf' step.
param_grid = {
    'clf__max_depth': [2, 3, 4, 5, None],
    'clf__min_samples_split': [2, 4, 8],
}
grid = GridSearchCV(pipe, param_grid, cv=3)
grid.fit(X_train, y_train)

# Predict on the test split once and reuse the result for both metrics.
y_pred = grid.predict(X_test)
test_acc = accuracy_score(y_test, y_pred)
print("Best params:", grid.best_params_, " Test accuracy:", round(test_acc,4))
print(confusion_matrix(y_test, y_pred))
