# Hands-on Lab: Complete the Machine Learning Prediction Lab
This notebook predicts SpaceX launch outcomes (the binary `class` column) from launch data.
We preprocess the data, train several classification models, and compare their performance on a held-out test set.

In [None]:
!pip install pandas numpy scikit-learn matplotlib seaborn

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

## Step 1: Load the dataset

In [None]:
# Download the launch records CSV (network access required) into a DataFrame
# and preview the first five rows to confirm the columns loaded as expected.
url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/spacex_launch_dash.csv'
df = pd.read_csv(filepath_or_buffer=url)
df.head(n=5)

## Step 2: Data preprocessing and feature selection

In [None]:
# Build the feature matrix: start from the two numeric columns, then append
# one-hot encodings for each categorical column that is present in the data.
feature_parts = [df[['Payload Mass (kg)', 'Flight Number']]]
for column, prefix in (('Launch Site', 'site'), ('Booster Version Category', 'booster')):
    if column in df.columns:
        feature_parts.append(pd.get_dummies(df[column], prefix=prefix))
features = pd.concat(feature_parts, axis=1)

# Binary label to predict.
target = df['class']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Step 3: Train and evaluate models

In [None]:
# Candidate classifiers, keyed by the display name used in reports and plots.
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'K-Nearest Neighbors': KNeighborsClassifier(n_neighbors=5),
    # probability=True adds an internally cross-validated calibration step that
    # shuffles data, so the seed is pinned to keep the fitted model reproducible.
    'Support Vector Machine': SVC(kernel='rbf', probability=True, random_state=42)
}

# One {'Model', 'Accuracy'} record per classifier, collected for the summary table.
results = []

for name, model in models.items():
    # Fit on the scaled training split and score on the scaled test split.
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    results.append({'Model': name, 'Accuracy': acc})
    # The newlines must be written as \n escapes: a single-quoted f-string
    # cannot span several physical lines (doing so is a SyntaxError).
    print(f'\n{name} Classification Report:\n', classification_report(y_test, y_pred))

# Rank the models from most to least accurate; row 0 is the best performer.
results_df = pd.DataFrame(results).sort_values(by='Accuracy', ascending=False)
results_df

## Step 4: Visualize model accuracy comparison

In [None]:
# Bar chart of test accuracy, one bar per model (already sorted best-first).
# `palette` without `hue` is deprecated in recent seaborn releases, so the
# x variable is also assigned to `hue`; dodge=False keeps full-width bars on
# older releases that would otherwise offset them per hue level.
ax = sns.barplot(
    data=results_df,
    x='Model',
    y='Accuracy',
    hue='Model',
    palette='viridis',
    dodge=False,
)
# The hue legend would only repeat the x-axis labels; drop it when one is drawn.
if ax.get_legend() is not None:
    ax.get_legend().remove()
ax.tick_params(axis='x', labelrotation=45)
ax.set_title('Model Accuracy Comparison')
plt.show()

## Step 5: Confusion Matrix for the best model

In [None]:
# Take the model named in the first row of the results table and reuse its
# already-fitted estimator to predict on the scaled test split.
best_model_name = results_df['Model'].iloc[0]
best_model = models[best_model_name]
y_pred_best = best_model.predict(X_test_scaled)

# Count matrix of true labels versus predicted labels.
cm = confusion_matrix(y_test, y_pred_best)

# Annotated heatmap; fmt='d' renders the cell counts as integers.
heatmap_ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
heatmap_ax.set_title(f'Confusion Matrix - {best_model_name}')
heatmap_ax.set_xlabel('Predicted')
heatmap_ax.set_ylabel('Actual')
plt.show()

## Step 6: Conclusion
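This notebook trained and evaluated four classification models (logistic regression, decision tree, k-nearest neighbors, and support vector machine) to predict SpaceX launch success, compared them by test-set accuracy, and inspected the confusion matrix of the best-performing model.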