In [1]:
# Libraries
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# 1. Load the data
# The CSV is fetched over HTTP on every run, so this step needs network access.
url = "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/spaceship_titanic.csv"
spaceship = pd.read_csv(url)
print("First five rows:")
print(spaceship.head(5))

# 2. Check the shape of the data
print(f"\nData shape: {spaceship.shape}")

# 3. Check data types
column_types = spaceship.dtypes
print("\nData types:", column_types, sep="\n")

# 4. Check missing values
# isna() is the same check as isnull(); summing the boolean mask counts gaps per column.
missing_counts = spaceship.isna().sum()
print("\nMissing values per column:", missing_counts, sep="\n")

# 5. Drop missing values
# Any row with at least one missing field is removed; the name `spaceship`
# is rebound to the filtered frame from here on.
spaceship = spaceship.dropna(axis=0, how="any")
print(f"\nData shape after dropping rows with missing values: {spaceship.shape}")

# 6. Transform 'Cabin'
def _cabin_prefix(cabin):
    """Return the text before the first '/' (e.g. 'B/0/P' -> 'B').

    Non-string values are handed back unchanged.
    """
    if not isinstance(cabin, str):
        return cabin
    return cabin.partition('/')[0]


# 7. Drop 'PassengerId' and 'Name' as they are not useful for prediction.
# Steps 6 and 7 are chained: replace 'Cabin' with its prefix, then discard the
# two identifier columns.
spaceship = (
    spaceship
    .assign(Cabin=spaceship['Cabin'].map(_cabin_prefix))
    .drop(columns=['PassengerId', 'Name'])
)

# 8. Separate target from features before applying dummies.
target_column = 'Transported'
y = spaceship[target_column]

# 9. For non-numerical columns, create dummies.
# drop_first=True omits one level per encoded column.
X = pd.get_dummies(spaceship.drop(columns=[target_column]), drop_first=True)

# Optional: Check the final features
print("\nFinal features after encoding:", X.head(), sep="\n")

# 10. Perform Train-Test Split (80% train, 20% test)
# random_state pins the shuffle so the same rows land in each partition on re-run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 11. Model Selection using K-Nearest Neighbors
# KNN classifies by distance between rows. The spending columns reach into the
# thousands while the dummy columns are 0/1, so without rescaling the spending
# columns would dominate every distance. Standardising inside a pipeline keeps
# the scaler fitted on the training partition only, and `knn.predict` /
# `knn.score` still accept the raw (unscaled) feature frames.
knn = make_pipeline(StandardScaler(), KNeighborsClassifier())

# 12. Fit the model to the training data.
knn.fit(X_train, y_train)

# 13. Evaluate the model on the test set.
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("\nKNN Model Accuracy on Test Set:", accuracy)



First five rows:
  PassengerId HomePlanet CryoSleep  Cabin  Destination   Age    VIP  \
0     0001_01     Europa     False  B/0/P  TRAPPIST-1e  39.0  False   
1     0002_01      Earth     False  F/0/S  TRAPPIST-1e  24.0  False   
2     0003_01     Europa     False  A/0/S  TRAPPIST-1e  58.0   True   
3     0003_02     Europa     False  A/0/S  TRAPPIST-1e  33.0  False   
4     0004_01      Earth     False  F/1/S  TRAPPIST-1e  16.0  False   

   RoomService  FoodCourt  ShoppingMall     Spa  VRDeck               Name  \
0          0.0        0.0           0.0     0.0     0.0    Maham Ofracculy   
1        109.0        9.0          25.0   549.0    44.0       Juanna Vines   
2         43.0     3576.0           0.0  6715.0    49.0      Altark Susent   
3          0.0     1283.0         371.0  3329.0   193.0       Solam Susent   
4        303.0       70.0         151.0   565.0     2.0  Willy Santantines   

   Transported  
0        False  
1         True  
2        False  
3        False  
4 