In [1]:
import os

# Print the full path of every file found anywhere under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for file_path in input_file_paths:
    print(file_path)


/kaggle/input/spaceship-titanic/sample_submission.csv
/kaggle/input/spaceship-titanic/train.csv
/kaggle/input/spaceship-titanic/test.csv


In [2]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split


import warnings
# Install a global "ignore" filter: every warning raised after this point is
# silenced, including deprecation and data-quality warnings emitted by pandas
# and scikit-learn.
warnings.filterwarnings('ignore')

In [3]:
# Load the training dataset (only train.csv is read in this cell).
train_csv_path = '/kaggle/input/spaceship-titanic/train.csv'
data = pd.read_csv(train_csv_path)

In [4]:
# Dimensions of the freshly loaded training frame, as (rows, columns)
data.shape

(8693, 14)

In [5]:
# Preview the first two rows of the training frame
data.head(2)

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True


In [6]:
# Data preprocessing
#
# Produces three names used by later cells:
#   data             - the cleaned, fully numeric training frame
#   categorical_cols - names of the columns that were ordinal-encoded
#   oe               - dict mapping each categorical column to its fitted encoder
#
# NOTE: run this cell once, directly after loading the raw CSV. Running it a
# second time fails on the PassengerId drop because the column is already gone.

# Replace missing values with 0. No rows are dropped, so the row count is unchanged.
data = data.fillna(0)

# Drop the PassengerId column: it is a unique identifier, not a feature
data = data.drop(columns='PassengerId')

# Identify categorical columns (object dtype)
categorical_cols = data.select_dtypes(include='object').columns.tolist()

# Cast and encode each categorical column in a single pass
oe = {}
for col in categorical_cols:
    # After fillna(0) these columns hold a mix of original values and the
    # integer 0; casting to str gives the encoder one uniform type to sort.
    data[col] = data[col].astype(str)

    encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
    # Fit on a one-column DataFrame (not a bare array) so the encoder records
    # the column name and can later transform a DataFrame without a
    # feature-name mismatch. ravel() flattens the (n, 1) result to 1-D so it
    # is assigned as an ordinary column.
    data[col] = encoder.fit_transform(data[[col]]).ravel()
    oe[col] = encoder

In [7]:
# Target is the 'Transported' column; every remaining column is a feature
y = data['Transported']
X = data.drop(columns=['Transported'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Sanity-check the split: (features shape, target shape) for train, then for test
(X_train.shape, y_train.shape), (X_test.shape, y_test.shape)

(((6954, 12), (6954,)), ((1739, 12), (1739,)))

In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Build a random forest with a fixed seed and fit it on the training split
# (fit() returns the estimator itself, so `model` is the trained classifier)
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict on the held-out split
y_pred = model.predict(X_test)

# Overall accuracy on the held-out split, reported to two decimals
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Per-class precision / recall / F1 breakdown
report = classification_report(y_test, y_pred)
print(report)

Accuracy: 0.77
              precision    recall  f1-score   support

       False       0.77      0.77      0.77       861
        True       0.77      0.77      0.77       878

    accuracy                           0.77      1739
   macro avg       0.77      0.77      0.77      1739
weighted avg       0.77      0.77      0.77      1739



In [10]:
# Load Submission Data (read once; the raw frame is kept so PassengerId can be
# reused for the output file without re-reading the CSV)
submission_raw = pd.read_csv('/kaggle/input/spaceship-titanic/test.csv')

# Mirror the training preprocessing: replace missing values with 0 (no rows are
# dropped) and remove the PassengerId column, which is an identifier rather
# than a feature.
submission_features = submission_raw.fillna(0).drop(columns='PassengerId')

# Encode the categorical columns with the encoders fitted on the training data.
# The columns are taken from `oe` itself rather than from the dtypes of this
# frame, so a column pandas happens to infer differently here is still cast to
# str and matched against the categories seen during fit. Values never seen in
# training are mapped to the encoder's unknown value.
for column, encoder in oe.items():
    column_as_text = submission_features[[column]].astype(str)
    # ravel() flattens the (n, 1) encoder output to a 1-D column
    submission_features[column] = encoder.transform(column_as_text).ravel()

# Use the trained model to predict the 'Transported' status for the submission data
y_pred_submission = model.predict(submission_features)

# Keep only the 'PassengerId' and 'Transported' columns for submission
submission_data = pd.DataFrame({
    'PassengerId': submission_raw['PassengerId'],
    'Transported': y_pred_submission,
})

# Save the results to a CSV file
submission_data.to_csv('/kaggle/working/submission.csv', index=False, header=True)