In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import os

In [None]:
# Directory that holds the CSV inputs for this notebook. It defaults to the
# original location, but can be redirected without editing the notebook by
# setting the SPACESHIP_TITANIC_DIR environment variable before starting the kernel.
DATA_DIR = os.environ.get(
    'SPACESHIP_TITANIC_DIR',
    '/Users/mohankirushna.r/Downloads/practise/kaggle/spaceshiptitanic',
)

# Individual input files, all resolved relative to DATA_DIR.
train_path = os.path.join(DATA_DIR, 'train.csv')
test_path = os.path.join(DATA_DIR, 'test.csv')
submission_path = os.path.join(DATA_DIR, 'sample_submission.csv')

In [None]:
# Read the training set, the test set and the sample submission into DataFrames.
try:
    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path)
    submission_df = pd.read_csv(submission_path)
except FileNotFoundError as e:
    print(f"File not found: {e}")
    print("Please check that all file paths are correct.")
    # Re-raise rather than calling exit(): inside a notebook exit() shuts the
    # kernel down, whereas re-raising stops "Run All" at this cell and keeps
    # both the kernel and the full traceback available.
    raise

In [None]:
def preprocess_data(df):
    """Impute missing values, encode categoricals and return the model features.

    The input frame is left untouched; all work happens on a copy, so the
    function can be re-run on the same frame and always gives the same result.

    Imputation rules:
      * ``Age``: median of the non-missing ages in ``df``.
      * ``RoomService``, ``FoodCourt``, ``ShoppingMall``, ``Spa``, ``VRDeck``: 0.
      * ``CryoSleep``, ``VIP``: ``False``, then cast to 0/1 integers.
      * ``HomePlanet``: ``"Earth"``; ``Destination``: ``"TRAPPIST-1e"``.

    ``HomePlanet`` and ``Destination`` are replaced by integer codes assigned in
    sorted order of the distinct values present in ``df``.

    NOTE(review): because the codes are derived per call, two frames only get
    matching codes when they contain the same set of category values.

    Args:
        df: DataFrame containing at least the ten feature columns listed in
            ``features`` below.

    Returns:
        A new DataFrame with exactly the ten feature columns, in a fixed order.

    Raises:
        KeyError: if any required column is missing from ``df``.
    """
    spend_cols = ['RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']
    flag_cols = ['CryoSleep', 'VIP']
    category_defaults = {'HomePlanet': "Earth", 'Destination': "TRAPPIST-1e"}
    features = ['HomePlanet', 'CryoSleep', 'Destination', 'Age', 'VIP', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']

    missing = [col for col in features if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Work on a copy and assign results back explicitly. Chained
    # `df[col].fillna(..., inplace=True)` is deprecated and does nothing under
    # pandas Copy-on-Write, which would leave NaNs behind.
    cleaned = df[features].copy()

    cleaned['Age'] = cleaned['Age'].fillna(cleaned['Age'].median())
    cleaned[spend_cols] = cleaned[spend_cols].fillna(0)

    for col in flag_cols:
        cleaned[col] = cleaned[col].fillna(False).astype(int)

    for col, default in category_defaults.items():
        # sort=True numbers the categories in sorted order of their values,
        # which is the same numbering a label encoder fit on this column gives.
        codes, _ = pd.factorize(cleaned[col].fillna(default), sort=True)
        cleaned[col] = codes

    return cleaned

In [None]:
# Target vector: the 'Transported' column of the training frame as 0/1 integers.
y = train_df['Transported'].astype(int)

# Run the same feature pipeline over the training frame, then the test frame.
X, X_test = (preprocess_data(frame) for frame in (train_df, test_df))

In [None]:
# Hold out 20% of the labelled rows for validation; the fixed seed keeps the
# split identical across re-runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Number of trees in the forest. The previous value of 1,000,000,000 cannot be
# trained in any practical amount of time or memory; a few hundred trees is a
# workable size and can be tuned here.
N_ESTIMATORS = 500

model = RandomForestClassifier(
    n_estimators=N_ESTIMATORS,
    random_state=42,  # fixed seed so repeated runs build the same forest
    n_jobs=-1,        # fit the trees in parallel on all available cores
)
model.fit(X_train, y_train)

In [None]:
# Score the fitted model on the held-out validation split.
y_pred = model.predict(X_val)
accuracy = accuracy_score(y_true=y_val, y_pred=y_pred)

# Report the fraction of validation rows predicted correctly.
print(f'Validation Accuracy: {accuracy:.2f}')

In [None]:
# Predict a label for every row of the preprocessed test features.
test_predictions = model.predict(X=X_test)

In [None]:
# The model was trained on integer labels, so cast the predictions back to
# booleans before storing them in the submission frame.
submission_df['Transported'] = test_predictions.astype(bool)

# Write the result next to the sample submission rather than repeating the
# absolute directory here, so the output follows wherever the inputs live.
output_path = os.path.join(os.path.dirname(submission_path), 'submission.csv')
submission_df.to_csv(output_path, index=False)
print(f"Submission file saved as {output_path}")