In [162]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/spaceship-titanic/sample_submission.csv
/kaggle/input/spaceship-titanic/train.csv
/kaggle/input/spaceship-titanic/test.csv


In [163]:
from sklearn.model_selection import train_test_split

data = pd.read_csv('/kaggle/input/spaceship-titanic/train.csv')

features = [
    'CryoSleep',
    'Cabin',
    'Age',
    'VIP',
    'RoomService',
    'FoodCourt',
    'Spa',
    'VRDeck',
    'deck',
    'num',
    'side',
    'group',
    'HomePlanet',
    'something',
]

# Spit passanger ID
# Encode HomePlanet - OneHotEncoder
# Split cabin
# Standardize values

data

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False
3,0003_02,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False
4,0004_01,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8688,9276_01,Europa,False,A/98/P,55 Cancri e,41.0,True,0.0,6819.0,0.0,1643.0,74.0,Gravior Noxnuther,False
8689,9278_01,Earth,True,G/1499/S,PSO J318.5-22,18.0,False,0.0,0.0,0.0,0.0,0.0,Kurta Mondalley,False
8690,9279_01,Earth,False,G/1500/S,TRAPPIST-1e,26.0,False,0.0,0.0,1872.0,1.0,0.0,Fayey Connon,True
8691,9280_01,Europa,False,E/608/S,55 Cancri e,32.0,False,0.0,1049.0,0.0,353.0,3235.0,Celeon Hontichre,False


In [164]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from xgboost import XGBClassifier

def pre_transforms(some_data):
    some_data['Cabin'] = some_data['Cabin'].map(lambda el: '//' if type(el) == float else el)

    some_data['deck'] = some_data['Cabin'].map(lambda val: val.split('/')[0])
    some_data['num'] = some_data['Cabin'].map(lambda val: val.split('/')[1])
    some_data['side'] = some_data['Cabin'].map(lambda val: val.split('/')[2])

    some_data['group'] = some_data['PassengerId'].map(lambda val: float(val.split('_')[0]))

    some_data['something'] = some_data['Age'] * data['ShoppingMall']

    some_data.drop(columns='Cabin')
    
    return some_data
    
data = pre_transforms(data)

X = data[features]
y = data['Transported']


X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

numerical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer([
    ('num', numerical_transformer, ['Age', 'RoomService', 'FoodCourt', 'Spa', 'VRDeck', 'group', 'something']),
    ('cat', categorical_transformer, ['HomePlanet', 'deck', 'side'])
])

pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', XGBClassifier(
        n_estimators=800,
        learning_rate=0.05,
        random_state=42
    ))
])

# pipeline.fit(X, y)

X_test

Unnamed: 0,CryoSleep,Cabin,Age,VIP,RoomService,FoodCourt,Spa,VRDeck,deck,num,side,group,HomePlanet,something
304,False,F/63/S,19.0,False,417.0,349.0,3.0,1057.0,F,63,S,337.0,Mars,12046.0
2697,False,G/460/S,18.0,False,4.0,904.0,0.0,1.0,G,460,S,2891.0,Earth,0.0
8424,True,G/1449/S,41.0,False,0.0,0.0,0.0,0.0,G,1449,S,8998.0,Earth,0.0
1672,False,G/291/P,35.0,False,0.0,338.0,,0.0,G,291,P,1771.0,Earth,15260.0
8458,True,D/288/P,43.0,False,0.0,0.0,0.0,0.0,D,288,P,9034.0,Europa,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4478,True,F/968/P,35.0,False,0.0,0.0,0.0,0.0,F,968,P,4762.0,Mars,0.0
2996,True,G/515/S,30.0,False,0.0,0.0,0.0,0.0,G,515,S,3239.0,Earth,0.0
7760,False,F/1703/P,34.0,False,0.0,0.0,22.0,0.0,F,1703,P,8280.0,Earth,29920.0
8181,,F/1802/P,19.0,False,179.0,16.0,280.0,0.0,F,1802,P,8743.0,Earth,4674.0


In [165]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from torch.utils.data import DataLoader, TensorDataset

X_train = torch.tensor(preprocessor.fit_transform(X), dtype=torch.float32)
y_train = torch.tensor(y.to_numpy(), dtype=torch.float32).unsqueeze(1)
X_test = torch.tensor(preprocessor.transform(X_test), dtype=torch.float32)
y_test = torch.tensor(y_test.to_numpy(), dtype=torch.float32).unsqueeze(1)

# Create DataLoader
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Step 2: Define the Model
class BinaryNeuralNetwork(nn.Module):
    def __init__(self):
        super(BinaryNeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(22, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, 1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x

model = BinaryNeuralNetwork()

# Step 3: Define the Loss Function and Optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Step 4: Train the Model
num_epochs = 20

for epoch in range(num_epochs):
    for inputs, labels in train_loader:
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Step 5: Evaluate the Model
model.eval()
with torch.no_grad():
    test = pd.read_csv('/kaggle/input/spaceship-titanic/test.csv')
    
    test = pre_transforms(test)
    
    y = test.loc[:,['PassengerId']]
    
    test_outputs = model(torch.tensor(preprocessor.transform(test), dtype=torch.float32))
    predicted = (test_outputs > 0.45).float()
    y['Transported'] = pd.Series(predicted.squeeze()).astype(bool)
    
    y = y.set_index('PassengerId')
    
    y.to_csv('submission.csv')
#     accuracy = (predicted == y_test).float().mean()
#     print(f'Accuracy: {accuracy:.4f}')
    


Epoch [1/20], Loss: 0.5505
Epoch [2/20], Loss: 0.2551
Epoch [3/20], Loss: 0.2168
Epoch [4/20], Loss: 0.5832
Epoch [5/20], Loss: 0.2983
Epoch [6/20], Loss: 0.2764
Epoch [7/20], Loss: 0.3441
Epoch [8/20], Loss: 0.6810
Epoch [9/20], Loss: 0.5110
Epoch [10/20], Loss: 0.3909
Epoch [11/20], Loss: 0.4075
Epoch [12/20], Loss: 0.3980
Epoch [13/20], Loss: 0.5233
Epoch [14/20], Loss: 0.3210
Epoch [15/20], Loss: 0.5918
Epoch [16/20], Loss: 0.3021
Epoch [17/20], Loss: 0.5033
Epoch [18/20], Loss: 0.3634
Epoch [19/20], Loss: 0.6582
Epoch [20/20], Loss: 0.3317


In [166]:
# from sklearn.metrics import precision_score

# test = pd.read_csv('/kaggle/input/spaceship-titanic/test.csv')

# precision_score(y_test, pipeline.predict(X_test))

# test = pre_transforms(test)

# y = test.loc[:,['PassengerId']]

# values = pipeline.predict(test)

# y['Transported'] = values.astype(bool)

# y = y.set_index('PassengerId')

# y.to_csv('submission.csv')

# y