In [562]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/spaceship-titanic/sample_submission.csv
/kaggle/input/spaceship-titanic/train.csv
/kaggle/input/spaceship-titanic/test.csv


In [563]:
from sklearn.model_selection import train_test_split

data = pd.read_csv('/kaggle/input/spaceship-titanic/train.csv')

features = [
    'CryoSleep',
    'Cabin',
    'Age',
    'VIP',
    'RoomService',
    'FoodCourt',
    'Spa',
    'VRDeck',
    'deck',
    'num',
    'side',
    'group',
    'HomePlanet',
    'something',
]

# Spit passanger ID
# Encode HomePlanet - OneHotEncoder
# Split cabin
# Standardize values

data

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False
3,0003_02,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False
4,0004_01,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8688,9276_01,Europa,False,A/98/P,55 Cancri e,41.0,True,0.0,6819.0,0.0,1643.0,74.0,Gravior Noxnuther,False
8689,9278_01,Earth,True,G/1499/S,PSO J318.5-22,18.0,False,0.0,0.0,0.0,0.0,0.0,Kurta Mondalley,False
8690,9279_01,Earth,False,G/1500/S,TRAPPIST-1e,26.0,False,0.0,0.0,1872.0,1.0,0.0,Fayey Connon,True
8691,9280_01,Europa,False,E/608/S,55 Cancri e,32.0,False,0.0,1049.0,0.0,353.0,3235.0,Celeon Hontichre,False


In [564]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from xgboost import XGBClassifier

def pre_transforms(some_data):
    some_data['Cabin'] = some_data['Cabin'].map(lambda el: '//' if type(el) == float else el)

    some_data['deck'] = some_data['Cabin'].map(lambda val: val.split('/')[0])
    some_data['num'] = some_data['Cabin'].map(lambda val: val.split('/')[1])
    some_data['side'] = some_data['Cabin'].map(lambda val: val.split('/')[2])

    some_data['group'] = some_data['PassengerId'].map(lambda val: float(val.split('_')[0]))

    some_data['something'] = some_data['Age'] * data['ShoppingMall']

    some_data.drop(columns='Cabin')
    
    return some_data
    
data = pre_transforms(data)

X = data[features]
y = data['Transported']


X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

numerical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer([
    ('num', numerical_transformer, ['Age', 'RoomService', 'FoodCourt', 'Spa', 'VRDeck', 'group', 'something']),
    ('cat', categorical_transformer, ['HomePlanet', 'deck', 'side'])
])

pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', XGBClassifier(
        n_estimators=800,
        learning_rate=0.05,
        random_state=42
    ))
])

pd.DataFrame(preprocessor.fit_transform(X_train))

pipeline.fit(X, y)

In [565]:
# transformed = pd.DataFrame(preprocessor.fit_transform(X_train))

# transformed.columns = X_train.columns

# transformed

In [572]:
from torch import nn, optim, tensor

print(X_train)

tensor(preprocessor.fit_transform(X_train))

# class BinaryNeuralNetwork(nn.Module):
#     def __init__(self):
#         super(BinaryNeuralNetwork, self).__init__()
#         self.fc1 = nn.Linear(20, 16)
#         self.fc2 = nn.Linear(16, 1)
#         self.sigmoid = nn.Sigmoid()
        
#     def forward(self, x):
#         x = torch.relu(self.fc1(x))
#         x = self.sigmoid(self.fc2(x))
#         return x

# model = BinaryNeuralNetwork()

# # Step 3: Define the Loss Function and Optimizer
# criterion = nn.BCELoss()
# optimizer = optim.Adam(model.parameters(), lr=0.001)

# # Step 4: Train the Model
# num_epochs = 20

# for epoch in range(num_epochs):
#     for inputs, labels in train_loader:
#         outputs = model(inputs)
#         loss = criterion(outputs, labels)
        
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()
    
#     print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# # Step 5: Evaluate the Model
# model.eval()
# with torch.no_grad():
#     test_outputs = model(X_test)
#     predicted = (test_outputs > 0.5).float()
#     accuracy = (predicted == y_test).float().mean()
#     print(f'Accuracy: {accuracy:.4f}')

     CryoSleep     Cabin   Age    VIP  RoomService  FoodCourt    Spa  VRDeck  \
5623     False  F/1140/S  27.0  False        441.0        0.0  471.0     0.0   
5253      True   B/213/S  45.0  False          0.0        0.0    0.0     0.0   
478       True    B/20/S  50.0  False          0.0        0.0    0.0     0.0   
1352      True   G/220/P   1.0  False          0.0        0.0    0.0     0.0   
5344     False   G/915/P  42.0  False          0.0       29.0  434.0    45.0   
...        ...       ...   ...    ...          ...        ...    ...     ...   
5734       NaN   G/988/S  18.0  False         14.0        2.0  610.0     0.0   
5191     False  F/1063/S  50.0    NaN        690.0        0.0  762.0   428.0   
5390     False  F/1194/P  22.0  False        158.0        0.0    0.0    26.0   
860      False   F/191/P  34.0  False        379.0        0.0    0.0     0.0   
7270     False   C/253/P  28.0  False          7.0      489.0    4.0  6027.0   

     deck   num side   group HomePlanet

tensor([[-0.1289,  0.3269, -0.2928,  ...,  0.0000,  0.0000,  1.0000],
        [ 1.1255, -0.3332, -0.2928,  ...,  0.0000,  0.0000,  1.0000],
        [ 1.4739, -0.3332, -0.2928,  ...,  0.0000,  0.0000,  1.0000],
        ...,
        [-0.4773, -0.0967, -0.2928,  ...,  0.0000,  1.0000,  0.0000],
        [ 0.3589,  0.2341, -0.2928,  ...,  0.0000,  1.0000,  0.0000],
        [-0.0592, -0.3227,  0.0181,  ...,  0.0000,  1.0000,  0.0000]],
       dtype=torch.float64)

In [566]:
from sklearn.metrics import precision_score

test = pd.read_csv('/kaggle/input/spaceship-titanic/test.csv')

precision_score(y_test, pipeline.predict(X_test))

test = pre_transforms(test)

y = test.loc[:,['PassengerId']]

values = pipeline.predict(test)

y['Transported'] = values.astype(bool)

y = y.set_index('PassengerId')

y.to_csv('submission.csv')

y

Unnamed: 0_level_0,Transported
PassengerId,Unnamed: 1_level_1
0013_01,False
0018_01,False
0019_01,True
0021_01,True
0023_01,False
...,...
9266_02,True
9269_01,False
9271_01,True
9273_01,True
