In [257]:
import pandas as pd
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

In [258]:
from sklearn import preprocessing

In [259]:
from sklearn.linear_model import LogisticRegression

In [260]:
from sklearn.metrics import classification_report

In [261]:
train_data = pd.read_csv('train.csv')

In [262]:
train_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8693 entries, 0 to 8692
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   PassengerId   8693 non-null   object 
 1   HomePlanet    8492 non-null   object 
 2   CryoSleep     8476 non-null   object 
 3   Cabin         8494 non-null   object 
 4   Destination   8511 non-null   object 
 5   Age           8514 non-null   float64
 6   VIP           8490 non-null   object 
 7   RoomService   8512 non-null   float64
 8   FoodCourt     8510 non-null   float64
 9   ShoppingMall  8485 non-null   float64
 10  Spa           8510 non-null   float64
 11  VRDeck        8505 non-null   float64
 12  Name          8493 non-null   object 
 13  Transported   8693 non-null   bool   
dtypes: bool(1), float64(6), object(7)
memory usage: 891.5+ KB


In [263]:
train_data = train_data.fillna({'Age': train_data.Age.median()})

In [264]:
train_data['Age_levels'] = pd.cut(train_data['Age'],bins=[0,12,18,79],labels=['Age_1','Age_2','Age_3'], right=False)

In [265]:
train_data.head(3)

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported,Age_levels
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False,Age_3
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True,Age_3
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False,Age_3


In [266]:
train_data['Cabin'] = train_data['Cabin'].astype(str)

In [267]:
train_data['Cabin'] = train_data['Cabin'].map(lambda x: x.strip(x[:-1]))

In [318]:
train_data.head(50)

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported,Age_levels
0,0001_01,Europa,False,P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False,Age_3
1,0002_01,Earth,False,S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True,Age_3
2,0003_01,Europa,False,S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False,Age_3
3,0003_02,Europa,False,S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False,Age_3
4,0004_01,Earth,False,S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True,Age_2
5,0005_01,Earth,False,P,PSO J318.5-22,44.0,False,0.0,483.0,0.0,291.0,0.0,Sandie Hinetthews,True,Age_3
6,0006_01,Earth,False,S,TRAPPIST-1e,26.0,False,42.0,1539.0,3.0,0.0,0.0,Billex Jacostaffey,True,Age_3
7,0006_02,Earth,True,S,TRAPPIST-1e,28.0,False,0.0,0.0,0.0,0.0,,Candra Jacostaffey,True,Age_3
8,0007_01,Earth,False,S,TRAPPIST-1e,35.0,False,0.0,785.0,17.0,216.0,0.0,Andona Beston,True,Age_3
9,0008_01,Europa,True,P,55 Cancri e,14.0,False,0.0,0.0,0.0,0.0,0.0,Erraiam Flatic,True,Age_2


In [269]:
df_train = pd.get_dummies(train_data, columns=['HomePlanet','CryoSleep', 'Cabin','Destination','VIP', 'Age_levels'])

In [270]:
df_train.columns

Index(['PassengerId', 'Age', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa',
       'VRDeck', 'Name', 'Transported', 'HomePlanet_Earth',
       'HomePlanet_Europa', 'HomePlanet_Mars', 'CryoSleep_False',
       'CryoSleep_True', 'Cabin_', 'Cabin_P', 'Cabin_S',
       'Destination_55 Cancri e', 'Destination_PSO J318.5-22',
       'Destination_TRAPPIST-1e', 'VIP_False', 'VIP_True', 'Age_levels_Age_1',
       'Age_levels_Age_2', 'Age_levels_Age_3'],
      dtype='object')

In [271]:
df_train.drop(['Name','PassengerId', 'Age', 'Cabin_', 'Cabin_P', 'CryoSleep_False', 'VIP_False'], axis = 1, inplace = True)

In [272]:
df_train

Unnamed: 0,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,HomePlanet_Earth,HomePlanet_Europa,HomePlanet_Mars,CryoSleep_True,Cabin_S,Destination_55 Cancri e,Destination_PSO J318.5-22,Destination_TRAPPIST-1e,VIP_True,Age_levels_Age_1,Age_levels_Age_2,Age_levels_Age_3
0,0.0,0.0,0.0,0.0,0.0,False,0,1,0,0,0,0,0,1,0,0,0,1
1,109.0,9.0,25.0,549.0,44.0,True,1,0,0,0,1,0,0,1,0,0,0,1
2,43.0,3576.0,0.0,6715.0,49.0,False,0,1,0,0,1,0,0,1,1,0,0,1
3,0.0,1283.0,371.0,3329.0,193.0,False,0,1,0,0,1,0,0,1,0,0,0,1
4,303.0,70.0,151.0,565.0,2.0,True,1,0,0,0,1,0,0,1,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8688,0.0,6819.0,0.0,1643.0,74.0,False,0,1,0,0,0,1,0,0,1,0,0,1
8689,0.0,0.0,0.0,0.0,0.0,False,1,0,0,1,1,0,1,0,0,0,0,1
8690,0.0,0.0,1872.0,1.0,0.0,True,1,0,0,0,1,0,0,1,0,0,0,1
8691,0.0,1049.0,0.0,353.0,3235.0,False,0,1,0,0,1,1,0,0,0,0,0,1


In [273]:
label_encoder = preprocessing.LabelEncoder()

In [274]:
df_train['Transported']= label_encoder.fit_transform(df_train['Transported'])

In [275]:
df_train

Unnamed: 0,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,HomePlanet_Earth,HomePlanet_Europa,HomePlanet_Mars,CryoSleep_True,Cabin_S,Destination_55 Cancri e,Destination_PSO J318.5-22,Destination_TRAPPIST-1e,VIP_True,Age_levels_Age_1,Age_levels_Age_2,Age_levels_Age_3
0,0.0,0.0,0.0,0.0,0.0,0,0,1,0,0,0,0,0,1,0,0,0,1
1,109.0,9.0,25.0,549.0,44.0,1,1,0,0,0,1,0,0,1,0,0,0,1
2,43.0,3576.0,0.0,6715.0,49.0,0,0,1,0,0,1,0,0,1,1,0,0,1
3,0.0,1283.0,371.0,3329.0,193.0,0,0,1,0,0,1,0,0,1,0,0,0,1
4,303.0,70.0,151.0,565.0,2.0,1,1,0,0,0,1,0,0,1,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8688,0.0,6819.0,0.0,1643.0,74.0,0,0,1,0,0,0,1,0,0,1,0,0,1
8689,0.0,0.0,0.0,0.0,0.0,0,1,0,0,1,1,0,1,0,0,0,0,1
8690,0.0,0.0,1872.0,1.0,0.0,1,1,0,0,0,1,0,0,1,0,0,0,1
8691,0.0,1049.0,0.0,353.0,3235.0,0,0,1,0,0,1,1,0,0,0,0,0,1


In [276]:
df_train.fillna(df_train[['RoomService','FoodCourt','ShoppingMall','Spa','VRDeck']].median(),
                inplace=True) 

In [277]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8693 entries, 0 to 8692
Data columns (total 18 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   RoomService                8693 non-null   float64
 1   FoodCourt                  8693 non-null   float64
 2   ShoppingMall               8693 non-null   float64
 3   Spa                        8693 non-null   float64
 4   VRDeck                     8693 non-null   float64
 5   Transported                8693 non-null   int64  
 6   HomePlanet_Earth           8693 non-null   uint8  
 7   HomePlanet_Europa          8693 non-null   uint8  
 8   HomePlanet_Mars            8693 non-null   uint8  
 9   CryoSleep_True             8693 non-null   uint8  
 10  Cabin_S                    8693 non-null   uint8  
 11  Destination_55 Cancri e    8693 non-null   uint8  
 12  Destination_PSO J318.5-22  8693 non-null   uint8  
 13  Destination_TRAPPIST-1e    8693 non-null   uint8

In [278]:
target_trans = df_train.pop('Transported')

In [279]:
# insert column using insert(position,column_name,
# first_column) function
df_train.insert(17, 'Transported', target_trans)

In [280]:
df_train

Unnamed: 0,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet_Earth,HomePlanet_Europa,HomePlanet_Mars,CryoSleep_True,Cabin_S,Destination_55 Cancri e,Destination_PSO J318.5-22,Destination_TRAPPIST-1e,VIP_True,Age_levels_Age_1,Age_levels_Age_2,Age_levels_Age_3,Transported
0,0.0,0.0,0.0,0.0,0.0,0,1,0,0,0,0,0,1,0,0,0,1,0
1,109.0,9.0,25.0,549.0,44.0,1,0,0,0,1,0,0,1,0,0,0,1,1
2,43.0,3576.0,0.0,6715.0,49.0,0,1,0,0,1,0,0,1,1,0,0,1,0
3,0.0,1283.0,371.0,3329.0,193.0,0,1,0,0,1,0,0,1,0,0,0,1,0
4,303.0,70.0,151.0,565.0,2.0,1,0,0,0,1,0,0,1,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8688,0.0,6819.0,0.0,1643.0,74.0,0,1,0,0,0,1,0,0,1,0,0,1,0
8689,0.0,0.0,0.0,0.0,0.0,1,0,0,1,1,0,1,0,0,0,0,1,0
8690,0.0,0.0,1872.0,1.0,0.0,1,0,0,0,1,0,0,1,0,0,0,1,1
8691,0.0,1049.0,0.0,353.0,3235.0,0,1,0,0,1,1,0,0,0,0,0,1,0


In [281]:
df_train.columns

Index(['RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck',
       'HomePlanet_Earth', 'HomePlanet_Europa', 'HomePlanet_Mars',
       'CryoSleep_True', 'Cabin_S', 'Destination_55 Cancri e',
       'Destination_PSO J318.5-22', 'Destination_TRAPPIST-1e', 'VIP_True',
       'Age_levels_Age_1', 'Age_levels_Age_2', 'Age_levels_Age_3',
       'Transported'],
      dtype='object')

In [282]:
X = df_train.iloc[:,:-1]
y = df_train.iloc[:,-1]

In [283]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.4, random_state=42)

In [284]:
model_lr = LogisticRegression(max_iter=100)

In [285]:
model_lr.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression()

In [286]:
y_pred = model_lr.predict(X_test)

In [287]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[1255  459]
 [ 295 1469]]
              precision    recall  f1-score   support

           0       0.81      0.73      0.77      1714
           1       0.76      0.83      0.80      1764

    accuracy                           0.78      3478
   macro avg       0.79      0.78      0.78      3478
weighted avg       0.79      0.78      0.78      3478



In [288]:
test_data = pd.read_csv('test.csv')

In [317]:
test_data.head(50)

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Age_levels
0,0013_01,Earth,True,S,TRAPPIST-1e,27.0,False,0.0,0.0,0.0,0.0,0.0,Nelly Carsoning,Age_3
1,0018_01,Earth,False,S,TRAPPIST-1e,19.0,False,0.0,9.0,0.0,2823.0,0.0,Lerome Peckers,Age_3
2,0019_01,Europa,True,S,55 Cancri e,31.0,False,0.0,0.0,0.0,0.0,0.0,Sabih Unhearfus,Age_3
3,0021_01,Europa,False,S,TRAPPIST-1e,38.0,False,0.0,6652.0,0.0,181.0,585.0,Meratz Caltilter,Age_3
4,0023_01,Earth,False,S,TRAPPIST-1e,20.0,False,10.0,0.0,635.0,0.0,0.0,Brence Harperez,Age_3
5,0027_01,Earth,False,P,TRAPPIST-1e,31.0,False,0.0,1615.0,263.0,113.0,60.0,Karlen Ricks,Age_3
6,0029_01,Europa,True,P,55 Cancri e,21.0,False,0.0,,0.0,0.0,0.0,Aldah Ainserfle,Age_3
7,0032_01,Europa,True,S,TRAPPIST-1e,20.0,False,0.0,0.0,0.0,0.0,0.0,Acrabi Pringry,Age_3
8,0032_02,Europa,True,S,55 Cancri e,23.0,False,0.0,0.0,0.0,0.0,0.0,Dhena Pringry,Age_3
9,0033_01,Earth,False,S,55 Cancri e,24.0,False,0.0,639.0,0.0,0.0,0.0,Eliana Delazarson,Age_3


In [290]:
test_data = test_data.fillna({'Age': test_data.Age.median()})

In [291]:
test_data['Age_levels'] = pd.cut(test_data['Age'],bins=[0,12,18,79],labels=['Age_1','Age_2','Age_3'], right=False)

In [292]:
test_data['Cabin'] = test_data['Cabin'].astype(str)

In [293]:
test_data['Cabin'] = test_data['Cabin'].map(lambda x: x.strip(x[:-1]))

In [294]:
df_test = pd.get_dummies(test_data, columns=['HomePlanet','CryoSleep', 'Cabin', 'Destination','VIP', 'Age_levels'])

In [295]:
df_test.drop(['Name','PassengerId', 'Age', 'Cabin_', 'Cabin_P', 'CryoSleep_False', 'VIP_False'], axis = 1, inplace = True)

In [296]:
df_test

Unnamed: 0,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet_Earth,HomePlanet_Europa,HomePlanet_Mars,CryoSleep_True,Cabin_S,Destination_55 Cancri e,Destination_PSO J318.5-22,Destination_TRAPPIST-1e,VIP_True,Age_levels_Age_1,Age_levels_Age_2,Age_levels_Age_3
0,0.0,0.0,0.0,0.0,0.0,1,0,0,1,1,0,0,1,0,0,0,1
1,0.0,9.0,0.0,2823.0,0.0,1,0,0,0,1,0,0,1,0,0,0,1
2,0.0,0.0,0.0,0.0,0.0,0,1,0,1,1,1,0,0,0,0,0,1
3,0.0,6652.0,0.0,181.0,585.0,0,1,0,0,1,0,0,1,0,0,0,1
4,10.0,0.0,635.0,0.0,0.0,1,0,0,0,1,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4272,0.0,0.0,0.0,0.0,0.0,1,0,0,1,1,0,0,1,0,0,0,1
4273,0.0,847.0,17.0,10.0,144.0,1,0,0,0,0,0,0,1,0,0,0,1
4274,0.0,0.0,0.0,0.0,0.0,0,0,1,1,0,1,0,0,0,0,0,1
4275,0.0,2680.0,0.0,0.0,523.0,0,1,0,0,0,0,0,0,0,0,0,1


In [297]:
df_test.fillna(df_test[['RoomService','FoodCourt','ShoppingMall','Spa','VRDeck']].median(),
                inplace=True)    

In [298]:
X_test=df_test.iloc[:,:]
y_pred = model_lr.predict(X_test)

In [299]:
test_pred = pd.concat([df_test, pd.DataFrame(y_pred)], axis=1)

In [300]:
test_pred

Unnamed: 0,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet_Earth,HomePlanet_Europa,HomePlanet_Mars,CryoSleep_True,Cabin_S,Destination_55 Cancri e,Destination_PSO J318.5-22,Destination_TRAPPIST-1e,VIP_True,Age_levels_Age_1,Age_levels_Age_2,Age_levels_Age_3,0
0,0.0,0.0,0.0,0.0,0.0,1,0,0,1,1,0,0,1,0,0,0,1,1
1,0.0,9.0,0.0,2823.0,0.0,1,0,0,0,1,0,0,1,0,0,0,1,0
2,0.0,0.0,0.0,0.0,0.0,0,1,0,1,1,1,0,0,0,0,0,1,1
3,0.0,6652.0,0.0,181.0,585.0,0,1,0,0,1,0,0,1,0,0,0,1,1
4,10.0,0.0,635.0,0.0,0.0,1,0,0,0,1,0,0,1,0,0,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4272,0.0,0.0,0.0,0.0,0.0,1,0,0,1,1,0,0,1,0,0,0,1,1
4273,0.0,847.0,17.0,10.0,144.0,1,0,0,0,0,0,0,1,0,0,0,1,0
4274,0.0,0.0,0.0,0.0,0.0,0,0,1,1,0,1,0,0,0,0,0,1,1
4275,0.0,2680.0,0.0,0.0,523.0,0,1,0,0,0,0,0,0,0,0,0,1,1


In [301]:
test_pred.rename(columns={0: 'Transported'},
          inplace=True, errors='raise')

In [302]:
test_pred

Unnamed: 0,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet_Earth,HomePlanet_Europa,HomePlanet_Mars,CryoSleep_True,Cabin_S,Destination_55 Cancri e,Destination_PSO J318.5-22,Destination_TRAPPIST-1e,VIP_True,Age_levels_Age_1,Age_levels_Age_2,Age_levels_Age_3,Transported
0,0.0,0.0,0.0,0.0,0.0,1,0,0,1,1,0,0,1,0,0,0,1,1
1,0.0,9.0,0.0,2823.0,0.0,1,0,0,0,1,0,0,1,0,0,0,1,0
2,0.0,0.0,0.0,0.0,0.0,0,1,0,1,1,1,0,0,0,0,0,1,1
3,0.0,6652.0,0.0,181.0,585.0,0,1,0,0,1,0,0,1,0,0,0,1,1
4,10.0,0.0,635.0,0.0,0.0,1,0,0,0,1,0,0,1,0,0,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4272,0.0,0.0,0.0,0.0,0.0,1,0,0,1,1,0,0,1,0,0,0,1,1
4273,0.0,847.0,17.0,10.0,144.0,1,0,0,0,0,0,0,1,0,0,0,1,0
4274,0.0,0.0,0.0,0.0,0.0,0,0,1,1,0,1,0,0,0,0,0,1,1
4275,0.0,2680.0,0.0,0.0,523.0,0,1,0,0,0,0,0,0,0,0,0,1,1


In [303]:
output_log = pd.DataFrame({'PassengerId':test_data.PassengerId, 'Transported':test_pred.Transported})
output_log["Transported"] = output_log["Transported"].astype(bool)
output_log.to_csv('my_submission_log_Cabin.csv', index=False)

In [304]:
output_log.describe()

Unnamed: 0,PassengerId,Transported
count,4277,4277
unique,4277,2
top,0013_01,True
freq,1,2292


RANDOM FOREST

In [305]:
X_train_rf, X_test_rf, y_train_rf, y_test_rf = train_test_split(X, y, test_size = 0.4, random_state=42)

In [306]:
model_rf = RandomForestClassifier()
paramertrs = {'criterion': ["entropy", "gini"], 'n_estimators': range(10, 25, 5), 'max_depth': range(3, 10),
             'min_samples_split': range(2, 10), 'min_samples_leaf': range(2, 10)}
grid_search_cv = GridSearchCV(model_rf, paramertrs, cv=10)
grid_search_cv.fit(X_train_rf, y_train_rf)

GridSearchCV(cv=10, estimator=RandomForestClassifier(),
             param_grid={'criterion': ['entropy', 'gini'],
                         'max_depth': range(3, 10),
                         'min_samples_leaf': range(2, 10),
                         'min_samples_split': range(2, 10),
                         'n_estimators': range(10, 25, 5)})

In [307]:
grid_search_cv.best_params_

{'criterion': 'entropy',
 'max_depth': 8,
 'min_samples_leaf': 5,
 'min_samples_split': 2,
 'n_estimators': 20}

In [308]:
best_rf = grid_search_cv.best_estimator_

In [309]:
y_pred_rf = best_rf.predict(X_test_rf)

In [310]:
print(confusion_matrix(y_test_rf, y_pred_rf))
print(classification_report(y_test_rf, y_pred_rf))

[[1288  426]
 [ 323 1441]]
              precision    recall  f1-score   support

           0       0.80      0.75      0.77      1714
           1       0.77      0.82      0.79      1764

    accuracy                           0.78      3478
   macro avg       0.79      0.78      0.78      3478
weighted avg       0.79      0.78      0.78      3478



In [311]:
X_test_rf_fin=df_test.iloc[:,:]
y_pred_rf_fin = best_rf.predict(X_test_rf_fin)

In [312]:
test_pred_rf = pd.concat([df_test, pd.DataFrame(y_pred_rf_fin)], axis=1)

In [313]:
test_pred_rf.head(5)

Unnamed: 0,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet_Earth,HomePlanet_Europa,HomePlanet_Mars,CryoSleep_True,Cabin_S,Destination_55 Cancri e,Destination_PSO J318.5-22,Destination_TRAPPIST-1e,VIP_True,Age_levels_Age_1,Age_levels_Age_2,Age_levels_Age_3,0
0,0.0,0.0,0.0,0.0,0.0,1,0,0,1,1,0,0,1,0,0,0,1,1
1,0.0,9.0,0.0,2823.0,0.0,1,0,0,0,1,0,0,1,0,0,0,1,0
2,0.0,0.0,0.0,0.0,0.0,0,1,0,1,1,1,0,0,0,0,0,1,1
3,0.0,6652.0,0.0,181.0,585.0,0,1,0,0,1,0,0,1,0,0,0,1,1
4,10.0,0.0,635.0,0.0,0.0,1,0,0,0,1,0,0,1,0,0,0,1,1


In [314]:
test_pred_rf.rename(columns={0: 'Transported'},
          inplace=True, errors='raise')

In [315]:
output_rf = pd.DataFrame({'PassengerId':test_data.PassengerId, 'Transported':test_pred_rf.Transported})
output_rf["Transported"] = output_rf["Transported"].astype(bool)
output_rf.to_csv('my_submission_Cabin.csv', index=False)

In [316]:
output_rf.value_counts().sum()

4277

In [190]:
output_rf.describe()

Unnamed: 0,PassengerId,Transported
count,4277,4277
unique,4277,2
top,0013_01,False
freq,1,2140
