In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree lazily and print the full path of every file found.
inputPaths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for inputPath in inputPaths:
    print(inputPath)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/titanic/train.csv
/kaggle/input/titanic/test.csv
/kaggle/input/titanic/gender_submission.csv


In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Read the Titanic train and test splits from the Kaggle input directory.
trainOriginalData, testOriginalData = map(
    pd.read_csv,
    ('/kaggle/input/titanic/train.csv', '/kaggle/input/titanic/test.csv'),
)

# Preview the first five rows of each split; the '\n' argument keeps the
# same blank separator lines between the two previews.
print(trainOriginalData.head(5), '\n', testOriginalData.head(5), sep='\n')

   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.0500   NaN        S  




In [None]:
# Per-column count of missing values in each raw split.
# The second report previously repeated the training frame, so gaps in the
# test split were never shown.
print(trainOriginalData.isna().sum(),"\n")
print(testOriginalData.isna().sum())


In [None]:
# Text/categorical columns left out of the baseline models. One shared list
# keeps both splits in sync (the original repeated "Cabin" in each call).
droppedColumns = ["Name", "Sex", "Ticket", "Cabin", "Embarked"]
trainData = trainOriginalData.drop(droppedColumns, axis = 1)
testData = testOriginalData.drop(droppedColumns, axis = 1)

In [None]:
# Target vector and feature matrix for the training split.
yTrain = trainData["Survived"]
trainFeatures = trainData.drop(["Survived"], axis = 1)

# DataFrame.mode() returns a DataFrame (one row per tied mode). Passing that
# frame to fillna() aligns on the row index, so almost no gap gets filled.
# Taking the first row yields a column -> value Series, which fillna() applies
# to each column as intended.
trainModes = trainFeatures.mode().iloc[0]

# Both splits are imputed with statistics computed from the training data only.
xTrain = trainFeatures.fillna(trainModes)
xTest = testData.fillna(trainModes)

In [None]:
# Missing-value counts before mean imputation.
print(xTrain.isna().sum(),"\n")
print(xTest.isna().sum())

# Fill remaining gaps in 'Fare' and 'Age' with the TRAINING mean in both
# splits, so the test set is preprocessed with the same statistics the model
# is fitted on (the original used the test split's own mean for xTest).
for column in ('Fare', 'Age'):
    trainMean = xTrain[column].mean()
    xTrain[column] = xTrain[column].fillna(trainMean)
    xTest[column] = xTest[column].fillna(trainMean)

# Missing-value counts of the test split after imputation.
print(xTest.isna().sum())

In [None]:
# Random-forest baseline: 100 trees, depth capped at 10, at least 3 samples
# per leaf, fixed seed for repeatable fits.
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    min_samples_leaf=3,
    random_state=100,
)
model.fit(X=xTrain, y=yTrain)

In [None]:
# Predict survival for every row of the test feature matrix with the fitted forest.
predictions = model.predict(xTest)

# Two-column submission table (passenger id + predicted label), written
# without the DataFrame index.
output = pd.DataFrame(
    {
        'PassengerId': xTest['PassengerId'],
        'Survived': predictions,
    }
)
output.to_csv('submissionBasicRF.csv', index=False)
print("Your submission was successfully saved!")

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
# Hyperparameters for the MLP experiment, gathered in one cell for easy tuning.
SEED = 100                    # random seed for the network
activation = 'relu'           # hidden-layer activation function
learning_rate_init = 0.001    # initial optimizer learning rate
num_hidden_units = 2048       # width of the single hidden layer
num_epochs = 30               # training-epoch budget

In [None]:
# Single-hidden-layer perceptron configured from the hyperparameter cell.
mlp = MLPClassifier(
    hidden_layer_sizes=(num_hidden_units,),
    activation=activation,
    learning_rate_init=learning_rate_init,
    # num_epochs was declared but never handed to the estimator, so training
    # silently ran with scikit-learn's default max_iter. For the stochastic
    # solvers max_iter is the number of epochs.
    max_iter=num_epochs,
    warm_start=True,
    verbose=True,
    random_state=SEED,
    early_stopping=True,
    # Patience larger than the epoch budget, so validation-based early
    # stopping never cuts training short.
    n_iter_no_change=100,
)

In [None]:
# Train the MLP on the imputed training features; as the cell's last
# expression, the fitted estimator is also displayed.
mlp.fit(X=xTrain, y=yTrain)

In [None]:
# Predict survival for every row of the test feature matrix with the fitted MLP.
predictions = mlp.predict(xTest)

# NOTE(review): the file name says "CNN", but the model above is an
# MLPClassifier; the name is kept unchanged here.
output = pd.DataFrame(
    {
        'PassengerId': xTest['PassengerId'],
        'Survived': predictions,
    }
)
output.to_csv('submissionBasicCNN.csv', index=False)
print("Your submission was successfully saved!")

## Adding feature treatment

In [13]:
# Reload both raw splits so the feature-treatment section starts from
# untouched data.
trainOriginalData, testOriginalData = map(
    pd.read_csv,
    ('/kaggle/input/titanic/train.csv', '/kaggle/input/titanic/test.csv'),
)

In [14]:
# Display the freshly loaded training frame (bare last expression, so the notebook renders it).
trainOriginalData

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [15]:
# One-hot encode 'Sex' and keep only the "male" indicator (1 = male, 0 otherwise).
# NOTE(review): only the training frame is encoded in this cell; confirm the
# test frame gets the same treatment before it is used for prediction.
sexIndicators = pd.get_dummies(trainOriginalData['Sex'], dtype=int)
trainOriginalData['Sex'] = sexIndicators["male"]

In [16]:
# Re-display the training frame to check that 'Sex' now holds the 0/1 indicator.
trainOriginalData

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",1,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",0,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",0,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",1,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",1,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",0,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",0,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",1,26.0,0,0,111369,30.0000,C148,C


In [17]:
# Rescale 'Age' by its column maximum, then fill missing ages with the mean
# of the rescaled values.
# NOTE(review): only the training frame is transformed in this cell.
scaledAge = trainOriginalData["Age"].div(trainOriginalData["Age"].max())
trainOriginalData['Age'] = scaledAge.fillna(scaledAge.mean())

In [18]:
# Re-display the training frame to check the rescaled, gap-filled 'Age' column.
trainOriginalData

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",1,0.275000,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,0.475000,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",0,0.325000,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",0,0.437500,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",1,0.437500,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",1,0.337500,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",0,0.237500,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",0,0.371239,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",1,0.325000,0,0,111369,30.0000,C148,C


In [21]:
# Remove the free-text columns from both splits.
# Re-running this cell raises KeyError because the columns are already gone.
trainOriginalData = trainOriginalData.drop(columns=["Ticket", "Cabin", "Name"])
testOriginalData = testOriginalData.drop(columns=["Ticket", "Cabin", "Name"])

In [22]:
# Re-display the training frame to confirm 'Ticket', 'Cabin' and 'Name' are gone.
trainOriginalData

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,1,0,3,1,0.275000,1,0,7.2500,S
1,2,1,1,0,0.475000,1,0,71.2833,C
2,3,1,3,0,0.325000,0,0,7.9250,S
3,4,1,1,0,0.437500,1,0,53.1000,S
4,5,0,3,1,0.437500,0,0,8.0500,S
...,...,...,...,...,...,...,...,...,...
886,887,0,2,1,0.337500,0,0,13.0000,S
887,888,1,1,0,0.237500,0,0,30.0000,S
888,889,0,3,0,0.371239,1,2,23.4500,S
889,890,1,1,1,0.325000,0,0,30.0000,C


In [23]:
# Preview the one-hot encoding of 'Embarked' as integer indicator columns.
# The result is only displayed, not stored.
pd.get_dummies(data=trainOriginalData['Embarked'], dtype=int)

Unnamed: 0,C,Q,S
0,0,0,1
1,1,0,0
2,0,0,1
3,0,0,1
4,0,0,1
...,...,...,...
886,0,0,1
887,0,0,1
888,0,0,1
889,1,0,0


In [24]:
# Preview the training frame with the 'Embarked' indicator columns appended
# side by side. The result is only displayed; trainOriginalData is unchanged.
embarkedIndicators = pd.get_dummies(trainOriginalData['Embarked'], dtype=int)
pd.concat([trainOriginalData, embarkedIndicators], axis=1)

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,C,Q,S
0,1,0,3,1,0.275000,1,0,7.2500,S,0,0,1
1,2,1,1,0,0.475000,1,0,71.2833,C,1,0,0
2,3,1,3,0,0.325000,0,0,7.9250,S,0,0,1
3,4,1,1,0,0.437500,1,0,53.1000,S,0,0,1
4,5,0,3,1,0.437500,0,0,8.0500,S,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,1,0.337500,0,0,13.0000,S,0,0,1
887,888,1,1,0,0.237500,0,0,30.0000,S,0,0,1
888,889,0,3,0,0.371239,1,2,23.4500,S,0,0,1
889,890,1,1,1,0.325000,0,0,30.0000,C,1,0,0
