In [1]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
from sklearn.impute import SimpleImputer

In [2]:
train_set=pd.read_csv('train.csv')
test_set=pd.read_csv('test.csv')
gender_submission=pd.read_csv('gender_submission.csv')

In [3]:
train_set = train_set.dropna(subset=['Embarked'])
train_set.shape

(889, 12)

In [4]:
X_train_all=train_set.drop(['PassengerId','Survived','Name','Ticket','Cabin'],axis=1)
X_test=test_set.drop(['PassengerId','Name','Ticket','Cabin'],axis=1)
Y_train_all=train_set['Survived']

In [5]:
X_train_all.head(10)

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,3,male,22.0,1,0,7.25,S
1,1,female,38.0,1,0,71.2833,C
2,3,female,26.0,0,0,7.925,S
3,1,female,35.0,1,0,53.1,S
4,3,male,35.0,0,0,8.05,S
5,3,male,,0,0,8.4583,Q
6,1,male,54.0,0,0,51.8625,S
7,3,male,2.0,3,1,21.075,S
8,3,female,27.0,0,2,11.1333,S
9,2,female,14.0,1,0,30.0708,C


In [6]:
s = (X_train_all.dtypes == 'object')
object_cols = list(s[s].index)
object_cols

['Sex', 'Embarked']

In [7]:
t= (X_train_all.dtypes != 'object')
numerical_cols = list(t[t].index)
numerical_cols

['Pclass', 'Age', 'SibSp', 'Parch', 'Fare']

In [8]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [44]:
numerical_transformer = SimpleImputer(strategy='constant')
numerical_transformer1 = SimpleImputer(strategy='most_frequent')
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, object_cols)
    ])
preprocessor1 = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer1, numerical_cols),
        ('cat', categorical_transformer, object_cols)
    ])

In [76]:
from sklearn.neural_network import MLPClassifier

model = MLPClassifier()
model1= MLPClassifier(alpha=0.00001,hidden_layer_sizes=(5, 2), random_state=1)

In [11]:
help(MLPClassifier)

Help on class MLPClassifier in module sklearn.neural_network._multilayer_perceptron:

class MLPClassifier(sklearn.base.ClassifierMixin, BaseMultilayerPerceptron)
 |  MLPClassifier(hidden_layer_sizes=(100,), activation='relu', solver='adam', alpha=0.0001, batch_size='auto', learning_rate='constant', learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True, random_state=None, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10, max_fun=15000)
 |  
 |  Multi-layer Perceptron classifier.
 |  
 |  This model optimizes the log-loss function using LBFGS or stochastic
 |  gradient descent.
 |  
 |  .. versionadded:: 0.18
 |  
 |  Parameters
 |  ----------
 |  hidden_layer_sizes : tuple, length = n_layers - 2, default=(100,)
 |      The ith element represents the number of neurons in the ith
 |      hidden layer.
 |  
 |  activation : {'identity', 

In [77]:
my_pipeline = Pipeline(steps=[('preprocessor', preprocessor1),
                              ('model', model1)
                             ])

In [13]:
from sklearn.model_selection import train_test_split

In [14]:
X_train,X_val,y_train,y_val = train_test_split(X_train_all, Y_train_all, random_state = 0,test_size=0.2)

In [78]:
my_pipeline.fit(X_train, y_train)



Pipeline(memory=None,
         steps=[('preprocessor',
                 ColumnTransformer(n_jobs=None, remainder='drop',
                                   sparse_threshold=0.3,
                                   transformer_weights=None,
                                   transformers=[('num',
                                                  SimpleImputer(add_indicator=False,
                                                                copy=True,
                                                                fill_value=None,
                                                                missing_values=nan,
                                                                strategy='most_frequent',
                                                                verbose=0),
                                                  ['Pclass', 'Age', 'SibSp',
                                                   'Parch', 'Fare']),
                                                 ('cat',
         

In [73]:
y_pred=my_pipeline.predict(X_val)

In [17]:
from sklearn.metrics import classification_report, confusion_matrix

In [74]:
confusion_matrix(y_val,y_pred)

array([[98,  7],
       [43, 30]], dtype=int64)

In [75]:
classification_report(y_val,y_pred)

'              precision    recall  f1-score   support\n\n           0       0.70      0.93      0.80       105\n           1       0.81      0.41      0.55        73\n\n    accuracy                           0.72       178\n   macro avg       0.75      0.67      0.67       178\nweighted avg       0.74      0.72      0.69       178\n'

In [21]:
my_pipeline.fit(X_train_all, Y_train_all)



Pipeline(memory=None,
         steps=[('preprocessor',
                 ColumnTransformer(n_jobs=None, remainder='drop',
                                   sparse_threshold=0.3,
                                   transformer_weights=None,
                                   transformers=[('num',
                                                  SimpleImputer(add_indicator=False,
                                                                copy=True,
                                                                fill_value=None,
                                                                missing_values=nan,
                                                                strategy='constant',
                                                                verbose=0),
                                                  ['Pclass', 'Age', 'SibSp',
                                                   'Parch', 'Fare']),
                                                 ('cat',
              

In [22]:
preds = my_pipeline.predict(X_test)

In [23]:
preds = np.where(preds > 0.5, 1, 0)

In [24]:
preds

array([0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
       1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1,
       1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1,
       0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,

In [25]:
ss1=pd.read_csv('gender_submission.csv')
ss1=ss1.drop('Survived',axis=1)
ss1['Survived']=preds
ss1.to_csv('submission10.csv',index=False)