In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Importing the datasets: read the Titanic train and test CSVs from the Kaggle input directory

df_train = pd.read_csv('/kaggle/input/titanic/train.csv')
df_test = pd.read_csv('/kaggle/input/titanic/test.csv')

In [None]:
# Shape (rows, columns) of the two datasets, train first, one per line

print(df_train.shape, df_test.shape, sep='\n')

In [None]:
# Preview the first rows of the raw training data
df_train.head()

In [None]:
# Preview the first rows of the raw test data
df_test.head()

## Checking the information & missing values

In [None]:
# Column dtypes and non-null counts for the training data
df_train.info()

In [None]:
# Column dtypes and non-null counts for the test data
df_test.info()

In [None]:
# Number of missing values in each training column
df_train.isnull().sum()

In [None]:
# Number of missing values in each test column
df_test.isnull().sum()

## Data Preprocessing

### 1. Filling the missing values

In [None]:
# Filling Cabin Column: missing values become the placeholder 0, which
# transform_cabin later maps to "no cabin recorded".
# The result is assigned back to the column: an inplace fillna on
# df_train['Cabin'] is a chained assignment, which warns in pandas 2.x and does
# not update df_train at all once Copy-on-Write is enabled.
df_train['Cabin'] = df_train['Cabin'].fillna(0)

In [None]:
def transform_cabin(cabin):
    """Turn a filled Cabin value into a binary flag.

    Args:
        cabin: a Cabin cell after missing values were replaced with 0.

    Returns:
        0 when ``cabin`` equals the placeholder 0, otherwise 1.
    """
    return 1 if cabin != 0 else 0

In [None]:
# Collapse the training Cabin column to a 0/1 flag, element by element
df_train['Cabin'] = df_train['Cabin'].map(transform_cabin)

In [None]:
# Same Cabin treatment for the test data: replace missing values with 0, then
# collapse to a 0/1 flag with transform_cabin.
# The filled column is chained into apply and assigned back, because an inplace
# fillna on df_test['Cabin'] is a chained assignment that does not update
# df_test once pandas Copy-on-Write is enabled.
df_test['Cabin'] = df_test['Cabin'].fillna(0).apply(transform_cabin)

In [None]:
# Preview the training data after the Cabin column was turned into a 0/1 flag
df_train.head()

In [None]:
# Filling Age Column: train takes the next valid value (backward fill), test
# takes the previous valid value (forward fill).
# bfill()/ffill() replace the deprecated fillna(method=...) spelling, and the
# result is assigned back because an inplace fill on a selected column is a
# chained assignment that does not update the frame under pandas Copy-on-Write.
# NOTE(review): a backward fill leaves trailing NaNs and a forward fill leaves
# leading NaNs untouched - confirm neither column starts/ends with a missing Age.
df_train['Age'] = df_train['Age'].bfill()
df_test['Age'] = df_test['Age'].ffill()

In [None]:
#Filling Fare column in test

# Missing fares become 0. Assigned back instead of filled inplace: an inplace
# fillna on df_test['Fare'] is a chained assignment that does not update
# df_test once pandas Copy-on-Write is enabled.
df_test['Fare'] = df_test['Fare'].fillna(0)

In [None]:
#Filling Embarked column in train

# Missing values become 'S'. Assigned back instead of filled inplace: an inplace
# fillna on df_train['Embarked'] is a chained assignment that does not update
# df_train once pandas Copy-on-Write is enabled.
df_train['Embarked'] = df_train['Embarked'].fillna('S')

In [None]:
# Preview the training data after the missing-value fills
df_train.head()

### 2. EDA

In [None]:
# Checking the relation of the columns with output

import matplotlib.pyplot as plt

In [None]:
# Mean of Survived per Pclass value. 'Survived' is selected before aggregating:
# calling mean() on the whole grouped frame raises TypeError in pandas >= 2.0
# because the frame still holds string columns.
df_train.groupby('Pclass')['Survived'].mean().plot(kind='bar')

In [None]:
# Mean of Survived per SibSp value. 'Survived' is selected before aggregating:
# calling mean() on the whole grouped frame raises TypeError in pandas >= 2.0
# because the frame still holds string columns.
df_train.groupby('SibSp')['Survived'].mean().plot(kind='bar')

In [None]:
# Mean of Survived per Parch value. 'Survived' is selected before aggregating:
# calling mean() on the whole grouped frame raises TypeError in pandas >= 2.0
# because the frame still holds string columns.
df_train.groupby('Parch')['Survived'].mean().plot(kind='bar')

In [None]:
# Mean of Survived for each Cabin flag value (0 / 1). 'Survived' is selected
# before aggregating: calling mean() on the whole grouped frame raises TypeError
# in pandas >= 2.0 because the frame still holds string columns.
df_train.groupby('Cabin')['Survived'].mean().plot(kind='bar')

In [None]:
# Mean of Survived per Embarked value. 'Survived' is selected before
# aggregating: calling mean() on the whole grouped frame raises TypeError in
# pandas >= 2.0 because the frame still holds string columns.
# NOTE(review): the pie percentages are each group's share of the summed means,
# not the survival rates themselves - a bar chart may read more directly.
df_train.groupby('Embarked')['Survived'].mean().plot(kind='pie' , autopct='%1.2f%%')

In [None]:
# Mean of Survived per Sex value. 'Survived' is selected before aggregating:
# calling mean() on the whole grouped frame raises TypeError in pandas >= 2.0
# because the frame still holds string columns.
df_train.groupby('Sex')['Survived'].mean().plot(kind='bar')

In [None]:
#Dropping some columns

# Name and Ticket are removed from both frames. The result is reassigned rather
# than dropped inplace, and errors='ignore' lets the cell be re-run after the
# columns are already gone instead of raising KeyError.
df_train = df_train.drop(columns=['Name','Ticket'], errors='ignore')
df_test = df_test.drop(columns=['Name','Ticket'], errors='ignore')

In [None]:
# Creating a new column Family = SibSp + Parch + 1
# (the +1 presumably counts the passenger themselves - confirm)

df_train['Family'] = df_train['SibSp'].add(df_train['Parch']).add(1)
df_test['Family'] = df_test['SibSp'].add(df_test['Parch']).add(1)

In [None]:
# Mean of Survived per Family size. 'Survived' is selected before aggregating:
# calling mean() on the whole grouped frame raises TypeError in pandas >= 2.0
# because the frame still holds string columns (Sex, Embarked).
df_train.groupby('Family')['Survived'].mean().plot(kind='bar')

In [None]:
#New column Family_type

def family_type(size):
    """Bucket a family size into a coarse category.

    Args:
        size: value of the Family column for one passenger.

    Returns:
        'Alone' when ``size`` is exactly 1, 'Small' for any other size up to
        and including 4, and 'Large' for everything above 4.
    """
    if size == 1:
        return 'Alone'
    return 'Small' if size <= 4 else 'Large'

In [None]:
# Label every passenger's family size as Alone / Small / Large in both frames
df_train['Family_type'] = df_train['Family'].map(family_type)
df_test['Family_type'] = df_test['Family'].map(family_type)

In [None]:
# Drop the identifier and the columns already summarised by Family_type.
# The result is reassigned rather than dropped inplace, and errors='ignore' lets
# the cell be re-run after the columns are already gone instead of raising KeyError.
df_train = df_train.drop(columns=['PassengerId','SibSp','Parch','Family'], errors='ignore')
df_test = df_test.drop(columns=['PassengerId','SibSp','Parch','Family'], errors='ignore')

In [None]:
#LabelEncoding

from sklearn.preprocessing import LabelEncoder
# One shared encoder instance; it is refitted for each column in the following
# cells, so it only holds the classes of the most recently fitted column.
encoder = LabelEncoder()

In [None]:
# Encode Sex as integer codes. The encoder is fitted on the training column only
# and that same mapping is applied to the test column, so a category always gets
# the same code in both frames (refitting on test could assign different codes
# if the two sets of categories differ). transform raises ValueError if the test
# column contains a label that was not seen in training.
df_train['Sex'] = encoder.fit_transform(df_train['Sex'])
df_test['Sex'] = encoder.transform(df_test['Sex'])

In [None]:
# Encode Embarked as integer codes. The encoder is fitted on the training column
# only and that same mapping is applied to the test column, so a category always
# gets the same code in both frames (refitting on test could assign different
# codes if the two sets of categories differ). transform raises ValueError if
# the test column contains a label that was not seen in training.
df_train['Embarked'] = encoder.fit_transform(df_train['Embarked'])
df_test['Embarked'] = encoder.transform(df_test['Embarked'])

In [None]:
# Encode Family_type as integer codes. The encoder is fitted on the training
# column only and that same mapping is applied to the test column, so a category
# always gets the same code in both frames (refitting on test could assign
# different codes if, say, one category were absent from the test data).
# transform raises ValueError on a label that was not seen in training.
df_train['Family_type'] = encoder.fit_transform(df_train['Family_type'])
df_test['Family_type'] = encoder.transform(df_test['Family_type'])

In [None]:
# Preview the training data after dropping columns and label encoding
df_train.head()

In [None]:
# Preview the test data after dropping columns and label encoding
df_test.head()

## Fetching X and y

In [None]:
# Separate the feature matrix from the target by column name rather than by
# position, so the result does not depend on 'Survived' being the first column
# of df_train. With the current column order this yields the same arrays as
# iloc[:, 1:] / iloc[:, 0].
X = df_train.drop(columns='Survived').values
y = df_train['Survived'].values

## Train/Test Split

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; random_state=0 makes the split repeatable.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=0)

In [None]:
# Shapes of the training features and training labels, one per line
print(X_train.shape, y_train.shape, sep='\n')

In [None]:
from sklearn.tree import DecisionTreeClassifier
# random_state is fixed (same seed as the train/test split) so the fitted tree
# and its accuracy are reproducible between runs; without it, ties between
# equally good splits are broken randomly.
clf=DecisionTreeClassifier(random_state=0)

In [None]:
# Fit the decision tree on the training split
clf.fit(X_train,y_train)

In [None]:
# Decision-tree predictions for the held-out split
# (y_pred is overwritten later by the ANN predictions)
y_pred=clf.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score
# Fraction of held-out labels that the decision tree predicted correctly
accuracy_score(y_test,y_pred)

## Applying ANN

In [None]:
# Importing keras

import tensorflow
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
# Start from an empty Sequential model; layers are added in a later cell
model = Sequential()

In [None]:
# Check the training split's shape before sizing the network's input layer
X_train.shape

In [None]:
# Adding layers

# First hidden layer; input_dim is the number of feature columns in X.
# ReLU is used for the hidden layers instead of softmax: softmax forces the
# 5 units of a layer to sum to 1, which squeezes the hidden representation and
# is meant for class-probability outputs, not intermediate layers.
model.add(Dense(units = 5 ,activation ='relu' , input_dim = X.shape[1]))

# Second hidden layer
model.add(Dense(units = 5 , activation ='relu'))

# Output layer: one sigmoid unit, so the model emits a probability in [0, 1].
# The model is compiled with binary_crossentropy, which by default expects
# probabilities (from_logits=False); a linear output here would feed it
# unbounded values.
model.add(Dense(units = 1 , activation ='sigmoid'))


In [None]:
# Print the layer-by-layer summary of the network
model.summary()

In [None]:
# Compiling ANN: Adam optimizer, binary cross-entropy loss, accuracy reported as the metric

model.compile(optimizer='Adam' , loss='binary_crossentropy' , metrics=['accuracy'])

In [None]:
# Train on the training split for 100 epochs in batches of 10 rows, with progress output (verbose=1)
model.fit(X_train, y_train, batch_size=10, epochs=100 , verbose=1)

In [None]:
# Sequential.predict_classes was removed in TensorFlow 2.6. For a single-unit
# output it thresholded the predictions at 0.5, which is reproduced here:
# values above 0.5 become class 1, everything else class 0.
y_pred = (model.predict(X_test) > 0.5).astype('int32')

In [None]:
# Inspect the ANN's predicted class labels for the held-out split
y_pred

In [None]:
# Number of ANN predictions (one per row of X_test)
len(y_pred)