# We will use TensorFlow to train a small neural network (a binary classifier with a sigmoid output, rather than a multiple linear regression) that predicts whether a passenger will survive or not

In [44]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [3]:
# Load the passenger records from the CSV file into a DataFrame.
data = pd.read_csv(filepath_or_buffer='tested.csv')

In [4]:
# Show the freshly loaded frame as this cell's output.
data

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,0,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,1,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0000,,S
2,894,0,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,0,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,1,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
413,1305,0,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S
414,1306,1,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C
415,1307,0,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,1308,0,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S


In [6]:
# Count the missing entries in every column.
data.isna().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age             86
SibSp            0
Parch            0
Ticket           0
Fare             1
Cabin          327
Embarked         0
dtype: int64

# The output above shows the number of null values in each column

# Now we will drop the Cabin column: 327 of its values are missing, so it is of almost no use for prediction

In [7]:
# Remove the Cabin column; drop() returns a new frame, which is rebound to `data`.
data = data.drop(columns=["Cabin"])

In [8]:
# Show the frame after the Cabin column has been removed.
data

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
0,892,0,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,Q
1,893,1,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0000,S
2,894,0,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,Q
3,895,0,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,S
4,896,1,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,S
...,...,...,...,...,...,...,...,...,...,...,...
413,1305,0,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,S
414,1306,1,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C
415,1307,0,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,S
416,1308,0,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,S


In [9]:
# Keep an independent snapshot of the frame at this point.
# A plain `data1 = data` only creates a second name for the same object, so any
# in-place change made through either name would show up in both.
data1 = data.copy()

## Next we drop more columns that are not relevant as features (PassengerId, Name, Ticket, Embarked)

In [11]:
# Discard the identifier-like columns; the result is rebound to `data`.
unused_columns = ['PassengerId', 'Name', 'Ticket', 'Embarked']
data = data.drop(columns=unused_columns)


In [12]:
# Show the frame after the extra columns have been dropped.
data

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare
0,0,3,male,34.5,0,0,7.8292
1,1,3,female,47.0,1,0,7.0000
2,0,2,male,62.0,0,0,9.6875
3,0,3,male,27.0,0,0,8.6625
4,1,3,female,22.0,1,1,12.2875
...,...,...,...,...,...,...,...
413,0,3,male,,0,0,8.0500
414,1,1,female,39.0,0,0,108.9000
415,0,3,male,38.5,0,0,7.2500
416,0,3,male,,0,0,8.0500


In [None]:
# One-hot encode Sex: the column is replaced by one indicator column per category.
data = pd.get_dummies(data=data, columns=['Sex'])

## Convert categorical variables to one-hot encoding

In [17]:
# Show the frame after Sex has been one-hot encoded.
data

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare,Sex_female,Sex_male
0,0,3,34.5,0,0,7.8292,0,1
1,1,3,47.0,1,0,7.0000,1,0
2,0,2,62.0,0,0,9.6875,0,1
3,0,3,27.0,0,0,8.6625,0,1
4,1,3,22.0,1,1,12.2875,1,0
...,...,...,...,...,...,...,...,...
413,0,3,,0,0,8.0500,0,1
414,1,1,39.0,0,0,108.9000,1,0
415,0,3,38.5,0,0,7.2500,0,1
416,0,3,,0,0,8.0500,0,1


## Filling the missing values with the mean of each column

In [18]:
# Replace every remaining NaN with the mean of its own column.
# The means are computed over the whole frame, i.e. before any train/test split.
column_means = data.mean()
data = data.fillna(value=column_means)

In [19]:
# Show the frame after the missing values have been filled.
data

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare,Sex_female,Sex_male
0,0,3,34.50000,0,0,7.8292,0,1
1,1,3,47.00000,1,0,7.0000,1,0
2,0,2,62.00000,0,0,9.6875,0,1
3,0,3,27.00000,0,0,8.6625,0,1
4,1,3,22.00000,1,1,12.2875,1,0
...,...,...,...,...,...,...,...,...
413,0,3,30.27259,0,0,8.0500,0,1
414,1,1,39.00000,0,0,108.9000,1,0
415,0,3,38.50000,0,0,7.2500,0,1
416,0,3,30.27259,0,0,8.0500,0,1


In [20]:
# Re-count missing entries per column to confirm the fill left none behind.
data.isna().sum()

Survived      0
Pclass        0
Age           0
SibSp         0
Parch         0
Fare          0
Sex_female    0
Sex_male      0
dtype: int64

## we have no more null values

## setting up training and testing data

## Here X holds the independent variables (the features) and y is the dependent variable (the Survived target)

In [22]:
# Target: the Survived column. Features: every other column.
y = data['Survived']
X = data.drop(columns=['Survived'])

In [23]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

## Standardizing our train and test data (the scaler is fitted on the training data only and then applied to both)

In [24]:
# Fit the scaler on the training rows only, then apply that same
# transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Creating our neural network


In [26]:
# Fix TensorFlow's global seed so that operations drawing on it (such as the
# random weight initialisation) are repeatable between runs, in line with the
# seeded train/test split.
tf.random.set_seed(42)

# Fully connected binary classifier: two hidden ReLU layers of 64 units each,
# followed by a single sigmoid unit. The input width is taken from the number
# of feature columns in X_train.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [27]:
# Configure training: Adam optimizer, binary cross-entropy loss,
# and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [28]:
# Train for 50 epochs in mini-batches of 32 rows.
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch validation metrics are computed on the same rows as X_test/y_test.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_test, y_test),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x20ed199c640>

In [35]:
# Score the trained model on the held-out rows and report both numbers
# rounded to two decimals.
# NOTE(review): a perfect score is unusual; worth checking whether one of the
# features fully determines Survived in this file.
test_metrics = model.evaluate(x=X_test, y=y_test)
loss, accuracy = test_metrics
print(f'Test loss: {loss:.2f}')
print(f'Test accuracy: {accuracy:.2f}')

Test loss: 0.00
Test accuracy: 1.00


# Now we will use the trained model to make a prediction

## We take an imaginary passenger with the following feature values:
## 2: Class (Pclass)
## 19: Age
## 1: number of siblings (SibSp)
## 0: Parch
## 27: Fare
## 1: Female [True] (Sex_female)
## 0: Male [False] (Sex_male)

In [41]:
# Hypothetical passenger, with values given in the feature column order:
# Pclass, Age, SibSp, Parch, Fare, Sex_female, Sex_male.
# The scaler was fitted on a DataFrame, so the sample is wrapped in a DataFrame
# carrying the same column labels (X.columns). This avoids scikit-learn's
# "X does not have valid feature names" warning and raises immediately if the
# number of values does not match the number of feature columns.
random_data = pd.DataFrame([[2, 19, 1, 0, 27, 1, 0]], columns=X.columns)
scaled_data = scaler.transform(random_data)
# One row in, one sigmoid output out.
predictions = model.predict(scaled_data)



## If the predicted value is greater than 0.5, the passenger is predicted to survive

In [42]:
# Threshold the model outputs at 0.5 to obtain hard 0/1 labels.
binary_predictions = (predictions > 0.5).astype(int)

# Report the outcome for the single passenger (first row, first output).
if binary_predictions[0][0] != 0:
    print("The passenger is predicted to survive.")
else:
    print("The passenger is predicted to not survive.")

The passenger is predicted to survive.
