In [21]:
# Import tools
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [22]:
# Load the passenger data
# The path is relative, so the CSV is looked up in the kernel's working directory.
passengers = pd.read_csv('passengers.csv')
# Preview the first rows to sanity-check the columns that were read.
passengers.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [23]:
# Fill the NaN values in the Age column with the rounded mean age.
# The filled Series is assigned back to the column rather than calling
# fillna(..., inplace=True) on passengers['Age']: that chained form acts on an
# intermediate Series, emits a FutureWarning in pandas 2.x, and leaves
# `passengers` unchanged once Copy-on-Write is enabled (default in pandas 3.0).
mean_age = round(passengers['Age'].mean())
passengers['Age'] = passengers['Age'].fillna(mean_age)
passengers.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [24]:
# Flag first-class passengers: 1 where Pclass equals 1, otherwise 0.
is_first_class = passengers['Pclass'] == 1
passengers['FirstClass'] = is_first_class.astype('int64')
passengers.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,FirstClass
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,0
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,1
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,0
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,1
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,0


In [25]:
# Flag second-class passengers: 1 where Pclass equals 2, otherwise 0.
is_second_class = passengers['Pclass'] == 2
passengers['SecondClass'] = is_second_class.astype('int64')

# Encode sex numerically; any value not present in the mapping becomes NaN.
sex_codes = {"male": 0, "female": 1}
passengers['Sex_binary'] = passengers['Sex'].map(sex_codes)
passengers.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,FirstClass,SecondClass,Sex_binary
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,0,0,0
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,1,0,1
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,0,0,1
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,1,0,1
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,0,0,0


In [26]:
# Pick the model inputs (features) and the target column (survival label).
feature_columns = ['Sex_binary', 'Age', 'FirstClass', 'SecondClass']
features = passengers[feature_columns]
survival = passengers['Survived']

In [27]:
# Perform train, test, split.
# train_test_split shuffles the rows before splitting; pinning random_state
# makes the split -- and therefore every score, coefficient and prediction
# computed from it -- reproducible across kernel restarts. The default
# train/test proportions are kept.
train_features, test_features, train_labels, test_labels = train_test_split(
    features, survival, random_state=42
)

In [28]:
# Standardize the features to mean = 0 and standard deviation = 1.
# The scaler learns its statistics from the training split only; those same
# statistics are then applied to both the training and the test split.
scaler = StandardScaler()
scaler.fit(train_features)
train_features, test_features = (
    scaler.transform(train_features),
    scaler.transform(test_features),
)

In [29]:
# Build a logistic regression classifier and fit it on the scaled training data.
model = LogisticRegression().fit(train_features, train_labels)
# fit() returns the estimator itself, so this shows the same object as before.
model

In [30]:
# Accuracy of the fitted model on the data it was trained on.
train_accuracy = model.score(train_features, train_labels)
print(train_accuracy)

0.7829341317365269


In [31]:
# Accuracy of the fitted model on the held-out test split.
test_accuracy = model.score(test_features, test_labels)
print(test_accuracy)

0.7937219730941704


In [32]:
# Analyze the coefficients
# One coefficient per input column, in the order the features were selected:
# Sex_binary, Age, FirstClass, SecondClass. The model was fit on the
# standardized features, so each value is expressed per standard deviation
# of its feature rather than per raw unit.
print(model.coef_)

[[ 1.22278934 -0.43743178  1.02688491  0.4682454 ]]


In [33]:
# Hand-built sample passengers. Each array holds one value per model feature,
# in the order [Sex_binary, Age, FirstClass, SecondClass].
Jack = np.asarray([0, 20, 0, 0], dtype=float)  # male, 20, neither 1st nor 2nd class
Rose = np.asarray([1, 17, 1, 0], dtype=float)  # female, 17, first class
You = np.asarray([0, 27, 0, 1], dtype=float)   # male, 27, second class

In [34]:
# Combine the sample passengers into one table, one row per passenger.
# The scaler was fitted on a DataFrame and therefore remembers its column
# names; handing it a bare ndarray makes scikit-learn warn that
# "X does not have valid feature names". Labelling the columns with the
# training feature names avoids the warning and makes the value order explicit.
sample_passengers = pd.DataFrame([Jack, Rose, You], columns=features.columns)

In [35]:
# Scale the sample passenger features
# Reuses the scaler fitted earlier so the samples are standardized with the
# same statistics as the training data. The result overwrites
# sample_passengers, so re-running this cell on its own scales the values twice.
sample_passengers = scaler.transform(sample_passengers)



In [36]:
# Make survival predictions!
# First the hard class labels, then the per-class probabilities per passenger.
predicted_labels = model.predict(sample_passengers)
predicted_probabilities = model.predict_proba(sample_passengers)
print(predicted_labels)
print(predicted_probabilities)

[0 1 0]
[[0.87466386 0.12533614]
 [0.04118329 0.95881671]
 [0.73690531 0.26309469]]


The `[0 1 0]` means Jack = "Dead", Rose = "Survived", You = "Dead". Each row of probabilities below follows the same order (Jack, Rose, You).

    [[0.87466386 0.12533614]    [Chance of Death vs. Chance of Survival]

    [0.04118329 0.95881671]     [Chance of Death vs. Chance of Survival]
    
    [0.73690531 0.26309469]]    [Chance of Death vs. Chance of Survival]