In [None]:
import codecademylib3_seaborn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the passenger data
passengers = pd.read_csv('passengers.csv')
print(passengers.head())


# Update sex column to numerical (female -> 1, male -> 0).
# Only the Sex column is mapped, so identical strings in other columns
# are never rewritten by accident.
passengers['Sex'] = passengers['Sex'].map({'female': 1, 'male': 0})

# Fill the nan values in the age column with the mean age.
# The fill is restricted to Age; missing values in other columns are left
# alone instead of being overwritten with an age.
passengers['Age'] = passengers['Age'].fillna(passengers['Age'].mean())
print(passengers['Age'])


# Create a first class column: 1 when Pclass == 1, otherwise 0.
# Assigning a new column (rather than calling an in-place replace on the
# column accessor) guarantees the DataFrame itself is updated.
passengers['FirstClass'] = (passengers['Pclass'] == 1).astype(int)


# Create a second class column: 1 when Pclass == 2, otherwise 0.
# A direct comparison avoids chained replaces, where mapping 2 -> 1 and then
# 1 -> 0 would zero out every row.
passengers['SecondClass'] = (passengers['Pclass'] == 2).astype(int)

print(passengers.head(50))

# Select the desired features
features = passengers[['Sex', 'Age', 'FirstClass', 'SecondClass']]
# The target is selected as a 1-D Series: scikit-learn classifiers expect
# y of shape (n_samples,) and warn when handed a single-column DataFrame.
survival = passengers['Survived']


# Perform train, test, split
# Hold out 20% of the rows for testing so the model trains on the remaining
# 80%; random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    features, survival, test_size=0.2, random_state=50
)


# Scale the feature data so it has mean = 0 and standard deviation = 1
# The scaler is fit on the training rows only and then reused for the test
# rows, so no information from the test set leaks into the scaling.
scaler = StandardScaler()
X_train_standart = scaler.fit_transform(X_train)
X_test_standart = scaler.transform(X_test)


# Create and train the model
model = LogisticRegression().fit(X_train_standart, y_train)


# Score the model on the train data
score = model.score(X_train_standart, y_train)
print("Model Score on train set is {}".format(score))


# Score the model on the test data
score = model.score(X_test_standart, y_test)
print("Model Score on test set is {}".format(score))

# Analyze the coefficients
# One coefficient per feature, in the order Sex, Age, FirstClass, SecondClass.
print("Regression coefficient: {}".format(model.coef_))


# Sample passenger features
# Each row follows the training column order: [Sex, Age, FirstClass, SecondClass]
Jack = np.array([[0.0, 20.0, 0.0, 0.0]])
Rose = np.array([[1.0, 17.0, 1.0, 0.0]])
You = np.array([[0.0, 41.0, 1.0, 0.0]])
Mary = np.array([[1.0, 37.0, 1.0, 0.0]])
Liza = np.array([[1.0, 3.5, 1.0, 0.0]])

# Combine passenger arrays into a single (5, 4) matrix, one row per passenger
sample_passengers = np.concatenate((Jack, Rose, You, Mary, Liza))
print(sample_passengers)

# Scale the sample passenger features with the scaler fitted on the training data
sample_passengers_standart = scaler.transform(sample_passengers)
print(sample_passengers_standart)


# Make survival predictions!
# The model was trained on standardized features, so it must be given the
# scaled samples; raw ages would be interpreted on the wrong scale.
# Each output row holds the probabilities in the order of model.classes_.
survive_me = model.predict_proba(sample_passengers_standart)
print('Will I survive? {}'.format(survive_me))