In [4]:
#Import libraries
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

In [5]:
#Import dataset
df= sns.load_dataset("titanic")
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [12]:
# Remove the following non-numeric columns
columns_to_drop = [
    "pclass", "sibsp", "parch", "fare", "class", "who",
    "adult_male", "deck", "embark_town", "alive", "alone"
]
df = df.drop(columns=[c for c in columns_to_drop if c in df.columns])
df.head()

Unnamed: 0,survived,sex,age,embarked
0,0,male,22.0,S
1,1,female,38.0,C
2,1,female,26.0,S
3,1,female,35.0,S
4,0,male,35.0,S


In [14]:
#Drop rows containing missing values
df = df.dropna()
df.head()

Unnamed: 0,survived,sex,age,embarked
0,0,male,22.0,S
1,1,female,38.0,C
2,1,female,26.0,S
3,1,female,35.0,S
4,0,male,35.0,S


In [15]:
#One-hot encoding to convert the 'sex' and 'embarked' to boolean columns
df = pd.get_dummies(df, columns=["sex", "embarked"], drop_first=False)
df.head()

Unnamed: 0,survived,age,sex_female,sex_male,embarked_C,embarked_Q,embarked_S
0,0,22.0,False,True,False,False,True
1,1,38.0,True,False,True,False,False
2,1,26.0,True,False,False,False,True
3,1,35.0,True,False,False,False,True
4,0,35.0,False,True,False,False,True


In [16]:
#Assign 'Survived' as the dependent and independent variables
y = df["survived"]
X = df[["age","sex_female","sex_male","embarked_C","embarked_Q","embarked_S"]]
df.head()

Unnamed: 0,survived,age,sex_female,sex_male,embarked_C,embarked_Q,embarked_S
0,0,22.0,False,True,False,False,True
1,1,38.0,True,False,True,False,False
2,1,26.0,True,False,False,False,True
3,1,35.0,True,False,False,False,True
4,0,35.0,False,True,False,False,True


In [18]:
#Split data into train/test sets (70/30) & shuffle
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=42)
df.head()

Unnamed: 0,survived,age,sex_female,sex_male,embarked_C,embarked_Q,embarked_S
0,0,22.0,False,True,False,False,True
1,1,38.0,True,False,True,False,False
2,1,26.0,True,False,False,False,True
3,1,35.0,True,False,False,False,True
4,0,35.0,False,True,False,False,True


In [19]:
#Assign the Logicstic Regression model & link the X and Y variables
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
df.head()

Unnamed: 0,survived,age,sex_female,sex_male,embarked_C,embarked_Q,embarked_S
0,0,22.0,False,True,False,False,True
1,1,38.0,True,False,True,False,False
2,1,26.0,True,False,False,False,True
3,1,35.0,True,False,False,False,True
4,0,35.0,False,True,False,False,True


In [23]:
#Evaluate predictions using the confusion_matrix and classfication_report from sci-kit learn
y_pred = model.predict(X_test)
confusion_matrix(y_test, y_pred)

classification_report(y_test, y_pred)

'              precision    recall  f1-score   support\n\n           0       0.75      0.84      0.80       122\n           1       0.75      0.63      0.69        92\n\n    accuracy                           0.75       214\n   macro avg       0.75      0.74      0.74       214\nweighted avg       0.75      0.75      0.75       214\n'

In [24]:
#Create a data point and predict survival
passenger = [
    22, # age
    0,  # sex_female
    1,  # sex_male
    0,  # embarked_C
    0,  # embarked_Q
    1   # embarked_S
]

predict_passenger = model.predict([passenger])
predict_passenger



array([0], dtype=int64)

In [26]:
# count of values in the survived column
titanic = sns.load_dataset("titanic")
print(titanic["survived"].value_counts()) 

survived
0    549
1    342
Name: count, dtype: int64
