In [86]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
try:
    import lightgbm as lgb
except ImportError:
    !pip install lightgbm
    import lightgbm as lgb

In [87]:
# Location of the Titanic passenger CSV used throughout this notebook.
titanic_url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'

# Download and parse the CSV, then preview the first five rows.
df = pd.read_csv(titanic_url)
df.head(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [88]:
# Remove the identifier / free-text columns that are not used as features.
df = df.drop(columns=['PassengerId', 'Ticket', 'Name', 'Cabin'])

In [89]:
# One-hot encode 'Embarked' into integer indicator columns (Embarked_<value>).
df = pd.get_dummies(
    data=df,
    columns=['Embarked'],
    dtype=int,
)

In [90]:
# Encode 'Sex' numerically: male -> 0, female -> 1.
sex_codes = {'male': 0, 'female': 1}

# Only apply the mapping while the raw string labels are still present.
# Without this guard, re-running the cell would look up the already-encoded
# 0/1 values in the string-keyed mapping and silently turn the column into NaN.
if df['Sex'].isin(list(sex_codes)).any():
    df['Sex'] = df['Sex'].map(sex_codes)

In [91]:
# Fill missing ages with the column mean.
# NOTE(review): the mean is computed over all rows before the train/test split
# performed later in the notebook, so test rows influence the imputed value.
mean_age = df['Age'].mean()
df['Age'] = df['Age'].fillna(mean_age)

In [92]:
from sklearn.model_selection import train_test_split

# Separate the target ('Survived') from the feature columns.
y = df['Survived']
X = df.drop(columns='Survived')

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [93]:
# Imports used by this cell, grouped at the top of the cell.
from IPython.display import clear_output
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import lightgbm

# Candidate classifiers; all are fitted on the same training split.
logreg = LogisticRegression(max_iter=100000)
# Seed the tree so its structure (and therefore its metrics) is reproducible
# across kernel restarts; an unseeded tree may break split ties differently on
# each run. The seed matches the one used for the train/test split.
dt = DecisionTreeClassifier(random_state=42)
lgbm = lgb.LGBMClassifier()
nb = GaussianNB()

# Order matters: downstream cells label the metric rows by position.
models = [
    logreg,
    dt,
    lgbm,
    nb
]

for model in models:
    model.fit(X_train, y_train)

# Discard whatever output the fitting step wrote to this cell.
clear_output()

# Predictions on the held-out set, one array per model (same order as `models`).
preds = [model.predict(X_test) for model in models]

# Per-model evaluation metrics, index-aligned with `models` / `preds`.
accs = [accuracy_score(y_test, pred) for pred in preds]
precs = [precision_score(y_test, pred) for pred in preds]
recs = [recall_score(y_test, pred) for pred in preds]
f1s = [f1_score(y_test, pred) for pred in preds]


In [94]:
# Assemble the per-model scores into one comparison table.
# The labels are positional: they must follow the order of the `models` list.
metric_columns = {
    'Model': ['LogisticRegression', 'DecisionTree', 'LightGBM', 'GaussianNB'],
    'Accuracy': accs,
    'Precision': precs,
    'Recall': recs,
    'F1 Score': f1s,
}
metrics_df = pd.DataFrame(metric_columns)

metrics_df

Unnamed: 0,Model,Accuracy,Precision,Recall,F1 Score
0,LogisticRegression,0.810056,0.785714,0.743243,0.763889
1,DecisionTree,0.782123,0.739726,0.72973,0.734694
2,LightGBM,0.821229,0.776316,0.797297,0.786667
3,GaussianNB,0.776536,0.7125,0.77027,0.74026


In [95]:
# Pair each feature name with its fitted logistic-regression coefficient
# (first row of `coef_`) and display the table.
coef_table = pd.DataFrame(
    data={
        'Feature': X.columns,
        'Coefficient': logreg.coef_[0],
    }
)
coef_table

Unnamed: 0,Feature,Coefficient
0,Pclass,-0.938826
1,Sex,2.589321
2,Age,-0.031358
3,SibSp,-0.296045
4,Parch,-0.110962
5,Fare,0.002491
6,Embarked_C,0.114235
7,Embarked_Q,-0.011816
8,Embarked_S,-0.319376
