In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Load seaborn's "titanic" example dataset and preview its first five rows.
titanic = sns.load_dataset(name = "titanic")
titanic.head(n = 5)

In [None]:
# Columns used as model inputs, and the column to predict.
features = ["pclass", "sex", "fare", "embarked", "age"]
target = ["survived"]

# Per-column missing-value counts *before* imputation. A bare expression in
# the middle of a cell is evaluated and thrown away, so print it explicitly.
print(titanic.isnull().sum())

# Handle missing data: median for the numeric "age" column, most frequent
# value for the categorical "embarked" column.
# NOTE(review): both imputers are fitted on the full frame before the
# train/test split below, so test rows influence the imputed values (mild
# data leakage). Fitting them on the training split only would avoid that.
imp_median = SimpleImputer(strategy = "median")
titanic[["age"]] = imp_median.fit_transform(titanic[["age"]])

imp_freq = SimpleImputer(strategy = "most_frequent")
titanic[["embarked"]] = imp_freq.fit_transform(titanic[["embarked"]])

# Encode the categorical columns as integer codes. Each column gets its own
# encoder so that both fitted mappings (`classes_`) stay available; refitting
# a single shared encoder would overwrite the "sex" mapping with "embarked".
sex_encoder = LabelEncoder()
embarked_encoder = LabelEncoder()

titanic["sex"] = sex_encoder.fit_transform(titanic["sex"])
titanic["embarked"] = embarked_encoder.fit_transform(titanic["embarked"])

# Backward-compatible alias: `le` still refers to the encoder that was fitted
# last (on "embarked"), exactly as before.
le = embarked_encoder

# Feature matrix and target vector. `target` is a one-element list; index it
# so `y` is a 1-D Series rather than a single-column DataFrame.
X = titanic[features]
y = titanic[target[0]]

# Train/test split: 30% of the rows are held out for testing; the fixed seed
# keeps the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.3, random_state = 42
)

In [None]:
# Decision Tree
# Depth is capped at 4 to keep the tree small. `random_state` is fixed (same
# seed as the train/test split) because the tree builder shuffles candidate
# features at each split, so an unseeded fit can differ from run to run.
model = DecisionTreeClassifier(max_depth = 4, random_state = 42)

model.fit(X_train, y_train)

# Predict on both splits so training and testing accuracy can be compared
# (a large gap between the two points to overfitting).
y_pred_test = model.predict(X_test)
y_pred_train = model.predict(X_train)

print("Training accuracy:", accuracy_score(y_train, y_pred_train)*100, "%" )
print("Testing accuracy:", accuracy_score(y_test, y_pred_test)*100, "%" )

In [None]:
# Plotting: draw the fitted decision tree, one box per node.

from sklearn.tree import plot_tree

plt.figure(figsize = (18,10))
plot_tree(
    model,
    # Pass a plain list of column names: some scikit-learn releases validate
    # this parameter strictly and reject a pandas Index.
    feature_names = list(X.columns),
    # Class names are matched to the target classes in ascending order
    # (presumably 0 = died, 1 = survived -- confirm against the dataset).
    class_names = ["Died", "Survived"],
    filled = True
)
plt.tight_layout()

In [None]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier

# 501 shallow trees (depth 4). `oob_score = True` makes the forest record an
# out-of-bag accuracy estimate during fitting. `random_state` is fixed (same
# seed as the train/test split) because bootstrap sampling and feature
# selection are random; without it the reported scores change on every run.
rf = RandomForestClassifier(
    n_estimators = 501,
    oob_score = True,
    max_depth = 4,
    random_state = 42
)

rf.fit(X_train, y_train)

In [None]:
# Score the fitted forest on the held-out test split.
y_pred = rf.predict(X_test)

# The OOB score is computed on training rows, but each row is scored only by
# the trees whose bootstrap sample left it out, so it is a validation-style
# estimate and not the plain training accuracy.
oob_pct = rf.oob_score_ * 100
print("OOB Score :", oob_pct, "%")

test_pct = accuracy_score(y_test, y_pred) * 100
print("testing accuracy :", test_pct, "%")

In [None]:
# Bagging classifier: an ensemble of decision trees, each fitted on a
# bootstrap resample of the training data.

from sklearn.ensemble import BaggingClassifier

# Base learner cloned for every ensemble member. The name must match the one
# passed to BaggingClassifier below (it was previously defined as
# `best_model`, which made the constructor call raise NameError).
base_model = DecisionTreeClassifier()

# The base estimator is passed positionally because its keyword name differs
# between scikit-learn versions. The seed (same as the train/test split)
# makes the resampling, and therefore the reported accuracy, reproducible.
bagging = BaggingClassifier(
    base_model,
    n_estimators = 201,
    random_state = 42
)

bagging.fit(X_train, y_train)

y_pred = bagging.predict(X_test)

# Reported as a fraction in [0, 1], not a percentage.
print("Accuracy :", accuracy_score(y_test, y_pred))