In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

In [None]:
df = sns.load_dataset('titanic')
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [None]:
# drop deck column
df.drop('deck', axis=1, inplace=True)

#impute missing values of age, and fare using median
imputer = SimpleImputer(strategy='median')
df[['age', 'fare']] = imputer.fit_transform(df[['age', 'fare']])

# impute missing values of embark and embarked_town using mode
imputer = SimpleImputer(strategy='most_frequent')
df[['embark_town', 'embarked']] = imputer.fit_transform(df[['embark_town', 'embarked']])

In [None]:
X = df.drop(['survived','alive'],  axis=1)
y = df['survived']

In [None]:
for col in X.columns:
    if X[col].dtype == 'object' or X[col].dtype == 'category':
        le = LabelEncoder()
        X[col] = le.fit_transform(X[col])


In [None]:
X['adult_male'].replace({True:1, False:0}, inplace=True)
X['alone'].replace({True:1, False:0}, inplace=True)
X.head()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['adult_male'].replace({True:1, False:0}, inplace=True)
  X['adult_male'].replace({True:1, False:0}, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['alone'].replace({True:1, False:0}, inplace=True)
  X['alone'].replace({True:1, False:0}, inplace=True)


Unnamed: 0,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,embark_town,alone
0,3,1,22.0,1,0,7.25,2,2,1,1,2,0
1,1,0,38.0,1,0,71.2833,0,0,2,0,0,0
2,3,0,26.0,0,0,7.925,2,2,2,0,2,1
3,1,0,35.0,1,0,53.1,2,0,2,0,2,0
4,3,1,35.0,0,0,8.05,2,2,1,1,2,1


In [None]:
model = DecisionTreeClassifier(criterion='entropy', max_depth=3)
model.fit(X,y)
y_pred = model.predict(X)
print(classification_report(y, y_pred))
print(confusion_matrix(y, y_pred))


              precision    recall  f1-score   support

           0       0.85      0.89      0.87       549
           1       0.81      0.74      0.77       342

    accuracy                           0.83       891
   macro avg       0.83      0.82      0.82       891
weighted avg       0.83      0.83      0.83       891

[[489  60]
 [ 88 254]]


In [None]:
X_test , X_train, y_test, y_train = train_test_split(X, y, test_size=0.2, random_state=42)

In [75]:
model = DecisionTreeClassifier(criterion='gini', max_depth=10)
model.fit(X_train,y_train)
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.79      0.81      0.80       444
           1       0.67      0.63      0.65       268

    accuracy                           0.74       712
   macro avg       0.73      0.72      0.72       712
weighted avg       0.74      0.74      0.74       712

[[360  84]
 [ 98 170]]
