In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the penguin measurements from the CSV under ../../DATA (the path is
# relative to wherever this notebook is run from).
penguins = pd.read_csv(filepath_or_buffer='../../DATA/penguins_size.csv')
# Preview the first five rows (head's default count, spelled out).
penguins.head(n=5)

In [None]:
penguins.describe()

In [None]:
penguins.isnull().sum()

In [None]:
penguins['species'].unique()

In [None]:
penguins = penguins.dropna()

In [None]:
penguins['island'].unique()

In [None]:
penguins[penguins['species'] == 'Gentoo'].groupby('sex').describe().transpose()

In [None]:
# Relabel Gentoo rows whose sex is recorded as the placeholder '.' as FEMALE.
# Selecting by value instead of the hard-coded row label 336 avoids two
# hazards of `penguins.at[336, 'sex'] = ...`: overwriting a valid entry if the
# CSV's row order ever changes, and silently appending a mostly-NaN row when
# label 336 is absent (scalar setters enlarge the frame on a missing label).
# NOTE(review): assumes label 336 was the Gentoo row with sex '.' -- confirm
# against the data.
placeholder_sex = (penguins['species'] == 'Gentoo') & (penguins['sex'] == '.')
penguins.loc[placeholder_sex, 'sex'] = 'FEMALE'
# Show the corrected row(s). Re-running this cell is a harmless no-op: the
# mask is recomputed, matches nothing, and an empty frame is displayed.
penguins[placeholder_sex]

In [None]:
sns.pairplot(data=penguins,hue='species')

In [None]:
sns.catplot(x='species',y='culmen_length_mm',data=penguins,kind='box',col='sex')

In [None]:
penguins.columns

In [None]:
sns.catplot(x='species',y='body_mass_g',data=penguins,kind='box',col='sex')

In [None]:
sns.catplot(x='species',y='flipper_length_mm',data=penguins,kind='box',col='sex')

In [None]:
sns.catplot(x='species',y='culmen_depth_mm',data=penguins,kind='box',col='sex')

In [None]:
X = pd.get_dummies(penguins.drop('species',axis=1),drop_first=True)

In [None]:
y = penguins['species']

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
model = DecisionTreeClassifier()

In [None]:
model.fit(X_train,y_train)

In [None]:
pred = model.predict(X_test)
pred

In [None]:
from sklearn.metrics import classification_report,accuracy_score

In [None]:
print(classification_report(y_test,pred))

In [None]:
accuracy_score(y_test,pred)

In [None]:
model.feature_importances_

In [None]:
pd.DataFrame(index=X.columns,data=model.feature_importances_,columns=['Feature Importance']).sort_values('Feature Importance')

In [None]:
from sklearn.tree import plot_tree

In [None]:
plt.figure(figsize=(12, 8), dpi=250)
# Label every node with the real column and species names; without them
# plot_tree falls back to positional placeholders such as x[3], which makes
# the figure unreadable on its own. Names are converted to plain lists of str
# for compatibility across scikit-learn versions.
plot_tree(
    model,
    filled=True,
    feature_names=[str(col) for col in X.columns],
    class_names=[str(label) for label in model.classes_],
);

In [None]:
def report_model(model, X_eval=None, y_eval=None):
    """Print evaluation metrics for a fitted tree classifier and plot the tree.

    Parameters
    ----------
    model : fitted tree estimator
        Must provide ``predict`` and ``classes_`` and be drawable by
        ``plot_tree``.
    X_eval, y_eval : optional
        Features and true labels to score against. When omitted, the
        notebook-level ``X_test`` / ``y_test`` are used, which is the
        original behaviour of this helper.

    Returns
    -------
    None
        The classification report and accuracy are printed, and a new
        matplotlib figure containing the tree is created.
    """
    # Fall back to the notebook's held-out split so existing one-argument
    # calls keep working unchanged.
    if X_eval is None:
        X_eval = X_test
    if y_eval is None:
        y_eval = y_test

    model_preds = model.predict(X_eval)
    print(classification_report(y_eval, model_preds))
    print('\n')
    print(accuracy_score(y_eval, model_preds))
    print('\n\n\n\n')

    # Use real feature / class names in the plot when they are available;
    # otherwise plot_tree falls back to positional placeholders.
    columns = getattr(X_eval, 'columns', None)
    feature_names = None if columns is None else [str(col) for col in columns]
    class_names = [str(label) for label in model.classes_]

    plt.figure(figsize=(12, 8), dpi=250)
    plot_tree(
        model,
        filled=True,
        feature_names=feature_names,
        class_names=class_names,
    )

In [None]:
# Metrics and tree diagram for the default-hyperparameter tree.
report_model(model=model)

In [None]:
pruned_tree = DecisionTreeClassifier(max_depth=2)

In [None]:
pruned_tree.fit(X_train,y_train)

In [None]:
report_model(pruned_tree)

In [None]:
max_leaf_tree = DecisionTreeClassifier(max_leaf_nodes=3)

In [None]:
max_leaf_tree.fit(X_train,y_train)

In [None]:
report_model(max_leaf_tree)

In [None]:
# Unrestricted tree that chooses splits by entropy instead of the default
# criterion. random_state is pinned (same seed as the train/test split) so the
# fitted tree is reproducible across runs.
entropy_tree = DecisionTreeClassifier(criterion='entropy', random_state=101)
entropy_tree.fit(X_train, y_train)
report_model(entropy_tree)

In [None]:
# Entropy-criterion tree fitted under the name `collab`. random_state is
# pinned (same seed as the train/test split) so the result is reproducible.
# NOTE(review): this uses the same configuration as entropy_tree
# (criterion='entropy'); it looks like a duplicate experiment -- confirm
# whether both cells are needed.
collab = DecisionTreeClassifier(criterion='entropy', random_state=101)
collab.fit(X_train, y_train)
report_model(collab)