In [1]:
from sklearn import tree
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.tree import plot_tree
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

%matplotlib inline
sns.set(rc={"figure.figsize": (13, 10)})

# all the trees are the same

In [2]:
# Load and data preparation
titanic_data = pd.read_csv(
    r"D:\Projects\PythonProjects\DS-Rush\data\titanic\train.csv")
X = titanic_data.drop(
    ["PassengerId", "Survived", "Name", "Ticket", "Cabin"], axis=1)

y = titanic_data.Survived
X = pd.get_dummies(X)
X = X.drop(["Sex_female", "Embarked_C"], axis=1)
X.fillna({"Age": X.Age.median()}, inplace=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

In [5]:
clf_rf = RandomForestClassifier()
params = {"n_estimators": [20, 30, 40, 50],
          "max_depth": (3, 4, 5, 6),
          }

grid_search_cv_clf = GridSearchCV(clf_rf, param_grid=params, cv=5)

In [6]:
grid_search_cv_clf.fit(X_train, y_train)

In [7]:
grid_search_cv_clf.best_params_

{'max_depth': 5, 'n_estimators': 30}

In [8]:
best_clf = grid_search_cv_clf.best_estimator_


In [9]:
best_clf.score(X_test, y_test)

0.8101694915254237

In [10]:
best_clf.feature_importances_

array([0.17294012, 0.12995854, 0.05135526, 0.04063566, 0.1979883 ,
       0.3590556 , 0.01341837, 0.03464816])

In [11]:
feature_importances = pd.DataFrame({"features": list(X_train),
                                    "feature_importances": best_clf.feature_importances_})

In [12]:
feature_importances.sort_values("feature_importances", ascending=False)

Unnamed: 0,features,feature_importances
5,Sex_male,0.359056
4,Fare,0.197988
0,Pclass,0.17294
1,Age,0.129959
2,SibSp,0.051355
3,Parch,0.040636
7,Embarked_S,0.034648
6,Embarked_Q,0.013418
