Import libraries

In [None]:
import pandas as pd

# visualization libraries
import seaborn as sns

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
sns.set_style("whitegrid")

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

Define train and test data

In [None]:
# Read the training split and keep only the rows without any missing value.
# dropna() returns a new frame, so no in-place mutation is involved.
train = pd.read_csv("../input/hr-analytics-job-change-of-data-scientists/aug_train.csv").dropna()

In [None]:
# Show the training frame after the rows with missing values were dropped.
train

In [None]:
# Work on a copy so the re-coding steps applied to `dummy` do not alter `train`.
dummy = train.copy()

Convert `experience` to an `int` type before processing it further

In [None]:
# The open-ended labels '>20' and '<1' are mapped to 21 and 0 so that the whole
# column can then be cast to int.
experience_overrides = {'>20': 21, '<1': 0}
for raw_label, years_value in experience_overrides.items():
    dummy.loc[dummy['experience'] == raw_label, 'experience'] = years_value
dummy['experience'] = dummy['experience'].astype(int)

Here I bin the experience into groups that are easier to interpret: `zero to five` years is a **Novice**, `six to ten` is a **Beginner**, `eleven to fifteen` is **Competent**, `sixteen to twenty` is **Proficient**, and `above twenty` is an **Expert**. The result is stored in a new column called `stage`.

In [None]:
# Bin the years of experience into named stages. Every mask is built from the
# same `experience` column, and rows matching no mask keep a missing `stage`.
years = dummy['experience']
stage_rules = [
    ('Novice', (years >= 0) & (years <= 5)),
    ('Beginner', (years > 5) & (years <= 10)),
    ('Competent', (years > 10) & (years <= 15)),
    ('Proficient', (years > 15) & (years <= 20)),
    ('Expert', years > 20),
]
for stage_name, stage_mask in stage_rules:
    dummy.loc[stage_mask, 'stage'] = stage_name

In [None]:
# Copy the derived experience stage onto `train`, which is the frame the plots read from.
train['stage'] = dummy['stage']

View the `city_development_index` heading for further inspection

In [None]:
# Bin the city development index (CDI) into four named levels. Rows matching
# no mask keep a missing `level`.
cdi = dummy['city_development_index']
level_rules = [
    ('Low', (cdi >= 0) & (cdi <= 0.549)),
    ('Medium', (cdi > 0.549) & (cdi <= 0.699)),
    ('High', (cdi > 0.699) & (cdi <= 0.799)),
    ('Very High', cdi > 0.799),
]
for level_name, level_mask in level_rules:
    dummy.loc[level_mask, 'level'] = level_name

In [None]:
# Copy the derived CDI level onto `train`, which is the frame the plots read from.
train['level'] = dummy['level']

Rather than building a `dictionary` by hand, using the categorical codes (`cat.codes`) provided by pandas is by far the quickest way to encode a category as an integer

In [None]:
# Replace each listed text column with the integer codes of its pandas category.
columns_to_encode = [
    'gender',
    'relevent_experience',
    'enrolled_university',
    'education_level',
    'major_discipline',
    'company_type',
    'stage',
    'level',
]
for column_name in columns_to_encode:
    dummy[column_name] = dummy[column_name].astype('category').cat.codes

The `experience` and `enrollee_id` columns are not used for the heatmap, as `experience` has been replaced by the `stage` column

In [None]:
# Remove the two columns that should not take part in the correlation heatmap.
dummy = dummy.drop(['experience', 'enrollee_id'], axis=1)

In [None]:
# Compute the correlation matrix once and reuse it for the figure and the table.
# Only numeric columns are correlated: `dummy` still carries columns that this
# notebook label-encodes only later for modelling (`city`, `company_size`,
# `last_new_job`), and pandas >= 2.0 raises on non-numeric columns in corr()
# instead of silently skipping them.
corr = dummy.select_dtypes(include='number').corr()

fig = go.Figure()

# z[i][j] pairs row label i (y axis) with column label j (x axis).
fig.add_trace(go.Heatmap(z=corr.values,
                         x=corr.columns.values,
                         y=corr.index.values))

fig.update_layout(
    title_text="Heatmap Correlation",
)
fig.show()

# Highlight the smallest correlation value within each column.
corr.style.highlight_min(axis=0)

After seeing the correlation `heatmap` matrix, we shall dive in step-by-step thoroughly

# Gender

Overall, the number of male candidates is higher, covering almost `90%` of the pie chart

In [None]:
# value_counts() orders by frequency while unique() keeps first-appearance order,
# so labels and values are taken from the same Series to keep every slice paired
# with its own count.
gender_counts = train['gender'].value_counts()

fig = go.Figure()
# `pull` offsets the third slice from the centre.
fig.add_trace(go.Pie(values=gender_counts.values, labels=list(gender_counts.index), pull=[0, 0, 0.05]))

fig.update_layout(
    title_text="Gender",
)
fig.show()

Looking at the previous heatmap, the `education_level` and `gender` categories are inversely correlated. As the histogram below shows, there are **fewer** candidates at **higher** education levels

In [None]:
fig = go.Figure()

# education level by gender (visible on load)
for gender_value in ("Male", "Female", "Other"):
    fig.add_trace(go.Histogram(
        x=train.loc[train["gender"] == gender_value, 'education_level'],
        name=gender_value,
    ))

# gender by education level (hidden until picked in the dropdown)
for education_value in ("Graduate", "Masters", "Phd"):
    fig.add_trace(go.Histogram(
        x=train.loc[train["education_level"] == education_value, 'gender'],
        name=education_value,
        visible=False,
    ))

# One visibility flag per trace, in the order the traces were added above.
education_button = dict(label="Education Level",
                        method="update",
                        args=[{"visible": [True, True, True, False, False, False]},
                              {"title": "Education Level by Gender"}])
gender_button = dict(label="Gender",
                     method="update",
                     args=[{"visible": [False, False, False, True, True, True]},
                           {"title": "Gender by Education Level"}])

fig.update_layout(
    updatemenus=[
        dict(
            type="dropdown",
            direction="left",
            active=0,
            x=1,
            y=1.2,
            buttons=[education_button, gender_button],
        ),
    ],
    title_text="Gender and Education Level",
)
fig.show()

I could conclude that `male` gender prefer to change their jobs at any level of graduation. However, there are a lot of `unidentified` gender who prefer staying to changing their jobs. Moreover, candidates who do not want to `change their job` are higher otherwise.

In [None]:
fig = go.Figure()

genders = ("Male", "Female", "Other")

# NOTE: `&` binds tighter than `==`, so each comparison needs its own parentheses.
# Without them `(gender == g) & target == 0.0` is read as
# `((gender == g) & target) == 0.0`, which selects every row that is NOT
# "gender g with target 1" rather than "gender g with target 0".

# education level by gender, candidates looking for a job change (visible on load)
for gender_value in genders:
    changing = (train["gender"] == gender_value) & (train["target"] == 1.0)
    fig.add_trace(go.Histogram(x=train['education_level'][changing], name=gender_value))

# education level by gender, candidates not looking for a job change (hidden on load)
for gender_value in genders:
    staying = (train["gender"] == gender_value) & (train["target"] == 0.0)
    fig.add_trace(go.Histogram(x=train['education_level'][staying], name=gender_value, visible=False))

fig.update_layout(
    updatemenus=[
        dict(
            type="dropdown",
            direction="left",
            active=0,
            x=1,
            y=1.2,
            # One visibility flag per trace, in insertion order (3 + 3).
            buttons=list([
                dict(label="Job Changing",
                     method="update",
                     args=[{"visible": [True, True, True, False, False, False]},
                           {"title": "Education Level by Gender with Job Changing"}]),
                dict(label="No Job Changing",
                     method="update",
                     args=[{"visible": [False, False, False, True, True, True]},
                           {"title": "Education Level by Gender with no Job Changing"}])
            ]),
        ),
    ])


fig.update_layout(
    title_text="Education Level and Gender with target",
)
fig.show()

# University Enrollment

The values are not balanced: candidates with no enrollment dominate the pie chart at almost `85%`

In [None]:
# value_counts() orders by frequency while unique() keeps first-appearance order,
# so labels and values are taken from the same Series to keep every slice paired
# with its own count.
enrollment_counts = train['enrolled_university'].value_counts()

fig = go.Figure()
# `pull` offsets the third slice from the centre.
fig.add_trace(go.Pie(values=enrollment_counts.values, labels=list(enrollment_counts.index), pull=[0, 0, 0.05]))

fig.update_layout(
    title_text="Enrollment",
)
fig.show()

This deserves a second look: candidates who `do not have any enrollment` have the highest rate of relevant experience 🤔.

In [None]:
fig = go.Figure()

# enrollment by relevant experience (visible on load)
experience_traces = (
    ('No relevent experience', "No Experience"),
    ('Has relevent experience', "Has Experience"),
)
for experience_value, trace_name in experience_traces:
    fig.add_trace(go.Histogram(
        x=train.loc[train["relevent_experience"] == experience_value, 'enrolled_university'],
        name=trace_name,
    ))

# relevant experience by enrollment (hidden until picked in the dropdown)
enrollment_traces = (
    ('no_enrollment', "No Enrollment"),
    ('Part time course', "Part Time"),
    ('Full time course', "Full Time"),
)
for enrollment_value, trace_name in enrollment_traces:
    fig.add_trace(go.Histogram(
        x=train.loc[train["enrolled_university"] == enrollment_value, 'relevent_experience'],
        name=trace_name,
        visible=False,
    ))

# One visibility flag per trace, in insertion order (2 + 3).
enrollment_button = dict(label="Enrollment",
                         method="update",
                         args=[{"visible": [True, True, False, False, False]},
                               {"title": "Enrollment by Relevant Experience"}])
experience_button = dict(label="Relevant Experience",
                         method="update",
                         args=[{"visible": [False, False, True, True, True]},
                               {"title": "Relevant experience by Enrollment"}])

fig.update_layout(
    updatemenus=[
        dict(
            type="dropdown",
            direction="left",
            active=0,
            x=1,
            y=1.2,
            buttons=[enrollment_button, experience_button],
        ),
    ],
    title_text="Enrollment and Experience",
)
fig.show()

As we can clearly see, `experienced` candidates make up the largest group among both those staying and those changing jobs. On the other hand, far more candidates would `not change their jobs` than would `change their jobs`.

In [None]:
fig = go.Figure()

experience_traces = (
    ('No relevent experience', "No Experience"),
    ('Has relevent experience', "Has Experience"),
)

# Enrollment by relevant experience, once per target value: the job-changing
# group (target 1.0) is added first and shown on load, the no-job-changing
# group (target 0.0) is added second and starts hidden.
for target_value, extra_kwargs in ((1.0, {}), (0.0, {"visible": False})):
    for experience_value, trace_name in experience_traces:
        selected = (train["relevent_experience"] == experience_value) & (train['target'] == target_value)
        fig.add_trace(go.Histogram(x=train['enrolled_university'][selected], name=trace_name, **extra_kwargs))

# One visibility flag per trace, in insertion order (2 + 2).
changing_button = dict(label="Job Changing",
                       method="update",
                       args=[{"visible": [True, True, False, False]},
                             {"title": "Enrollment and Experience with Job Changing"}])
staying_button = dict(label="No Job Changing",
                      method="update",
                      args=[{"visible": [False, False, True, True]},
                            {"title": "Enrollment and Experience with no Job Changing"}])

fig.update_layout(
    updatemenus=[
        dict(
            type="dropdown",
            direction="left",
            active=0,
            x=1,
            y=1.2,
            buttons=[changing_button, staying_button],
        ),
    ],
    title_text="Enrollment and Experience with target",
)
fig.show()

# Major Discipline

In [None]:
# value_counts() orders by frequency while unique() keeps first-appearance order,
# so labels and values are taken from the same Series to keep every slice paired
# with its own count.
major_counts = train['major_discipline'].value_counts()

fig = go.Figure()
# `pull` offsets the sixth slice from the centre.
fig.add_trace(go.Pie(values=major_counts.values, labels=list(major_counts.index), pull=[0, 0, 0, 0, 0, 0.05]))

fig.update_layout(
    title_text="Major Dicipline owned by candidates",
)
fig.show()

Previously we compared `gender` and `education level`. It turns out that `gender` and `major discipline` are positively correlated. Male candidates are the largest group throughout, and most of them took a `STEM` major. Overall, it does not surprise me that women with `no major` are very few compared to `male` candidates; besides `STEM` and `Business Degree`, `female` candidates tend to pursue an `arts` major.

> NOTE: You can toggle one or more `major discipline` traces by clicking their entries in the legend at the top right.

In [None]:
fig = go.Figure()

# major_discipline by gender (visible on load)
for gender_value in ("Male", "Female", "Other"):
    fig.add_trace(go.Histogram(
        x=train.loc[train["gender"] == gender_value, 'major_discipline'],
        name=gender_value,
    ))

# gender by major_discipline (hidden until picked in the dropdown)
for major_value in ("STEM", "Humanities", "Business Degree", "Other", "No Major", "Arts"):
    fig.add_trace(go.Histogram(
        x=train.loc[train["major_discipline"] == major_value, 'gender'],
        name=major_value,
        visible=False,
    ))

# One visibility flag per trace, in insertion order (3 + 6).
major_button = dict(label="Major Discipline",
                    method="update",
                    args=[{"visible": [True, True, True, False, False, False, False, False, False]},
                          {"title": "Major Discipline by Gender"}])
gender_button = dict(label="Gender",
                     method="update",
                     args=[{"visible": [False, False, False, True, True, True, True, True, True]},
                           {"title": "Gender by Major Discipline"}])

fig.update_layout(
    updatemenus=[
        dict(
            type="dropdown",
            direction="left",
            active=0,
            x=1,
            y=1.2,
            buttons=[major_button, gender_button],
        ),
    ],
    title_text="Major Discipline and Gender",
)
fig.show()

I thought that comparing `stage` and `major discipline` would be interesting. The result is as I expected: there are a lot of `beginners` with a `STEM` major, followed by `humanities`, `business`, `other`, `arts`, and `no` major, the last of which is taken mostly by `novices`.

In [None]:
fig = go.Figure()

# major_discipline by stage (visible on load)
for stage_value in ("Novice", "Beginner", "Competent", "Proficient", "Expert"):
    fig.add_trace(go.Histogram(
        x=train.loc[train["stage"] == stage_value, 'major_discipline'],
        name=stage_value,
    ))

# stage by major_discipline (hidden until picked in the dropdown)
for major_value in ("STEM", "Humanities", "Business Degree", "Other", "No Major", "Arts"):
    fig.add_trace(go.Histogram(
        x=train.loc[train["major_discipline"] == major_value, 'stage'],
        name=major_value,
        visible=False,
    ))

# One visibility flag per trace, in insertion order (5 + 6).
major_button = dict(label="Major Discipline",
                    method="update",
                    args=[{"visible": [True, True, True, True, True, False, False, False, False, False, False]},
                          {"title": "Major Discipline by Stage"}])
stage_button = dict(label="Stage",
                    method="update",
                    args=[{"visible": [False, False, False, False, False, True, True, True, True, True, True]},
                          {"title": "Stage by Major Discipline"}])

fig.update_layout(
    updatemenus=[
        dict(
            type="dropdown",
            direction="left",
            active=0,
            x=1,
            y=1.2,
            buttons=[major_button, stage_button],
        ),
    ],
    title_text="Major Discipline and Stage",
)
fig.show()

That is obvious, isn't it? It is the glaring effect of the sheer number of `STEM` candidates.

In [None]:
fig = go.Figure()

stages = ("Novice", "Beginner", "Competent", "Proficient", "Expert")

# major_discipline by stage, candidates looking for a job change (visible on load)
for stage_value in stages:
    changing = (train["stage"] == stage_value) & (train["target"] == 1.0)
    fig.add_trace(go.Histogram(x=train['major_discipline'][changing], name=stage_value))

# major_discipline by stage, candidates not looking for a job change.
# These start hidden so the initial view matches the active "Job Changing"
# dropdown entry; otherwise all ten traces would be drawn together on load.
for stage_value in stages:
    staying = (train["stage"] == stage_value) & (train["target"] == 0.0)
    fig.add_trace(go.Histogram(x=train['major_discipline'][staying], name=stage_value, visible=False))

fig.update_layout(
    updatemenus=[
        dict(
            type="dropdown",
            direction="left",
            active=0,
            x=1,
            y=1.2,
            # One visibility flag per trace, in insertion order (5 + 5).
            buttons=list([
                dict(label="Job Changing",
                     method="update",
                     args=[{"visible": [True, True, True, True, True, False, False, False, False, False]},
                           {"title": "Major Discipline and Stage with Job Changing"}]),
                dict(label="No Job Changing",
                     method="update",
                     args=[{"visible": [False, False, False, False, False, True, True, True, True, True]},
                           {"title": "Major Discipline and Stage with no Job Changing"}])
            ]),
        ),
    ])


fig.update_layout(
    title_text="Major Discipline and Stage with target",
)
fig.show()

# City Development Index

Now we shall focus on the target. According to the heatmap, `city_development_index` (CDI) is positively correlated with the `target`. According to Wikipedia, a CDI of `0.549` or lower is considered `low`, a CDI between `0.550` and `0.699` is `medium`, a CDI from `0.700` to `0.799` is `high`, and a CDI of `0.800` or larger is `very high`.

In [None]:
# value_counts() orders by frequency while unique() keeps first-appearance order,
# so labels and values are taken from the same Series to keep every slice paired
# with its own count.
level_counts = train['level'].value_counts()

fig = go.Figure()
# `pull` offsets the fourth slice from the centre.
fig.add_trace(go.Pie(values=level_counts.values, labels=list(level_counts.index), pull=[0, 0, 0, 0.05]))

fig.update_layout(
    title_text="City Development Index",
)
fig.show()

The histogram shows that candidates from cities with a CDI between `0.700` and `0.799` (High) are `not going to change their job`, whereas candidates from cities with a CDI between `0.550` and `0.699` (Medium) or from `0.8` onwards (Very High) are `likely to change their job`

In [None]:
fig = go.Figure()

# level by target (2 traces, visible on load)
fig.add_trace(go.Histogram(x=train['level'][train["target"] == 1.0], name="Changing Job"))
fig.add_trace(go.Histogram(x=train['level'][train["target"] == 0.0], name="Not Changing Job"))

# target by level (4 traces, hidden until picked in the dropdown)
for level_value in ('Low', 'Medium', 'High', 'Very High'):
    fig.add_trace(go.Histogram(x=train['target'][train["level"] == level_value], name=level_value, visible=False))

# One visibility flag per trace, in insertion order: 2 level-by-target traces
# followed by 4 target-by-level traces. A 3 + 3 split would show the "Low" trace
# in the wrong view and drop it from its own.
show_level_by_target = [True, True, False, False, False, False]
show_target_by_level = [False, False, True, True, True, True]

fig.update_layout(
    updatemenus=[
        dict(
            type="dropdown",
            direction="left",
            active=0,
            x=1,
            y=1.2,
            buttons=list([
                dict(label="Level",
                     method="update",
                     args=[{"visible": show_level_by_target},
                           {"title": "Level by Target"}]),
                dict(label="Target",
                     method="update",
                     args=[{"visible": show_target_by_level},
                           {"title": "Target by Level"}])
            ]),
        ),
    ])

fig.update_layout(
    title_text="Level and Target",
)
fig.show()

On the other hand, `Very High` CDI candidates usually do `not have enrollment`

In [None]:
fig = go.Figure()

# level by enrollment (visible on load)
enrollment_traces = (
    ('no_enrollment', "No Enrollment"),
    ('Part time course', "Part Time"),
    ('Full time course', "Full Time"),
)
for enrollment_value, trace_name in enrollment_traces:
    fig.add_trace(go.Histogram(
        x=train.loc[train["enrolled_university"] == enrollment_value, 'level'],
        name=trace_name,
    ))

# enrollment by level (hidden until picked in the dropdown)
for level_value in ('Low', 'Medium', 'High', 'Very High'):
    fig.add_trace(go.Histogram(
        x=train.loc[train["level"] == level_value, 'enrolled_university'],
        name=level_value,
        visible=False,
    ))

# One visibility flag per trace, in insertion order (3 + 4).
level_button = dict(label="Level",
                    method="update",
                    args=[{"visible": [True, True, True, False, False, False, False]},
                          {"title": "Level by Enrollment"}])
enrollment_button = dict(label="Enrollment",
                         method="update",
                         args=[{"visible": [False, False, False, True, True, True, True]},
                               {"title": "Enrollment by Level"}])

fig.update_layout(
    updatemenus=[
        dict(
            type="dropdown",
            direction="left",
            active=0,
            x=1,
            y=1.2,
            buttons=[level_button, enrollment_button],
        ),
    ],
    title_text="Level and Enrollment",
)
fig.show()

# Conclusion

1. The data is not balanced, as the classes are not distributed equally
2. Very High CDI ==> No Enrollment
3. No Enrollment ==> High Relevant Experience
4. Beginners ==> STEM Majors

...

# Part 2, Predicting Model

Extract and Feature the data

In [None]:
# Inspect the training frame, which by now also carries the derived `stage` and `level` columns.
train

In [None]:
# Separate copy for modelling, so the encoding applied to `data` leaves `train` untouched.
data = train.copy()

In [None]:
# Drop the identifier plus the two raw columns that the derived `level` and
# `stage` columns were built from.
data = data.drop(['enrollee_id', 'city_development_index', 'experience'], axis=1)

In [None]:
# Replace each listed text column with the integer codes of its pandas category.
model_columns_to_encode = [
    'gender',
    'relevent_experience',
    'enrolled_university',
    'education_level',
    'major_discipline',
    'company_type',
    'stage',
    'level',
    'company_size',
    'last_new_job',
    'city',
]
for column_name in model_columns_to_encode:
    data[column_name] = data[column_name].astype('category').cat.codes

In [None]:
from sklearn.model_selection import train_test_split

# The label is the `target` column; every other column is a feature.
y = data['target']
X = data.drop(columns='target')

# Hold out 33% of the rows for validation; the fixed seed keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.33, random_state=2021)

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, recall_score, precision_score, roc_auc_score

# Standardise the features, then fit a sigmoid-kernel SVC. fit() returns the
# fitted pipeline, so the validation predictions can be chained onto it.
pipeline = make_pipeline(StandardScaler(), SVC(kernel='sigmoid'))
svc_prediction = pipeline.fit(X_train, y_train).predict(X_val)
cm_svc = confusion_matrix(y_val, svc_prediction)

# Every metric is computed from the predicted labels on the validation split.
svc_metrics = {
    "Accuracy": accuracy_score,
    "Recall": recall_score,
    "Precision": precision_score,
    "ROC AUC Score": roc_auc_score,
}
svc_df = pd.DataFrame(
    data=[metric(y_val, svc_prediction) for metric in svc_metrics.values()],
    columns=['SVC Score'],
    index=list(svc_metrics),
)

In [None]:
# Show the validation scores of the SVC pipeline.
svc_df

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random Forest
# A fixed seed makes the forest, and therefore the scores below and any search
# that reuses `rfc` as its estimator, reproducible across kernel restarts.
rfc = RandomForestClassifier(n_estimators=600, random_state=0)
rfc.fit(X_train,y_train)
rfc_prediction = rfc.predict(X_val)
cm_rfc = confusion_matrix(y_val, rfc_prediction)
# Every metric is computed from the predicted labels on the validation split.
rfc_df = pd.DataFrame(data=[accuracy_score(y_val, rfc_prediction), recall_score(y_val, rfc_prediction),
                   precision_score(y_val, rfc_prediction), roc_auc_score(y_val, rfc_prediction)], 
             columns=['Random Forest Score'],
             index=["Accuracy", "Recall", "Precision", "ROC AUC Score"])

In [None]:
from sklearn.model_selection import GridSearchCV

# 'auto' is no longer part of the grid: for classifiers it was an alias of
# 'sqrt' (so it only duplicated every candidate), and scikit-learn 1.3 removed
# it, which makes the search fail on current versions. 'log2' takes its place.
param_grid = {
    'n_estimators': [300, 500, 800, 1000],
    'max_features': ['sqrt', 'log2'],
    'max_depth': [6, 7, 8, 9, 10],
    'criterion': ['gini', 'entropy'],
}

# 5-fold cross-validated search over the grid; the grid values override the
# matching settings of `rfc`. n_jobs=-1 spreads the fits over all CPU cores.
CV_rfc = GridSearchCV(estimator=rfc, param_grid=param_grid, cv=5, n_jobs=-1)
CV_rfc.fit(X_train, y_train)
CV_rfc.best_params_

In [None]:
# Tuned Random Forest
# NOTE(review): these hyper-parameters are hard-coded; presumably they were
# copied from a grid-search result. Confirm against `CV_rfc.best_params_`.
tuned_params = dict(n_estimators=300, criterion='entropy', max_features='sqrt', max_depth=8)
rfc1 = RandomForestClassifier(random_state=0, **tuned_params)
rfc1.fit(X_train, y_train)
prediction_rf1 = rfc1.predict(X_val)
cm_trfc = confusion_matrix(y_val, prediction_rf1)

# Every metric is computed from the predicted labels on the validation split.
tuned_scores = [
    accuracy_score(y_val, prediction_rf1),
    recall_score(y_val, prediction_rf1),
    precision_score(y_val, prediction_rf1),
    roc_auc_score(y_val, prediction_rf1),
]
trfc_df = pd.DataFrame(
    {'Tuned Random Forest Score': tuned_scores},
    index=["Accuracy", "Recall", "Precision", "ROC AUC Score"],
)

In [None]:
# Show the validation scores of the tuned random forest.
trfc_df

In [None]:
# Show the confusion matrix of the tuned random forest on the validation split.
cm_trfc