In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive; the CSV that is
# read and the figures that are saved by this notebook live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# import

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split , cross_val_score , StratifiedKFold , learning_curve
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier , GradientBoostingClassifier
from sklearn.preprocessing import LabelEncoder
from lightgbm import LGBMClassifier

import warnings

# Adds a catch-all 'ignore' filter, so warnings from every library (deprecation
# notices included) are hidden for the rest of the session.
warnings.filterwarnings('ignore')

In [None]:
# Load the dataset from the mounted Drive folder into `df`.
df = pd.read_csv('/content/drive/MyDrive/GCI/Final_HW/dataset/ICorporation/data.csv')

In [None]:
# Discard four columns before any encoding or modelling happens. Rebinding `df`
# to the returned frame yields the same data as dropping in place.
df = df.drop(columns=['EmployeeCount', 'Over18', 'StandardHours', 'EmployeeNumber'])

In [None]:
# Print a summary (column names, non-null counts, dtypes) of the remaining columns.
df.info()

In [None]:
# One LabelEncoder instance, reused (refit) for each categorical column encoded below.
le = LabelEncoder()

In [None]:
# Pull out the object-dtype columns (the ones that still need encoding) and
# preview the first three rows.
obj_data = df.select_dtypes(include=['object'])
obj_data.head(n=3)

In [None]:
# Show the distinct values of every object-dtype column; iterating a DataFrame
# yields its column labels.
for col in obj_data:
    print(f"{col} Unique = {df[col].unique()}\n")

In [None]:
# Integer-encode the categorical columns that are kept. `le` is refit on each
# column in turn, so after the loop it only holds the 'OverTime' mapping.
for cat_col in ('Attrition', 'BusinessTravel', 'Department', 'Gender', 'MaritalStatus', 'OverTime'):
    df[cat_col] = le.fit_transform(df[cat_col])

# 'EducationField' and 'JobRole' are discarded rather than encoded.
df = df.drop(columns=['EducationField', 'JobRole'])

In [None]:
# One-hot encode 'HowToEmploy'. No prefix is passed, so the dummy columns are
# named after the category values themselves.
employee_data = pd.get_dummies(df['HowToEmploy'])

In [None]:
# Swap the raw 'HowToEmploy' column for its one-hot columns, appended on the right.
df = pd.concat([df.drop(columns=['HowToEmploy']), employee_data], axis=1)

In [None]:
# Preview the first three rows after the encoding steps.
df.head(3)

In [None]:
# Separate the label column ('Attrition') from the feature columns.
train_df = df.drop(columns='Attrition')
target = df['Attrition']

In [None]:
# Hold out 30% of the rows for testing with a fixed seed. `stratify=target` keeps
# the Attrition class ratio identical in both splits instead of leaving it to
# chance, matching the stratified splitter (`StratifiedKFold`) this notebook sets up.
# NOTE(review): this changes which rows land in each split compared with the
# unstratified call, so downstream numbers and plots will shift slightly.
X_train, X_test, Y_train, Y_test = train_test_split(
    train_df,
    target,
    test_size=0.3,
    random_state=42,
    stratify=target,
)

In [None]:
# 10-fold stratified splitter. It is not referenced by any of the cells visible here.
# NOTE(review): no shuffle/random_state is passed, so folds follow row order — confirm that is intended.
kfold = StratifiedKFold(n_splits = 10)

In [None]:
# Build the three tree-ensemble classifiers with one shared seed so that a rerun
# reproduces the same fits.
random_state = 42
model_rf, model_gb, model_lgb = (
    estimator_cls(random_state=random_state)
    for estimator_cls in (RandomForestClassifier, GradientBoostingClassifier, LGBMClassifier)
)

In [None]:
# Fit the random forest on the training split.
model_rf.fit(X_train , Y_train)

In [None]:
# Fit the gradient boosting model on the training split.
model_gb.fit(X_train , Y_train)

In [None]:
# Fit the LightGBM model on the training split.
model_lgb.fit(X_train , Y_train)

In [None]:
# Feature names, in the column order the models were trained on.
features = X_train.columns

# Per-feature importance arrays from each fitted model, aligned with `features`.
# NOTE(review): LightGBM's default importance is presumably split counts rather than
# normalized scores — confirm before comparing magnitudes across models.
imp_rf, imp_gb, imp_lgb = (
    fitted.feature_importances_ for fitted in (model_rf, model_gb, model_lgb)
)

In [None]:
# RandomForestClassifier
# Bar chart of the random forest feature importances. Features scoring above the
# mean importance (dashed line) are highlighted in red.

mean_imp = imp_rf.mean()
above_mean = imp_rf > mean_imp

fig, ax = plt.subplots(figsize=(20, 8))
ax.set_title("RF model Feature Importance", fontsize=15)
# Draw every bar in orange first, then repaint the above-mean ones in red on top,
# so the x-axis keeps the original feature order.
ax.bar(features, imp_rf, color="orange", label="Not Important")
ax.bar(features[above_mean], imp_rf[above_mean], color="r", label="Important")
ax.axhline(mean_imp, color="k", linestyle="dashed")
ax.set_xlabel("Features", fontsize=12)
ax.set_ylabel("Importance Score", fontsize=12)
ax.tick_params(axis="x", labelrotation=90)
ax.legend(fontsize=22)
# bbox_inches="tight" expands the saved area to include the rotated tick labels,
# which the default bounding box cuts off in the PNG.
fig.savefig('/content/drive/MyDrive/GCI/Final_HW/dataset/ICorporation/RandomForest.png', bbox_inches="tight")

In [None]:
# GB
# Bar chart of the gradient boosting feature importances. Features scoring above
# the mean importance (dashed line) are highlighted in red.

mean_imp = imp_gb.mean()
above_mean = imp_gb > mean_imp

fig, ax = plt.subplots(figsize=(20, 8))
ax.set_title("GB model Feature Importance", fontsize=15)
# Draw every bar in orange first, then repaint the above-mean ones in red on top,
# so the x-axis keeps the original feature order.
ax.bar(features, imp_gb, color="orange", label="Not Important")
ax.bar(features[above_mean], imp_gb[above_mean], color="r", label="Important")
ax.axhline(mean_imp, color="k", linestyle="dashed")
ax.set_xlabel("Features", fontsize=12)
ax.set_ylabel("Importance Score", fontsize=12)
ax.tick_params(axis="x", labelrotation=90)
ax.legend(fontsize=22)
# bbox_inches="tight" expands the saved area to include the rotated tick labels,
# which the default bounding box cuts off in the PNG.
fig.savefig('/content/drive/MyDrive/GCI/Final_HW/dataset/ICorporation/GradientBoosting.png', bbox_inches="tight")

In [None]:
# LGBM
# Bar chart of the LightGBM feature importances. Features scoring above the mean
# importance (dashed line) are highlighted in red.

mean_imp = imp_lgb.mean()
above_mean = imp_lgb > mean_imp

fig, ax = plt.subplots(figsize=(20, 8))
ax.set_title("LGBM model Feature Importance", fontsize=15)
# Draw every bar in orange first, then repaint the above-mean ones in red on top,
# so the x-axis keeps the original feature order.
ax.bar(features, imp_lgb, color="orange", label="Not Important")
ax.bar(features[above_mean], imp_lgb[above_mean], color="r", label="Important")
ax.axhline(mean_imp, color="k", linestyle="dashed")
ax.set_xlabel("Features", fontsize=12)
ax.set_ylabel("Importance Score", fontsize=12)
ax.tick_params(axis="x", labelrotation=90)
ax.legend(fontsize=22)
# bbox_inches="tight" expands the saved area to include the rotated tick labels,
# which the default bounding box cuts off in the PNG.
fig.savefig('/content/drive/MyDrive/GCI/Final_HW/dataset/ICorporation/LightGBM.png', bbox_inches="tight")