### A trained model which can predict the employee performance based on factors as inputs. This will be used to hire employees

### Import libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split

### Creating data frame

In [2]:
perfo=pd.read_csv("pro_new.csv")

### Reading first five columns

In [3]:
perfo.head()

Unnamed: 0.1,Unnamed: 0,Age,DistanceFromHome,EmpEducationLevel,EmpEnvironmentSatisfaction,EmpHourlyRate,EmpJobInvolvement,EmpJobLevel,EmpJobSatisfaction,NumCompaniesWorked,...,EmpJobRole_Senior Manager R&D,EmpJobRole_Technical Architect,EmpJobRole_Technical Lead,BusinessTravelFrequency_Non-Travel,BusinessTravelFrequency_Travel_Frequently,BusinessTravelFrequency_Travel_Rarely,OverTime_No,OverTime_Yes,Attrition_No,Attrition_Yes
0,0,32,10,3,4,55,3,2,4,1,...,0,0,0,0,0,1,1,0,1,0
1,1,47,14,4,4,42,3,2,1,2,...,0,0,0,0,0,1,1,0,1,0
2,2,40,5,4,4,48,2,3,1,5,...,0,0,0,0,1,0,0,1,1,0
3,3,41,10,4,2,73,2,5,4,3,...,0,0,0,0,0,1,1,0,1,0
4,4,60,16,4,1,84,3,2,1,8,...,0,0,0,0,0,1,1,0,1,0


### Dropping column named unnamed since it is an index

In [4]:
perfo.drop("Unnamed: 0",axis=1,inplace=True)

### Missing values

In [5]:
perfo.isnull().sum()

Age                                          0
DistanceFromHome                             0
EmpEducationLevel                            0
EmpEnvironmentSatisfaction                   0
EmpHourlyRate                                0
EmpJobInvolvement                            0
EmpJobLevel                                  0
EmpJobSatisfaction                           0
NumCompaniesWorked                           0
EmpLastSalaryHikePercent                     0
EmpRelationshipSatisfaction                  0
TotalWorkExperienceInYears                   0
TrainingTimesLastYear                        0
EmpWorkLifeBalance                           0
ExperienceYearsAtThisCompany                 0
ExperienceYearsInCurrentRole                 0
YearsSinceLastPromotion                      0
YearsWithCurrManager                         0
PerformanceRating                            0
Gender_Female                                0
Gender_Male                                  0
EducationBack

### Applying machine learning to train the given model

In [6]:
attri=perfo.drop("PerformanceRating",axis=1)
classs=perfo.PerformanceRating

### Normalization

In [7]:
#rescaling of data so that all the datas are from 0 to 1
normal=normalize(attri)
normal
print(normal.min(),normal.max())

0.0 0.969006183329157


### Splitting the given data for all the attributes

In [8]:
attri_train,attri_test,classs_train,classs_test=train_test_split(attri,classs,test_size=0.35)

In [9]:
print(attri_train)

      Age  DistanceFromHome  EmpEducationLevel  EmpEnvironmentSatisfaction  \
280    24                17                  2                           4   
448    40                 9                  4                           4   
390    31                 1                  4                           2   
138    30                 8                  2                           3   
383    35                18                  2                           3   
1110   26                23                  3                           1   
273    44                17                  3                           4   
1030   27                 5                  3                           3   
292    54                 1                  4                           4   
816    50                 7                  2                           2   
889    32                 8                  3                           2   
470    35                 2                  3                  

### Applying Decision Tree Algorithm

In [10]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
tr=DecisionTreeClassifier(max_depth=5)
tr.fit(attri_train,classs_train)
tr1=tr.predict(attri_test)
accuracy_score(classs_test,tr1)*100

91.42857142857143

### Applying KNN [K Nearest Neighbour] Algorithm

In [11]:
from sklearn.neighbors import KNeighborsClassifier
ds=KNeighborsClassifier(11)
ds.fit(attri_train,classs_train)
ds1=ds.predict(attri_test)
accuracy_score(classs_test,ds1)

0.7309523809523809

### Applying SVM [Support Vector Machine] Algorithm

In [12]:
from sklearn.svm import SVC
svm = SVC(kernel = "rbf", C=10, gamma=2)
svm.fit(attri_train,classs_train)
sv1=svm.predict(attri_test)
accuracy_score(classs_test,sv1)


0.7476190476190476

### Applying Random Forest Algorithm

In [13]:
from sklearn.ensemble import RandomForestClassifier
lm = RandomForestClassifier(n_estimators=20)
lm.fit(attri_train,classs_train)
lm1=lm.predict(attri_test)
accuracy_score(classs_test,lm1)


0.8976190476190476

### Data splitting with respect to three attributes

In [14]:
attri=perfo[["EmpEnvironmentSatisfaction","EmpLastSalaryHikePercent","EmpWorkLifeBalance"]]
classs=perfo.PerformanceRating

In [15]:
from sklearn.preprocessing import normalize#rescaling of data so that all the datas are from 0 to 1
normal1=normalize(attri)
normal1
print(normal1.min(),normal1.max())

0.03946685189819292 0.9982683969692436


In [16]:
attri_train,attri_test,classs_train,classs_test=train_test_split(attri,classs,test_size=0.3,random_state=0)

### Applying Decision Tree Algorithm

In [17]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
tr=DecisionTreeClassifier(max_depth=3)
tr.fit(attri_train,classs_train)
tr1=tr.predict(attri_test)
accuracy_score(classs_test,tr1)*100

82.5

### Applying KNN [K Nearest Neighbour] Algorithm

In [18]:
from sklearn.neighbors import KNeighborsClassifier
ds=KNeighborsClassifier(11)
ds.fit(attri_train,classs_train)
ds1=ds.predict(attri_test)
accuracy_score(classs_test,ds1)

0.7888888888888889

### Applying SVM [Support Vector Machine] Algorithm

In [19]:
from sklearn.svm import SVC
svm = SVC(kernel = "rbf",C=10, gamma=3)
svm.fit(attri_train,classs_train)
sv1=svm.predict(attri_test)
accuracy_score(classs_test,sv1)

0.7833333333333333

### Applying Random Forest Algorithm

In [20]:
from sklearn.ensemble import RandomForestClassifier
lm = RandomForestClassifier(n_estimators=11, random_state=0)
lm.fit(attri_train,classs_train)
lm1=lm.predict(attri_test)
accuracy_score(classs_test,lm1)


0.7694444444444445