# Pre-Processing

In [None]:
# Import statements
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Display options: lift the column limit so that every column is shown
# when a DataFrame is rendered (no "..." truncation of wide frames).
pd.set_option('display.max_columns', None)

In [None]:
# Load the raw, comma-separated HR attrition data set into a DataFrame.
hr_employee_attrition = pd.read_csv(
    "./Data/WA_Fn-UseC_-HR-Employee-Attrition.csv",
    sep=",",
)

### Functions

In [None]:
# Function:    dummyConversion
# Purpose:     Replace one categorical column with its one-hot (dummy) columns.
# Input:       df        - source DataFrame (left untouched; a new frame is built)
#              attribute - name of the column to encode
# Return:      New DataFrame without `attribute`, followed by one indicator
#              column per distinct value, each named "<attribute>_<value>"
# URL:         https://stackoverflow.com/questions/37292872/how-can-i-one-hot-encode-in-python
def dummyConversion(df, attribute):
    # One indicator column per distinct value of the attribute
    indicators = pd.get_dummies(df[attribute], prefix=attribute)
    # Everything except the original categorical column
    remaining = df.drop(columns=attribute)
    # Align on the index and append the indicator columns on the right
    return remaining.join(indicators)

### Save Attrition to CSV file.

In [None]:
# Encode the class label numerically: 'Yes' -> 1, 'No' -> 0
attrition_codes = {'Yes': 1, 'No': 0}
hr_employee_attrition['Attrition'] = hr_employee_attrition['Attrition'].map(attrition_codes)
# Write the encoded label (with its row index) to its own CSV file
Attrition = hr_employee_attrition['Attrition']
Attrition.to_csv('./Data/Attrition.csv')

### Normalize numeric attributes.

In [None]:
# https://stackoverflow.com/questions/26414913/normalize-columns-of-pandas-data-frame

# Numeric attributes that get min-max scaled
column_names_to_normalize = [
    'Age',
    'DailyRate',
    'DistanceFromHome',
    'HourlyRate',
    'JobLevel',
    'MonthlyIncome',
    'MonthlyRate',
    'NumCompaniesWorked',
    'PercentSalaryHike',
    'StockOptionLevel',
    'TotalWorkingYears',
    'TrainingTimesLastYear',
    'YearsAtCompany',
    'YearsInCurrentRole',
    'YearsSinceLastPromotion',
    'YearsWithCurrManager',
]
# Scale the raw values column by column with a freshly fitted MinMaxScaler
raw_values = hr_employee_attrition[column_names_to_normalize].values
scaled_values = MinMaxScaler().fit_transform(raw_values)
# Re-wrap the scaled array with the original row index and column names,
# then overwrite the unscaled columns in place
hr_employee_attrition[column_names_to_normalize] = pd.DataFrame(
    scaled_values,
    index=hr_employee_attrition.index,
    columns=column_names_to_normalize,
)
hr_employee_attrition

### Convert Categorical(Ordinal) attributes to Categorical(Nominal)

In [None]:
# Lookup tables translating each ordinal code into its text label
education_map = {1 : 'Below College', 2 : 'College', 3 : 'Bachelor', 4 : 'Master', 5 : 'Doctor'}
environment_satisfaction_map = {1 : 'Low', 2 : 'Medium', 3 : 'High', 4 : 'Very High'}
job_involvement_map = {1 : 'Low', 2 : 'Medium', 3 : 'High', 4 : 'Very High'}
job_satisfaction_map = {1 : 'Low', 2 : 'Medium', 3 : 'High', 4 : 'Very High'}
performance_rating_map = {1 : 'Low', 2 : 'Good', 3 : 'Excellent', 4 : 'Outstanding'}
relationship_satisfaction_map = {1 : 'Low', 2 : 'Medium', 3 : 'High', 4 : 'Very High'}
work_life_balance_map = {1 : 'Bad', 2 : 'Good', 3 : 'Better', 4 : 'Best'}

# Column name -> lookup table to apply to that column
ordinal_label_maps = {
    'Education': education_map,
    'EnvironmentSatisfaction': environment_satisfaction_map,
    'JobInvolvement': job_involvement_map,
    'JobSatisfaction': job_satisfaction_map,
    'PerformanceRating': performance_rating_map,
    'RelationshipSatisfaction': relationship_satisfaction_map,
    'WorkLifeBalance': work_life_balance_map,
}

# Replace the numeric codes of every ordinal column with their labels
for column, labels in ordinal_label_maps.items():
    hr_employee_attrition[column] = hr_employee_attrition[column].map(labels)

hr_employee_attrition

### Convert Categorical(Nominal) to dummy variables.

In [None]:
# Columns to one-hot encode, in the order their dummy columns are appended:
# first the ordinal attributes that were relabelled as text, then the
# attributes that are nominal in the raw data.
categorical_attributes = [
    'Education',
    'EnvironmentSatisfaction',
    'JobInvolvement',
    'JobSatisfaction',
    'PerformanceRating',
    'RelationshipSatisfaction',
    'WorkLifeBalance',
    'BusinessTravel',
    'Department',
    'EducationField',
    'Gender',
    'JobRole',
    'MaritalStatus',
    'OverTime',
]

# Swap each categorical column for its dummy columns, one attribute at a time
for attribute in categorical_attributes:
    hr_employee_attrition = dummyConversion(hr_employee_attrition, attribute)

hr_employee_attrition

### Attributes to be dropped.

In [None]:
# Attributes removed from the feature set, grouped by reason
columns_to_drop = [
    # Class label
    'Attrition',
    # Index
    'EmployeeNumber',
    # Constant attributes: EmployeeCount = 1, Over18 = Y, StandardHours = 80
    'EmployeeCount',
    'Over18',
    'StandardHours',
    # High correlation (95%)
    'JobLevel',
    'MonthlyIncome',
]
hr_employee_attrition = hr_employee_attrition.drop(columns=columns_to_drop)


hr_employee_attrition

### Save hr_employee to CSV file.

In [None]:
# Alias the fully pre-processed frame under its final name ...
hr_employee = hr_employee_attrition
# ... and write it (row index included) to the output CSV file
hr_employee.to_csv(path_or_buf='./Data/HR_Employee.csv')