In [30]:
import pandas as pd
import scipy.stats as st

In [31]:
dataset = pd.read_csv('general_data.csv')

# Checking for null values

In [32]:
dataset.isnull().sum()

Age                         0
Attrition                   0
BusinessTravel              0
Department                  0
DistanceFromHome            0
Education                   0
EducationField              0
EmployeeCount               0
EmployeeID                  0
Gender                      0
JobLevel                    0
JobRole                     0
MaritalStatus               0
MonthlyIncome               0
NumCompaniesWorked         19
Over18                      0
PercentSalaryHike           0
StandardHours               0
StockOptionLevel            0
TotalWorkingYears           9
TrainingTimesLastYear       0
YearsAtCompany              0
YearsSinceLastPromotion     0
YearsWithCurrManager        0
dtype: int64

# Removing null values

In [33]:
dataset.dropna(inplace=True)

# Creating dummies

In [34]:
dataset = pd.get_dummies(dataset,drop_first=True)

In [35]:
dataset.columns

Index(['Age', 'DistanceFromHome', 'Education', 'EmployeeCount', 'EmployeeID',
       'JobLevel', 'MonthlyIncome', 'NumCompaniesWorked', 'PercentSalaryHike',
       'StandardHours', 'StockOptionLevel', 'TotalWorkingYears',
       'TrainingTimesLastYear', 'YearsAtCompany', 'YearsSinceLastPromotion',
       'YearsWithCurrManager', 'Attrition_Yes',
       'BusinessTravel_Travel_Frequently', 'BusinessTravel_Travel_Rarely',
       'Department_Research & Development', 'Department_Sales',
       'EducationField_Life Sciences', 'EducationField_Marketing',
       'EducationField_Medical', 'EducationField_Other',
       'EducationField_Technical Degree', 'Gender_Male',
       'JobRole_Human Resources', 'JobRole_Laboratory Technician',
       'JobRole_Manager', 'JobRole_Manufacturing Director',
       'JobRole_Research Director', 'JobRole_Research Scientist',
       'JobRole_Sales Executive', 'JobRole_Sales Representative',
       'MaritalStatus_Married', 'MaritalStatus_Single'],
      dtype='obje

In [36]:
dataset.describe(include='all') #checking for describe dataset

Unnamed: 0,Age,DistanceFromHome,Education,EmployeeCount,EmployeeID,JobLevel,MonthlyIncome,NumCompaniesWorked,PercentSalaryHike,StandardHours,...,JobRole_Human Resources,JobRole_Laboratory Technician,JobRole_Manager,JobRole_Manufacturing Director,JobRole_Research Director,JobRole_Research Scientist,JobRole_Sales Executive,JobRole_Sales Representative,MaritalStatus_Married,MaritalStatus_Single
count,4382.0,4382.0,4382.0,4382.0,4382.0,4382.0,4382.0,4382.0,4382.0,4382.0,...,4382.0,4382.0,4382.0,4382.0,4382.0,4382.0,4382.0,4382.0,4382.0,4382.0
mean,36.933364,9.198996,2.912369,1.0,2207.804884,2.063898,65061.702419,2.693291,15.210634,8.0,...,0.0356,0.176403,0.069603,0.097901,0.054085,0.198996,0.222501,0.056139,0.45801,0.32063
std,9.137272,8.105396,1.024728,0.0,1271.688783,1.106115,47142.310175,2.497832,3.663007,0.0,...,0.185312,0.381207,0.254506,0.297214,0.226211,0.399291,0.415973,0.230216,0.498291,0.466772
min,18.0,1.0,1.0,1.0,1.0,1.0,10090.0,0.0,11.0,8.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,30.0,2.0,2.0,1.0,1108.25,1.0,29110.0,1.0,12.0,8.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,36.0,7.0,3.0,1.0,2208.5,2.0,49190.0,2.0,14.0,8.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,43.0,14.0,4.0,1.0,3308.75,3.0,83790.0,4.0,18.0,8.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
max,60.0,29.0,5.0,1.0,4409.0,5.0,199990.0,9.0,25.0,8.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [37]:
dataset.shape # looking for the shape

(4382, 37)

# All Statistical Tests begin -:

#  Wilcoxon Sign Test

In [38]:
distanceFromHome1 = dataset['DistanceFromHome'][:dataset.shape[0]//2]
distanceFromHome2 = dataset['DistanceFromHome'][dataset.shape[0]//2:]

stat,p = st.wilcoxon(distanceFromHome1,distanceFromHome2)
print(p)

0.728944378738797


# Friedman test

In [39]:
distanceFromHome1 = dataset['DistanceFromHome'][:dataset.shape[0]//3]
distanceFromHome2 = dataset['DistanceFromHome'][dataset.shape[0]//3*2+2:]
distanceFromHome3 = dataset['DistanceFromHome'][dataset.shape[0]//3*2+2:]

stat,p = st.friedmanchisquare(distanceFromHome1,distanceFromHome2,distanceFromHome3)
print(p)

0.672958232388412


#  Mann whitney test

In [40]:
yearsWithCurrManager = dataset['YearsWithCurrManager']
education = dataset['Education']

stat,p = st.mannwhitneyu(yearsWithCurrManager,education)
print(p)

1.316734632613527e-06


# kruskal wallis test

In [41]:
yearsWithCurrManager = dataset['YearsWithCurrManager']
education = dataset['Education']
numCompaniesWorked = dataset['NumCompaniesWorked']

stat,p = st.kruskal(yearsWithCurrManager,education,numCompaniesWorked)
print(p)

2.637301301864434e-105


# chi-sqaure Test

In [42]:
businessTravel = dataset['JobRole_Manager']
department = dataset['Attrition_Yes']

chitable = pd.crosstab(department,businessTravel)
stat,p,dof,exepected = st.chi2_contingency(chitable)

print(p)

0.2884669470285437
