In [5]:
import pandas as pd
from scipy.stats import pearsonr, spearmanr
import numpy as np

In [3]:
# Load the tab-separated absenteeism dataset into a DataFrame.
# low_memory=False has pandas parse the file in one pass instead of in chunks.
data = pd.read_csv(
    'Absenteeism_at_work.tsv',
    sep='\t',
    low_memory=False,
)

In [6]:
# Print a structural summary of the frame: index range, column names,
# non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 740 entries, 0 to 739
Data columns (total 21 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   ID                               740 non-null    int64  
 1   Reason for absence               740 non-null    int64  
 2   Month of absence                 740 non-null    int64  
 3   Day of the week                  740 non-null    int64  
 4   Seasons                          740 non-null    int64  
 5   Transportation expense           740 non-null    int64  
 6   Distance from Residence to Work  740 non-null    int64  
 7   Service time                     740 non-null    int64  
 8   Age                              740 non-null    int64  
 9   Work load Average/day            740 non-null    float64
 10  Hit target                       740 non-null    int64  
 11  Disciplinary failure             740 non-null    int64  
 12  Education             

In [13]:
# Display the 'Reason for absence' column (pandas abbreviates long Series in the output).
# NOTE(review): the values look like integer category codes — confirm that treating
# them as numeric in the correlations below is intended.
data['Reason for absence']

0      26
1       0
2      23
3       7
4      23
       ..
735    14
736    11
737     0
738     0
739     0
Name: Reason for absence, Length: 740, dtype: int64

In [7]:
# 2x2 Pearson correlation matrix for 'Day of the week' vs. 'Reason for absence';
# the off-diagonal entries hold the correlation coefficient.
np.corrcoef(
    data['Day of the week'],
    data['Reason for absence'],
)

array([[1.        , 0.11631924],
       [0.11631924, 1.        ]])

In [14]:
# Pearson (linear) correlation: 'Day of the week' vs. 'Reason for absence'.
# Result is (coefficient, p-value).
pearsonr(
    data['Day of the week'],
    data['Reason for absence'],
)

(0.11631924393042804, 0.001526270295711275)

In [15]:
# Spearman (rank) correlation: 'Day of the week' vs. 'Reason for absence'.
# Result carries the coefficient and its p-value.
spearmanr(
    data['Day of the week'],
    data['Reason for absence'],
)

SpearmanrResult(correlation=0.12246263308006688, pvalue=0.0008429727110657136)

# Day of the week and reason for absence

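There is a weak positive correlation (Pearson r ≈ 0.12, Spearman ρ ≈ 0.12) that is statistically significant (p < 0.01). The day of the week is only slightly associated with the reason for absence; a correlation by itself does not show that one causes the other.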

In [17]:
# Pearson (linear) correlation: 'Month of absence' vs. 'Age'.
# Result is (coefficient, p-value).
pearsonr(
    data['Month of absence'],
    data['Age'],
)

(-0.0015195444984268044, 0.9670836591482739)

In [18]:
# Spearman (rank) correlation: 'Month of absence' vs. 'Age'.
# Result carries the coefficient and its p-value.
spearmanr(
    data['Month of absence'],
    data['Age'],
)

SpearmanrResult(correlation=0.0016805552597149393, pvalue=0.9635981476773132)

# Month of absence and Age

There is no evidence of a correlation between month of absence and age (both coefficients ≈ 0.00, p ≈ 0.97). The month in which an absence occurs does not appear to be related to age.

In [20]:
# Pearson (linear) correlation: 'Hit target' vs. 'Disciplinary failure'.
# Result is (coefficient, p-value).
pearsonr(
    data['Hit target'],
    data['Disciplinary failure'],
)

(-0.1479708344455568, 5.329171363725385e-05)

In [21]:
# Spearman (rank) correlation: 'Hit target' vs. 'Disciplinary failure'.
# Result carries the coefficient and its p-value.
spearmanr(
    data['Hit target'],
    data['Disciplinary failure'],
)

SpearmanrResult(correlation=-0.11472538854357511, pvalue=0.001772504612734226)

# Hit Target and Disciplinary Failure

Hit target and disciplinary failure have a weak negative correlation (Pearson r ≈ -0.15, Spearman ρ ≈ -0.11) that is statistically significant (p < 0.01).

In [22]:
# Pearson (linear) correlation: 'Service time' vs. 'Reason for absence'.
# Result is (coefficient, p-value).
pearsonr(
    data['Service time'],
    data['Reason for absence'],
)

(0.048425115424340914, 0.18822416547756862)

In [23]:
# Spearman (rank) correlation: 'Service time' vs. 'Reason for absence'.
# Result carries the coefficient and its p-value.
spearmanr(
    data['Service time'],
    data['Reason for absence'],
)

SpearmanrResult(correlation=0.08398925927149598, pvalue=0.022316883034199483)

# Service time and Reason for absence

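Service time and reason for absence show at most a very weak positive association, and the two tests disagree: the Pearson correlation (r ≈ 0.05, p ≈ 0.19) is not significant, while the Spearman rank correlation (ρ ≈ 0.08, p ≈ 0.02) is significant at the 5% level. Any relationship is too weak to be of practical importance.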

In [26]:
# Pearson (linear) correlation: 'Transportation expense' vs.
# 'Distance from Residence to Work'. Result is (coefficient, p-value).
pearsonr(
    data['Transportation expense'],
    data['Distance from Residence to Work'],
)

(0.26218311063190775, 4.2533009832573875e-13)

In [27]:
# Spearman (rank) correlation: 'Transportation expense' vs.
# 'Distance from Residence to Work'. Result carries the coefficient and its p-value.
spearmanr(
    data['Transportation expense'],
    data['Distance from Residence to Work'],
)

SpearmanrResult(correlation=0.287267232417557, pvalue=1.588177816180632e-15)

# Transportation Expense and Distance from Residence to Work

Transportation expense and distance from residence to work have a weak-to-moderate positive correlation (Pearson r ≈ 0.26, Spearman ρ ≈ 0.29) that is highly significant (p < 0.001).

In [30]:
# Pearson (linear) correlation: 'Age' vs. 'Pet'.
# Result is (coefficient, p-value).
pearsonr(
    data['Age'],
    data['Pet'],
)

(-0.23122599912658473, 1.9420065119551898e-10)

In [31]:
# Spearman (rank) correlation: 'Age' vs. 'Pet'.
# Result carries the coefficient and its p-value.
spearmanr(
    data['Age'],
    data['Pet'],
)

SpearmanrResult(correlation=-0.27576995864941006, pvalue=2.2121000884657082e-14)

# Age and Pet

Age and Pet have a weak-to-moderate negative correlation (Pearson r ≈ -0.23, Spearman ρ ≈ -0.28) that is highly significant (p < 0.001).

In [32]:
# Pearson (linear) correlation: 'Absenteeism time in hours' vs. 'Day of the week'.
# Result is (coefficient, p-value).
pearsonr(
    data['Absenteeism time in hours'],
    data['Day of the week'],
)

(-0.12436061376972112, 0.0006978426162534148)

In [33]:
# Spearman (rank) correlation: 'Absenteeism time in hours' vs. 'Day of the week'.
# Result carries the coefficient and its p-value.
spearmanr(
    data['Absenteeism time in hours'],
    data['Day of the week'],
)

SpearmanrResult(correlation=-0.0945472314370009, pvalue=0.01007101146881483)

# Absenteeism time in hours and Day of the week

Absenteeism time in hours and day of the week have a weak negative correlation (Pearson r ≈ -0.12, p ≈ 0.0007; Spearman ρ ≈ -0.09, p ≈ 0.01) that is statistically significant at the 5% level.