In [1]:
import pandas as pd

In [2]:
# Load the raw absenteeism records; the path is relative to the working directory.
absenteeism_dataset_raw = pd.read_csv("absenteeism_dataset_raw.csv")

In [3]:
# Preview the first ten rows of the raw data.
absenteeism_dataset_raw.head(10)

Unnamed: 0,ID,Reason for Absence,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,11,26,07/07/2015,289,36,33,239.554,30,1,2,1,4
1,36,0,14/07/2015,118,13,50,239.554,31,1,1,0,0
2,3,23,15/07/2015,179,51,38,239.554,31,1,0,0,2
3,7,7,16/07/2015,279,5,39,239.554,24,1,2,0,4
4,11,23,23/07/2015,289,36,33,239.554,30,1,2,1,2
5,3,23,10/07/2015,179,51,38,239.554,31,1,0,0,2
6,10,22,17/07/2015,361,52,28,239.554,27,1,1,4,8
7,20,23,24/07/2015,260,50,36,239.554,23,1,4,0,4
8,14,19,06/07/2015,155,12,34,239.554,25,1,2,0,40
9,1,22,13/07/2015,235,11,37,239.554,29,3,1,1,8


In [4]:
# Work on a copy so the raw frame stays untouched.
df = absenteeism_dataset_raw.copy()

In [5]:
# Show every column when a frame is displayed (no column truncation).
pd.set_option("display.max_columns", None)

In [6]:
# Display the working copy.
df

Unnamed: 0,ID,Reason for Absence,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,11,26,07/07/2015,289,36,33,239.554,30,1,2,1,4
1,36,0,14/07/2015,118,13,50,239.554,31,1,1,0,0
2,3,23,15/07/2015,179,51,38,239.554,31,1,0,0,2
3,7,7,16/07/2015,279,5,39,239.554,24,1,2,0,4
4,11,23,23/07/2015,289,36,33,239.554,30,1,2,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...
695,17,10,23/05/2018,179,22,40,237.656,22,2,2,0,8
696,28,6,23/05/2018,225,26,28,237.656,24,1,1,2,3
697,18,10,24/05/2018,330,16,28,237.656,25,2,0,0,8
698,25,23,24/05/2018,235,16,32,237.656,25,3,0,0,2


In [7]:
# List each column's dtype and non-null count.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 700 entries, 0 to 699
Data columns (total 12 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   ID                         700 non-null    int64  
 1   Reason for Absence         700 non-null    int64  
 2   Date                       700 non-null    object 
 3   Transportation Expense     700 non-null    int64  
 4   Distance to Work           700 non-null    int64  
 5   Age                        700 non-null    int64  
 6   Daily Work Load Average    700 non-null    float64
 7   Body Mass Index            700 non-null    int64  
 8   Education                  700 non-null    int64  
 9   Children                   700 non-null    int64  
 10  Pets                       700 non-null    int64  
 11  Absenteeism Time in Hours  700 non-null    int64  
dtypes: float64(1), int64(10), object(1)
memory usage: 65.8+ KB


In [8]:
# The total number of missing cells across the whole frame decides which message is shown.
print(
    "No missing values in the dataset"
    if df.isnull().sum().sum() == 0
    else "Missing values found!"
)

No missing values in the dataset


## ID

In [9]:
# The 'ID' column only identifies the individual (the same ID recurs across rows, e.g. 11 and 3) and has no analytical value — dropped

In [10]:
# Remove the 'ID' column. The result is built from the untouched raw frame rather
# than from df itself, so this cell can be re-run safely: a second run no longer
# raises a KeyError for the already-removed column.
df = absenteeism_dataset_raw.drop(columns=['ID'])

In [11]:
# Preview the first five rows after removing 'ID'.
df.head(5)

Unnamed: 0,Reason for Absence,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,26,07/07/2015,289,36,33,239.554,30,1,2,1,4
1,0,14/07/2015,118,13,50,239.554,31,1,1,0,0
2,23,15/07/2015,179,51,38,239.554,31,1,0,0,2
3,7,16/07/2015,279,5,39,239.554,24,1,2,0,4
4,23,23/07/2015,289,36,33,239.554,30,1,2,1,2


## Reason for Absence

#### Reason Index

In [12]:
# Largest reason code present in the data.
df["Reason for Absence"].max()

28

In [13]:
# Distinct reason codes, in order of first appearance.
pd.unique(df["Reason for Absence"])

array([26,  0, 23,  7, 22, 19,  1, 11, 14, 21, 10, 13, 28, 18, 25, 24,  6,
       27, 17,  8, 12,  5,  9, 15,  4,  3,  2, 16], dtype=int64)

In [14]:
# Codes run from 0 to 28, so up to 29 distinct values are expected.

In [15]:
# The same codes sorted, which makes gaps in the sequence easy to spot.
sorted(df["Reason for Absence"].unique())

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28]

In [16]:
# Reason code 20 is not present in the dataset, indicating that this category has not occurred.
# Since "Reason for Absence" is a nominal categorical variable based on an indexed list,
# missing categories simply reflect unobserved values in this sample — not missing data.

In [17]:
# ✅ Pre-check: Expecting no missing or incorrect values — 
# each row should contain exactly one valid reason for absence.
# All values are within the expected range, and each record is assigned to a single category.

In [18]:
# One indicator column per observed reason code, stored as 0/1 integers.
reason_columns = pd.get_dummies(df['Reason for Absence'], dtype=int)
reason_columns

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,25,26,27,28
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
695,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
696,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
697,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
698,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0


In [19]:
# Row-wise total of the dummy columns, stored as a temporary 'check' column.
reason_columns['check'] = reason_columns.sum(axis='columns')
reason_columns

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,25,26,27,28,check
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
695,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
696,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
697,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
698,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1


In [20]:
# Total of the per-row counts; it should equal the number of rows if each row has exactly one dummy set.
reason_columns['check'].sum(axis=0)

700

In [21]:
# Number of rows in df, for comparison with the 'check' total.
df.shape[0]

700

In [22]:
# Distinct per-row counts; only the value 1 is expected.
reason_columns['check'].unique()

array([1], dtype=int64)

In [23]:
# No missing or incorrect values found — each row contains exactly one valid reason for absence.
# All unique values are within the expected range, and each record is assigned to a single category.

In [24]:
# The helper column has served its purpose; remove it and preview the dummies.
reason_columns = reason_columns.drop(columns='check')
reason_columns.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,25,26,27,28
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0


In [25]:
# Drop the dummy column for reason 0 (the first category) so that it serves as the baseline.

In [26]:
# Rebuild the dummies with drop_first=True, which omits the first category's column (reason 0).
reason_columns = pd.get_dummies(df['Reason for Absence'], drop_first = True)

In [27]:
# Preview the first five rows, cast to integers for display only (reason_columns itself is not reassigned).
reason_columns.head(5).astype(int)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,25,26,27,28
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0


## Group the Reason for Absence

##### There are many individual reasons, so similar reasons for absence are combined into a few groups.

In [28]:
# List the column labels of df and of the dummy frame.
print(df.columns.values)
print(reason_columns.columns.values)

['Reason for Absence' 'Date' 'Transportation Expense' 'Distance to Work'
 'Age' 'Daily Work Load Average' 'Body Mass Index' 'Education' 'Children'
 'Pets' 'Absenteeism Time in Hours']
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 21 22 23 24 25
 26 27 28]


In [29]:
# To avoid multicollinearity with the four reason groups that replace it, drop the original "Reason for Absence" column for now.

In [30]:
# The raw reason code is replaced by group flags built from reason_columns, so remove it here.
df = df.drop(columns=["Reason for Absence"])

In [31]:
# Confirm the raw reason column is gone.
df.head(5)

Unnamed: 0,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,07/07/2015,289,36,33,239.554,30,1,2,1,4
1,14/07/2015,118,13,50,239.554,31,1,1,0,0
2,15/07/2015,179,51,38,239.554,31,1,0,0,2
3,16/07/2015,279,5,39,239.554,24,1,2,0,4
4,23/07/2015,289,36,33,239.554,30,1,2,1,2


In [32]:
# Collapse the individual reason dummies into four group flags. A flag is 1 when
# any dummy whose column label falls inside the group's label range is set:
# group 1 -> 1-14, group 2 -> 15-17, group 3 -> 18-21, group 4 -> 22 and above.
reason_type_1, reason_type_2, reason_type_3, reason_type_4 = (
    reason_columns.loc[:, label_range].max(axis=1).astype(int)
    for label_range in (slice(1, 14), slice(15, 17), slice(18, 21), slice(22, None))
)

In [33]:
# Combine the reason groups with df

In [34]:
# Attach the four unnamed group Series to the right of df; they arrive as columns 0-3.
df = pd.concat(
    [df, reason_type_1, reason_type_2, reason_type_3, reason_type_4],
    axis="columns",
)

In [35]:
# Preview the result; the new group columns carry the default labels 0-3.
df.head(5)

Unnamed: 0,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours,0,1,2,3
0,07/07/2015,289,36,33,239.554,30,1,2,1,4,0,0,0,1
1,14/07/2015,118,13,50,239.554,31,1,1,0,0,0,0,0,0
2,15/07/2015,179,51,38,239.554,31,1,0,0,2,0,0,0,1
3,16/07/2015,279,5,39,239.554,24,1,2,0,4,1,0,0,0
4,23/07/2015,289,36,33,239.554,30,1,2,1,2,0,0,0,1


In [36]:
# Current column labels, including the unnamed group columns.
df.columns.values

array(['Date', 'Transportation Expense', 'Distance to Work', 'Age',
       'Daily Work Load Average', 'Body Mass Index', 'Education',
       'Children', 'Pets', 'Absenteeism Time in Hours', 0, 1, 2, 3],
      dtype=object)

In [37]:
# Full list of labels for df after the concat: the ten existing columns followed
# by the four reason-group names that replace the default 0-3 labels.
column_names = [
    'Date',
    'Transportation Expense',
    'Distance to Work',
    'Age',
    'Daily Work Load Average',
    'Body Mass Index',
    'Education',
    'Children',
    'Pets',
    'Absenteeism Time in Hours',
    *(f'Reason_{group_no}' for group_no in range(1, 5)),
]

In [38]:
# Replace all column labels at once; column_names must list one name per column, in the existing column order.
df.columns = column_names

In [39]:
# Check the renamed columns.
df.head(5)

Unnamed: 0,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours,Reason_1,Reason_2,Reason_3,Reason_4
0,07/07/2015,289,36,33,239.554,30,1,2,1,4,0,0,0,1
1,14/07/2015,118,13,50,239.554,31,1,1,0,0,0,0,0,0
2,15/07/2015,179,51,38,239.554,31,1,0,0,2,0,0,0,1
3,16/07/2015,279,5,39,239.554,24,1,2,0,4,1,0,0,0
4,23/07/2015,289,36,33,239.554,30,1,2,1,2,0,0,0,1


In [40]:
# reordering columns

In [41]:
# Desired column order: the four reason-group flags first, then the remaining columns unchanged.
column_name_reorder = [f'Reason_{group_no}' for group_no in range(1, 5)] + [
    'Date',
    'Transportation Expense',
    'Distance to Work',
    'Age',
    'Daily Work Load Average',
    'Body Mass Index',
    'Education',
    'Children',
    'Pets',
    'Absenteeism Time in Hours',
]

In [42]:
# Select the columns in the new order, which moves the reason-group flags to the front.
df = df.loc[:, column_name_reorder]
df.head(3)

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,0,0,0,1,07/07/2015,289,36,33,239.554,30,1,2,1,4
1,0,0,0,0,14/07/2015,118,13,50,239.554,31,1,1,0,0
2,0,0,0,1,15/07/2015,179,51,38,239.554,31,1,0,0,2


## Checkpoint

In [43]:
# Checkpoint: continue on a copy so df keeps the state reached after the reason-group step.
df_reason_mod = df.copy()
df_reason_mod.head(3)

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,0,0,0,1,07/07/2015,289,36,33,239.554,30,1,2,1,4
1,0,0,0,0,14/07/2015,118,13,50,239.554,31,1,1,0,0
2,0,0,0,1,15/07/2015,179,51,38,239.554,31,1,0,0,2


## Date

In [44]:
# Convert Date to timestamps,
# then extract the month and the day of the week.
# The Date column can be dropped afterwards.

In [45]:
# Preview the checkpoint frame before working on Date.
df_reason_mod.head(3)

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,0,0,0,1,07/07/2015,289,36,33,239.554,30,1,2,1,4
1,0,0,0,0,14/07/2015,118,13,50,239.554,31,1,1,0,0
2,0,0,0,1,15/07/2015,179,51,38,239.554,31,1,0,0,2


In [46]:
# Type of the first Date value before conversion.
type(df_reason_mod["Date"][0])

str

In [47]:
# Inspect the raw date strings (day/month/year).
df_reason_mod["Date"]

0      07/07/2015
1      14/07/2015
2      15/07/2015
3      16/07/2015
4      23/07/2015
          ...    
695    23/05/2018
696    23/05/2018
697    24/05/2018
698    24/05/2018
699    31/05/2018
Name: Date, Length: 700, dtype: object

In [48]:
# Parse the day/month/year strings into pandas timestamps using an explicit format.
df_reason_mod["Date"] = pd.to_datetime(df_reason_mod["Date"],
                                      format = '%d/%m/%Y')

In [49]:
# Verify that the column now holds datetime64 values.
df_reason_mod["Date"]

0     2015-07-07
1     2015-07-14
2     2015-07-15
3     2015-07-16
4     2015-07-23
         ...    
695   2018-05-23
696   2018-05-23
697   2018-05-24
698   2018-05-24
699   2018-05-31
Name: Date, Length: 700, dtype: datetime64[ns]

In [50]:
# Type of the first Date value after conversion.
type(df_reason_mod["Date"][0])

pandas._libs.tslibs.timestamps.Timestamp

In [51]:
# Month Values 

In [52]:
# Start with an empty list that will hold one month number per row.
list_months = []

In [53]:
# Take the month number straight from the datetime column instead of looping over
# a hard-coded 700 rows. This follows the actual row count and, because the list
# is rebuilt rather than appended to, re-running the cell cannot grow it.
list_months = df_reason_mod["Date"].dt.month.tolist()

In [54]:
# Store the collected month numbers as a new column.
df_reason_mod["Month Value"] = list_months

In [55]:
# Day of the week

In [56]:
def date_to_weekday(date_value):
    """Return the result of ``date_value.weekday()``.

    For ``date``/``datetime``/``Timestamp`` objects this is the weekday
    index, where Monday is 0 and Sunday is 6.
    """
    weekday_index = date_value.weekday()
    return weekday_index

In [57]:
# Apply date_to_weekday to every Date value and store the result as a new column.
df_reason_mod["Day of the Week"] = df_reason_mod["Date"].apply(date_to_weekday)

In [58]:
# Preview the two new date-derived columns.
df_reason_mod.head(3)

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours,Month Value,Day of the Week
0,0,0,0,1,2015-07-07,289,36,33,239.554,30,1,2,1,4,7,1
1,0,0,0,0,2015-07-14,118,13,50,239.554,31,1,1,0,0,7,1
2,0,0,0,1,2015-07-15,179,51,38,239.554,31,1,0,0,2,7,2


In [59]:
# Reorder so the month and day of the week sit next to the date; Date is not dropped because it might still be needed.

In [60]:
# Current column order, with the two new columns at the end.
df_reason_mod.columns.values

array(['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4', 'Date',
       'Transportation Expense', 'Distance to Work', 'Age',
       'Daily Work Load Average', 'Body Mass Index', 'Education',
       'Children', 'Pets', 'Absenteeism Time in Hours', 'Month Value',
       'Day of the Week'], dtype=object)

In [61]:
# Target order: reason-group flags, then Date with its derived columns, then the rest.
column_names_full_reorder = (
    ['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4']
    + ['Date', 'Month Value', 'Day of the Week']
    + [
        'Transportation Expense',
        'Distance to Work',
        'Age',
        'Daily Work Load Average',
        'Body Mass Index',
        'Education',
        'Children',
        'Pets',
        'Absenteeism Time in Hours',
    ]
)

In [62]:
# Apply the new column order.
df_reason_mod = df_reason_mod[column_names_full_reorder]

In [63]:
# Check the reordered frame.
df_reason_mod.head(3)

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Date,Month Value,Day of the Week,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,0,0,0,1,2015-07-07,7,1,289,36,33,239.554,30,1,2,1,4
1,0,0,0,0,2015-07-14,7,1,118,13,50,239.554,31,1,1,0,0
2,0,0,0,1,2015-07-15,7,2,179,51,38,239.554,31,1,0,0,2


## 2nd Check Point

In [64]:
# Second checkpoint: keep df_reason_mod as is and continue on a copy.
df_reason_date_mod = df_reason_mod.copy()
df_reason_date_mod.head(3)

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Date,Month Value,Day of the Week,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,0,0,0,1,2015-07-07,7,1,289,36,33,239.554,30,1,2,1,4
1,0,0,0,0,2015-07-14,7,1,118,13,50,239.554,31,1,1,0,0
2,0,0,0,1,2015-07-15,7,2,179,51,38,239.554,31,1,0,0,2


In [65]:
# Confirm dtypes and non-null counts after the date step.
df_reason_date_mod.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 700 entries, 0 to 699
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   Reason_1                   700 non-null    int32         
 1   Reason_2                   700 non-null    int32         
 2   Reason_3                   700 non-null    int32         
 3   Reason_4                   700 non-null    int32         
 4   Date                       700 non-null    datetime64[ns]
 5   Month Value                700 non-null    int64         
 6   Day of the Week            700 non-null    int64         
 7   Transportation Expense     700 non-null    int64         
 8   Distance to Work           700 non-null    int64         
 9   Age                        700 non-null    int64         
 10  Daily Work Load Average    700 non-null    float64       
 11  Body Mass Index            700 non-null    int64         
 12  Educatio

## Education

In [66]:
# Distinct education codes present.
df_reason_date_mod['Education'].unique()

array([1, 3, 2, 4], dtype=int64)

1 = High School
2 = Graduate
3 = Postgraduate
4 = Master's or Doctorate

In [67]:
# How many records fall under each education code.
df_reason_date_mod['Education'].value_counts()

Education
1    583
3     73
2     40
4      4
Name: count, dtype: int64

In [68]:
# Levels 2-4 each have only a few observations, so it is better to group them together.

In [69]:
# Collapse education into a binary flag: 0 = code 1 (high school), 1 = any of codes 2-4.
# The codes are read from the df_reason_mod checkpoint, which still holds the original
# 1-4 values, rather than from the column being overwritten. Re-running this cell
# therefore gives the same result instead of remapping the already-recoded 0/1
# values (0 -> NaN, 1 -> 0).
df_reason_date_mod['Education'] = df_reason_mod['Education'].map({1: 0, 2: 1, 3: 1, 4: 1})

In [70]:
# After recoding, only 0 and 1 should remain.
df_reason_date_mod['Education'].unique()

array([0, 1], dtype=int64)

In [71]:
# Class sizes of the binary education flag.
df_reason_date_mod['Education'].value_counts()

Education
0    583
1    117
Name: count, dtype: int64

## Final Check Point

In [72]:
# Final checkpoint: snapshot the preprocessed frame as df_cleaned.
df_cleaned = df_reason_date_mod.copy()
df_cleaned.head(3)

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Date,Month Value,Day of the Week,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,0,0,0,1,2015-07-07,7,1,289,36,33,239.554,30,0,2,1,4
1,0,0,0,0,2015-07-14,7,1,118,13,50,239.554,31,0,1,0,0
2,0,0,0,1,2015-07-15,7,2,179,51,38,239.554,31,0,0,0,2


In [73]:
# Final check of dtypes and non-null counts.
df_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 700 entries, 0 to 699
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   Reason_1                   700 non-null    int32         
 1   Reason_2                   700 non-null    int32         
 2   Reason_3                   700 non-null    int32         
 3   Reason_4                   700 non-null    int32         
 4   Date                       700 non-null    datetime64[ns]
 5   Month Value                700 non-null    int64         
 6   Day of the Week            700 non-null    int64         
 7   Transportation Expense     700 non-null    int64         
 8   Distance to Work           700 non-null    int64         
 9   Age                        700 non-null    int64         
 10  Daily Work Load Average    700 non-null    float64       
 11  Body Mass Index            700 non-null    int64         
 12  Educatio

In [74]:
# Lift the column/row/width display limits only while rendering this frame.
# option_context restores the previous settings on exit, so later cells are not
# left printing every row of every frame they display.
with pd.option_context('display.max_columns', None,
                       'display.max_rows', None,
                       'display.width', None):
    display(df_cleaned)

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Date,Month Value,Day of the Week,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,0,0,0,1,2015-07-07,7,1,289,36,33,239.554,30,0,2,1,4
1,0,0,0,0,2015-07-14,7,1,118,13,50,239.554,31,0,1,0,0
2,0,0,0,1,2015-07-15,7,2,179,51,38,239.554,31,0,0,0,2
3,1,0,0,0,2015-07-16,7,3,279,5,39,239.554,24,0,2,0,4
4,0,0,0,1,2015-07-23,7,3,289,36,33,239.554,30,0,2,1,2
5,0,0,0,1,2015-07-10,7,4,179,51,38,239.554,31,0,0,0,2
6,0,0,0,1,2015-07-17,7,4,361,52,28,239.554,27,0,1,4,8
7,0,0,0,1,2015-07-24,7,4,260,50,36,239.554,23,0,4,0,4
8,0,0,1,0,2015-07-06,7,0,155,12,34,239.554,25,0,2,0,40
9,0,0,0,1,2015-07-13,7,0,235,11,37,239.554,29,1,1,1,8


In [75]:
from datetime import datetime

# Today's date as YYYY-MM-DD, used to stamp the output file name.
timestamp = f"{datetime.now():%Y-%m-%d}"

# File name carries the date stamp, so each day's export gets its own file.
filename = f"absenteeism_dataset_cleaned_{timestamp}.csv"

# Write the cleaned frame to CSV, leaving out the index column.
df_cleaned.to_csv(filename, index=False)