# DATA INGESTION MODULE

**GOAL:** Load dataframe using Dask Cluster by chunks, then serialize dataset for next processing steps

In [86]:
#IMPORTS
import gc
import os
import pandas as pd
import datetime as dt
import dask
import dask.dataframe as dd
from dask.distributed import LocalCluster,client

In [2]:
#GLOBAL VARIABLES
AGEL_HOME_DIR = "/home/pavol/Plocha/Agel"
AGEL_DATA_DIR = "/home/pavol/Plocha/Agel/data"
AGEL_SCRIPT_DIR = "/home/pavol/Plocha/Agel/scripts"

DATA_FILEPATH = "/home/pavol/Plocha/Agel/data/diabetic_data.csv"
DATA_MAPPING_FILEPATH = "/home/pavol/Plocha/Agel/data/IDS_mapping.csv"

In [3]:
#Change to working directory where data is located with all scripts
os.chdir(AGEL_SCRIPT_DIR)

### CREATE LOCAL DASK CLUSTER

In [97]:
cluster = LocalCluster(
				n_workers=2,          #Nodes/workers
				processes=True,
				threads_per_worker=1,#adjust by your preferences...
                memory_limit='2GB'
			)

In [98]:
cluster.status

<Status.running: 'running'>

In [99]:
cluster

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 2
Total threads: 2,Total memory: 3.73 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:42745,Workers: 2
Dashboard: http://127.0.0.1:8787/status,Total threads: 2
Started: Just now,Total memory: 3.73 GiB

0,1
Comm: tcp://127.0.0.1:45285,Total threads: 1
Dashboard: http://127.0.0.1:34373/status,Memory: 1.86 GiB
Nanny: tcp://127.0.0.1:39509,
Local directory: /tmp/dask-scratch-space/worker-54ipfn4k,Local directory: /tmp/dask-scratch-space/worker-54ipfn4k

0,1
Comm: tcp://127.0.0.1:43133,Total threads: 1
Dashboard: http://127.0.0.1:33969/status,Memory: 1.86 GiB
Nanny: tcp://127.0.0.1:42037,
Local directory: /tmp/dask-scratch-space/worker-71fojg2z,Local directory: /tmp/dask-scratch-space/worker-71fojg2z


In [None]:
cluster.scale_up(n=2, cores=2, memory="1GB")    #Scale up by 2nodes/workers, each of them by 2 cores and add more memory 1GB
cluster.scale_down(n=1)                         #Scale down by 1 node/worker
cluster.adapt()                                 #Adaptive strategy for cluster to define <MIN,MAX> workers

### LOAD DASK DATAFRAME USING CLUSTER

<img src="https://docs.dask.org/en/stable/_images/dask-dataframe.svg" width=250 height=400 />

In [7]:
#pandas Dataframe
pdf = None

#Load dataframe using batches...
with cluster.get_client() as client:
    try:

        #Dask loads dataset by chunks and into many smaller datasets by row-wise (vertically)
        ddf = dd.read_csv(DATA_FILEPATH, header=0, blocksize=5e6,  # ~5MB chunks
                          dtype={'A1Cresult': 'object', 'diag_1': 'object', 'max_glu_serum': 'object'})

        #Conversion of object column to string values
        ddf['A1Cresult'] = ddf['A1Cresult'].astype('|S')
        ddf['diag_1'] = ddf['diag_1'].astype('|S')
        ddf['max_glu_serum'] = ddf['max_glu_serum'].astype('|S')

        print(ddf.info)
        print(ddf.head(n=5, npartitions=2, compute= True))

        #Convert from distributed Dask dataset into pandas Dataset - only for small datasets that fits into memory
        #Otherwise you will have to perform operations with dask and
        # after all processing steps, you serialize result dataset using .parquet or other such as .HDF5 format
        pdf =  ddf.compute()
    except Exception as e:
        print(f'Exception: {e}')

<bound method DataFrame.info of Dask DataFrame Structure:
              encounter_id patient_nbr    race  gender     age  weight admission_type_id discharge_disposition_id admission_source_id time_in_hospital payer_code medical_specialty num_lab_procedures num_procedures num_medications number_outpatient number_emergency number_inpatient diag_1  diag_2  diag_3 number_diagnoses max_glu_serum A1Cresult metformin repaglinide nateglinide chlorpropamide glimepiride acetohexamide glipizide glyburide tolbutamide pioglitazone rosiglitazone acarbose miglitol troglitazone tolazamide examide citoglipton insulin glyburide-metformin glipizide-metformin glimepiride-pioglitazone metformin-rosiglitazone metformin-pioglitazone  change diabetesMed readmitted
npartitions=3                                                                                                                                                                                                                                            

### CLOSE LOCALCLUSTER

In [100]:
cluster.close()

In [101]:
cluster.status

<Status.closed: 'closed'>

In [102]:
del cluster
gc.collect()

1774

## PANDAS DATAFRAME LOADING

For simplicity I will use pandas Dataframe due to size of the Diabetes dataset that fits into memory

In [11]:
#We have already load Pandas Dataframe using Dask and in which we converted Dask Dataframe -> Pandas Dataframe
pdf.head(10)

Unnamed: 0,encounter_id,patient_nbr,race,gender,age,weight,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,...,citoglipton,insulin,glyburide-metformin,glipizide-metformin,glimepiride-pioglitazone,metformin-rosiglitazone,metformin-pioglitazone,change,diabetesMed,readmitted
0,2278392,8222157,Caucasian,Female,[0-10),?,6,25,1,1,...,No,No,No,No,No,No,No,No,No,NO
1,149190,55629189,Caucasian,Female,[10-20),?,1,1,7,3,...,No,Up,No,No,No,No,No,Ch,Yes,>30
2,64410,86047875,AfricanAmerican,Female,[20-30),?,1,1,7,2,...,No,No,No,No,No,No,No,No,Yes,NO
3,500364,82442376,Caucasian,Male,[30-40),?,1,1,7,2,...,No,Up,No,No,No,No,No,Ch,Yes,NO
4,16680,42519267,Caucasian,Male,[40-50),?,1,1,7,1,...,No,Steady,No,No,No,No,No,Ch,Yes,NO
5,35754,82637451,Caucasian,Male,[50-60),?,2,1,2,3,...,No,Steady,No,No,No,No,No,No,Yes,>30
6,55842,84259809,Caucasian,Male,[60-70),?,3,1,2,4,...,No,Steady,No,No,No,No,No,Ch,Yes,NO
7,63768,114882984,Caucasian,Male,[70-80),?,1,1,7,5,...,No,No,No,No,No,No,No,No,Yes,>30
8,12522,48330783,Caucasian,Female,[80-90),?,2,1,4,13,...,No,Steady,No,No,No,No,No,Ch,Yes,NO
9,15738,63555939,Caucasian,Female,[90-100),?,3,3,4,12,...,No,Steady,No,No,No,No,No,Ch,Yes,NO


In [108]:
pdf.info

<bound method DataFrame.info of        encounter_id  patient_nbr             race  gender      age weight  \
0           2278392      8222157        Caucasian  Female   [0-10)      ?   
1            149190     55629189        Caucasian  Female  [10-20)      ?   
2             64410     86047875  AfricanAmerican  Female  [20-30)      ?   
3            500364     82442376        Caucasian    Male  [30-40)      ?   
4             16680     42519267        Caucasian    Male  [40-50)      ?   
...             ...          ...              ...     ...      ...    ...   
34398     443847548    100162476  AfricanAmerican    Male  [70-80)      ?   
34399     443847782     74694222  AfricanAmerican  Female  [80-90)      ?   
34400     443854148     41088789        Caucasian    Male  [70-80)      ?   
34401     443857166     31693671        Caucasian  Female  [80-90)      ?   
34402     443867222    175429310        Caucasian    Male  [70-80)      ?   

       admission_type_id admission_type_des

In [110]:
x = pdf.drop_duplicates(subset=['encounter_id', 'patient_nbr'])

In [114]:
len(pdf.encounter_id.unique().tolist())

101766

In [115]:
#Some patients ocurrs more
len(pdf.patient_nbr.unique().tolist())

71518

In [107]:
pdf[pdf.patient_nbr == 8222157]

Unnamed: 0,encounter_id,patient_nbr,race,gender,age,weight,admission_type_id,admission_type_desc,discharge_disposition_id,discharge_disposition_desc,...,citoglipton,insulin,glyburide-metformin,glipizide-metformin,glimepiride-pioglitazone,metformin-rosiglitazone,metformin-pioglitazone,change,diabetesMed,readmitted
0,2278392,8222157,Caucasian,Female,[0-10),?,6,,25,Not Mapped,...,No,No,No,No,No,No,No,No,No,NO


In [117]:
pdf.groupby('patient_nbr').size()

patient_nbr
135          2
378          1
729          1
774          1
927          1
            ..
189351095    1
189365864    1
189445127    1
189481478    1
189502619    1
Length: 71518, dtype: int64

In [118]:
pdf[pdf.patient_nbr == 1152]

Unnamed: 0,encounter_id,patient_nbr,race,gender,age,weight,admission_type_id,admission_type_desc,discharge_disposition_id,discharge_disposition_desc,...,citoglipton,insulin,glyburide-metformin,glipizide-metformin,glimepiride-pioglitazone,metformin-rosiglitazone,metformin-pioglitazone,change,diabetesMed,readmitted
1164,8380170,1152,AfricanAmerican,Female,[50-60),?,1,Emergency,1,Discharged to home,...,No,Steady,No,No,No,No,No,No,Yes,>30
5953,30180318,1152,AfricanAmerican,Female,[50-60),?,1,Emergency,1,Discharged to home,...,No,Down,No,No,No,No,No,Ch,Yes,>30
14180,55533660,1152,AfricanAmerican,Female,[60-70),?,1,Emergency,1,Discharged to home,...,No,Steady,No,No,No,No,No,No,Yes,>30
23623,80742510,1152,AfricanAmerican,Female,[60-70),?,1,Emergency,1,Discharged to home,...,No,Steady,No,No,No,No,No,No,Yes,>30
24642,83281464,1152,AfricanAmerican,Female,[60-70),?,1,Emergency,1,Discharged to home,...,No,Up,No,No,No,No,No,Ch,Yes,NO


In [104]:
pdf.info

<bound method DataFrame.info of        encounter_id  patient_nbr             race  gender      age weight  \
0           2278392      8222157        Caucasian  Female   [0-10)      ?   
1            149190     55629189        Caucasian  Female  [10-20)      ?   
2             64410     86047875  AfricanAmerican  Female  [20-30)      ?   
3            500364     82442376        Caucasian    Male  [30-40)      ?   
4             16680     42519267        Caucasian    Male  [40-50)      ?   
...             ...          ...              ...     ...      ...    ...   
34398     443847548    100162476  AfricanAmerican    Male  [70-80)      ?   
34399     443847782     74694222  AfricanAmerican  Female  [80-90)      ?   
34400     443854148     41088789        Caucasian    Male  [70-80)      ?   
34401     443857166     31693671        Caucasian  Female  [80-90)      ?   
34402     443867222    175429310        Caucasian    Male  [70-80)      ?   

       admission_type_id admission_type_des

### LOAD IDS_MAPPING FILE

There is also another file called 'IDS_mapping.csv'.
This file contains mapping of some columns IDs to relevant description.

As a Data Engineer it would be more suitable to have also descriptions for ID columns, to enrich our dataset with more information.

In [17]:
#1. Load mapping .csv file
ids_df = pd.read_csv(DATA_MAPPING_FILEPATH, names=["val", "desc"])
ids_df.head(20)

Unnamed: 0,val,desc
0,admission_type_id,description
1,1,Emergency
2,2,Urgent
3,3,Elective
4,4,Newborn
5,5,Not Available
6,6,
7,7,Trauma Center
8,8,Not Mapped
9,,


In [18]:
#Show some info about it
ids_df.info

<bound method DataFrame.info of                   val                                               desc
0   admission_type_id                                        description
1                   1                                          Emergency
2                   2                                             Urgent
3                   3                                           Elective
4                   4                                            Newborn
..                ...                                                ...
63                 22   Transfer from hospital inpt/same fac reslt in...
64                 23                          Born inside this hospital
65                 24                         Born outside this hospital
66                 25            Transfer from Ambulatory Surgery Center
67                 26                              Transfer from Hospice

[68 rows x 2 columns]>

In [21]:
#2. Make sure that all the column and their values are str datatype
ids_df['val'] = ids_df['val'].astype('str')
ids_df['desc'] = ids_df['desc'].astype('str')
ids_df.head(30)

Unnamed: 0,val,desc
0,admission_type_id,description
1,1,Emergency
2,2,Urgent
3,3,Elective
4,4,Newborn
5,5,Not Available
6,6,
7,7,Trauma Center
8,8,Not Mapped
9,,


As we can see IDS mapping file is a little bit messy.
In the first column is stored the name of the mapping column with the related key.
The second column contains desired description.

Before mapping to source columns in our Diabetes dataframe, we have to do some transformation of IDS dataframe
to extract source column with its keys and description.
We do so in the steps below...

In [23]:
#3.Mark ID column names
#Mark ID key values
ids_df['val_key'] = ids_df['val'].apply(lambda x: x if 'id' in x else 0)
ids_df['val_num'] = ids_df['val'].apply(lambda x: 0 if 'id' in x else 1)
ids_df['val_header'] = ids_df['val'].apply(lambda x: 1 if 'id' in x else 0)

In [24]:
#Show marked data
ids_df.head(20)

Unnamed: 0,val,desc,val_key,val_num,val_header
0,admission_type_id,description,admission_type_id,0,1
1,1,Emergency,0,1,0
2,2,Urgent,0,1,0
3,3,Elective,0,1,0
4,4,Newborn,0,1,0
5,5,Not Available,0,1,0
6,6,,0,1,0
7,7,Trauma Center,0,1,0
8,8,Not Mapped,0,1,0
9,,,0,1,0


In [25]:
#4. Now we fill the appropriate column name for its keys using replace() and ffill() method,
#   which fills down the column name.
#   Thanks this, we will know which column has which keys
_ = ids_df['val_key'] = ids_df['val_key'].replace(0, pd.NA ).ffill()
ids_df.head(100)

Unnamed: 0,val,desc,val_key,val_num,val_header
0,admission_type_id,description,admission_type_id,0,1
1,1,Emergency,admission_type_id,1,0
2,2,Urgent,admission_type_id,1,0
3,3,Elective,admission_type_id,1,0
4,4,Newborn,admission_type_id,1,0
...,...,...,...,...,...
63,22,Transfer from hospital inpt/same fac reslt in...,admission_source_id,1,0
64,23,Born inside this hospital,admission_source_id,1,0
65,24,Born outside this hospital,admission_source_id,1,0
66,25,Transfer from Ambulatory Surgery Center,admission_source_id,1,0


In [26]:
#5. Now extract the column keys to list
keys = ids_df[ids_df['val_key'] != 0]['val_key'].unique().tolist()
print(keys)

['admission_type_id', 'discharge_disposition_id', 'admission_source_id']


In [27]:
#6. After the extraction of column names, create separate dataframes for them
admission_type = ids_df[(ids_df.val_key == 'admission_type_id') & (ids_df.val_header == 0)][['val', 'desc']]
discharge_df = ids_df[(ids_df.val_key == 'discharge_disposition_id') & (ids_df.val_header == 0)][['val', 'desc']]
admission_source_df = ids_df[(ids_df.val_key == 'admission_source_id') & (ids_df.val_header == 0)][['val', 'desc']]

In [28]:
#View of the separate dataframe for admission_type
admission_type.head(10)

Unnamed: 0,val,desc
1,1.0,Emergency
2,2.0,Urgent
3,3.0,Elective
4,4.0,Newborn
5,5.0,Not Available
6,6.0,
7,7.0,Trauma Center
8,8.0,Not Mapped
9,,


Now, we have 3 separate dataframes for 3 columns. In order to do mapping of these columns in our Diabetes dataframe,
we will need to make dictionaries from these dataframe and then we will perform mapping.

In [29]:
#7. Making dictionaries from the separate dataframes in order to do mapping
admission_type_id = {}
discharge_id = {}
admission_source_id = {}

for i,row in admission_type.iterrows():
    admission_type_id[row['val']] = row['desc']

for i,row in discharge_df.iterrows():
    discharge_id[row['val']] = row['desc']

for i,row in admission_source_df.iterrows():
    admission_source_id[row['val']] = row['desc']

In [30]:
#Show dictionary items for admission_type column
list(admission_type_id.items())

[('1', 'Emergency'),
 ('2', 'Urgent'),
 ('3', 'Elective'),
 ('4', 'Newborn'),
 ('5', 'Not Available'),
 ('6', 'nan'),
 ('7', 'Trauma Center'),
 ('8', 'Not Mapped'),
 ('nan', 'nan')]

In [31]:
#Show dictionary items for discharge_id column
list(discharge_id.items())

[('1', 'Discharged to home'),
 ('2', 'Discharged/transferred to another short term hospital'),
 ('3', 'Discharged/transferred to SNF'),
 ('4', 'Discharged/transferred to ICF'),
 ('5', 'Discharged/transferred to another type of inpatient care institution'),
 ('6', 'Discharged/transferred to home with home health service'),
 ('7', 'Left AMA'),
 ('8', 'Discharged/transferred to home under care of Home IV provider'),
 ('9', 'Admitted as an inpatient to this hospital'),
 ('10', 'Neonate discharged to another hospital for neonatal aftercare'),
 ('11', 'Expired'),
 ('12', 'Still patient or expected to return for outpatient services'),
 ('13', 'Hospice / home'),
 ('14', 'Hospice / medical facility'),
 ('15',
  'Discharged/transferred within this institution to Medicare approved swing bed'),
 ('16',
  'Discharged/transferred/referred another institution for outpatient services'),
 ('17',
  'Discharged/transferred/referred to this institution for outpatient services'),
 ('18', 'nan'),
 ('19', 'E

In [32]:
#Show dictionary items for admission_source_id column
list(admission_source_id.items())

[('1', ' Physician Referral'),
 ('2', 'Clinic Referral'),
 ('3', 'HMO Referral'),
 ('4', 'Transfer from a hospital'),
 ('5', ' Transfer from a Skilled Nursing Facility (SNF)'),
 ('6', ' Transfer from another health care facility'),
 ('7', ' Emergency Room'),
 ('8', ' Court/Law Enforcement'),
 ('9', ' Not Available'),
 ('10', ' Transfer from critial access hospital'),
 ('11', 'Normal Delivery'),
 ('12', ' Premature Delivery'),
 ('13', ' Sick Baby'),
 ('14', ' Extramural Birth'),
 ('15', 'Not Available'),
 ('17', 'nan'),
 ('18', ' Transfer From Another Home Health Agency'),
 ('19', 'Readmission to Same Home Health Agency'),
 ('20', ' Not Mapped'),
 ('21', 'Unknown/Invalid'),
 ('22', ' Transfer from hospital inpt/same fac reslt in a sep claim'),
 ('23', ' Born inside this hospital'),
 ('24', ' Born outside this hospital'),
 ('25', ' Transfer from Ambulatory Surgery Center'),
 ('26', 'Transfer from Hospice')]

Now we do mapping to the Diabetes source columns using created dictionaries...
We will use lambda (anonymous) function for it, shown below

In [34]:
#8. Do mapping
pdf['admission_type_desc'] = pdf['admission_type_id'].apply(lambda x: admission_type_id.get(str(x), pd.NA))
pdf['discharge_disposition_desc'] = pdf['discharge_disposition_id'].apply(lambda x: discharge_id.get(str(x), pd.NA))
pdf['admission_source_desc'] = pdf['admission_source_id'].apply(lambda x: admission_source_id.get(str(x), pd.NA))

In [35]:
#Show source column and with its performed mapping
pdf[['admission_type_id','admission_type_desc']]

Unnamed: 0,admission_type_id,admission_type_desc
0,6,
1,1,Emergency
2,1,Emergency
3,1,Emergency
4,1,Emergency
...,...,...
34398,1,Emergency
34399,1,Emergency
34400,1,Emergency
34401,2,Urgent


In [37]:
#You can check the correctnes of the mapping by showing the related dictionory
print(list(admission_type_id.items()))

[('1', 'Emergency'), ('2', 'Urgent'), ('3', 'Elective'), ('4', 'Newborn'), ('5', 'Not Available'), ('6', 'nan'), ('7', 'Trauma Center'), ('8', 'Not Mapped'), ('nan', 'nan')]


Same applies for the other 2 columns.
But, we have to also need to change the order of columns to make sure that source ID columns and their descriptive columns are next to each other!
To save the changes, we will need to serialize the loaded and mapped dataset using Pandas pickle method

In [77]:
#Get df columns
df_cols = pdf.columns.tolist()
print(df_cols)
print(len(df_cols))

['encounter_id', 'patient_nbr', 'race', 'gender', 'age', 'weight', 'admission_type_id', 'discharge_disposition_id', 'admission_source_id', 'time_in_hospital', 'payer_code', 'medical_specialty', 'num_lab_procedures', 'num_procedures', 'num_medications', 'number_outpatient', 'number_emergency', 'number_inpatient', 'diag_1', 'diag_2', 'diag_3', 'number_diagnoses', 'max_glu_serum', 'A1Cresult', 'metformin', 'repaglinide', 'nateglinide', 'chlorpropamide', 'glimepiride', 'acetohexamide', 'glipizide', 'glyburide', 'tolbutamide', 'pioglitazone', 'rosiglitazone', 'acarbose', 'miglitol', 'troglitazone', 'tolazamide', 'examide', 'citoglipton', 'insulin', 'glyburide-metformin', 'glipizide-metformin', 'glimepiride-pioglitazone', 'metformin-rosiglitazone', 'metformin-pioglitazone', 'change', 'diabetesMed', 'readmitted', 'admission_type_desc', 'discharge_disposition_desc', 'admission_source_desc']
53


In [78]:
id1 = int(df_cols.index('admission_type_id'))
id2 = int(df_cols.index('discharge_disposition_id'))
id3 = int(df_cols.index('admission_source_id'))

In [79]:
id1

6

In [80]:
df_cols = [*df_cols[:id1+1], df_cols[-3],  df_cols[id2], df_cols[-2], df_cols[id3], df_cols[-1], *df_cols[id3+1:-3]]

In [81]:
df_cols

['encounter_id',
 'patient_nbr',
 'race',
 'gender',
 'age',
 'weight',
 'admission_type_id',
 'admission_type_desc',
 'discharge_disposition_id',
 'discharge_disposition_desc',
 'admission_source_id',
 'admission_source_desc',
 'time_in_hospital',
 'payer_code',
 'medical_specialty',
 'num_lab_procedures',
 'num_procedures',
 'num_medications',
 'number_outpatient',
 'number_emergency',
 'number_inpatient',
 'diag_1',
 'diag_2',
 'diag_3',
 'number_diagnoses',
 'max_glu_serum',
 'A1Cresult',
 'metformin',
 'repaglinide',
 'nateglinide',
 'chlorpropamide',
 'glimepiride',
 'acetohexamide',
 'glipizide',
 'glyburide',
 'tolbutamide',
 'pioglitazone',
 'rosiglitazone',
 'acarbose',
 'miglitol',
 'troglitazone',
 'tolazamide',
 'examide',
 'citoglipton',
 'insulin',
 'glyburide-metformin',
 'glipizide-metformin',
 'glimepiride-pioglitazone',
 'metformin-rosiglitazone',
 'metformin-pioglitazone',
 'change',
 'diabetesMed',
 'readmitted']

In [82]:
len(df_cols)

53

In [83]:
pdf = pdf[df_cols]

In [84]:
pdf.head()

Unnamed: 0,encounter_id,patient_nbr,race,gender,age,weight,admission_type_id,admission_type_desc,discharge_disposition_id,discharge_disposition_desc,...,citoglipton,insulin,glyburide-metformin,glipizide-metformin,glimepiride-pioglitazone,metformin-rosiglitazone,metformin-pioglitazone,change,diabetesMed,readmitted
0,2278392,8222157,Caucasian,Female,[0-10),?,6,,25,Not Mapped,...,No,No,No,No,No,No,No,No,No,NO
1,149190,55629189,Caucasian,Female,[10-20),?,1,Emergency,1,Discharged to home,...,No,Up,No,No,No,No,No,Ch,Yes,>30
2,64410,86047875,AfricanAmerican,Female,[20-30),?,1,Emergency,1,Discharged to home,...,No,No,No,No,No,No,No,No,Yes,NO
3,500364,82442376,Caucasian,Male,[30-40),?,1,Emergency,1,Discharged to home,...,No,Up,No,No,No,No,No,Ch,Yes,NO
4,16680,42519267,Caucasian,Male,[40-50),?,1,Emergency,1,Discharged to home,...,No,Steady,No,No,No,No,No,Ch,Yes,NO


### Serialize loaded and mapped dataset to .csv file for next steps

In [88]:
#Using pandas.to_csv method
today = dt.date.today().strftime('%Y-%m-%d')
pdf.to_csv(f'{AGEL_DATA_DIR}/data_{today}.csv', index=False)