# Data Preparation
- This notebook has not considered in dropping duplicates / handling missing value imputation
- Unprocessed columns are:
    - RCRI score
    - Preoptransfusionwithin30days
    - Intraop
    - Postopwithin30days
    - TransfusionIntraandpostopCategory
    - Transfusionintraandpostop
    - AnaestypeCategory

In [1]:
import numpy as np
import pandas as pd

from tqdm.notebook import tqdm
tqdm.pandas()

In [None]:
cares_data_path = "https://github.com/ISSS623-AHA/ISSS623_2024/raw/main/Group_Project/Group_Project-SGH/CARES_data.xlsx"
cares = pd.read_excel(cares_data_path, "Sheet1")
cares.head()

In [4]:
cares.tail()

Unnamed: 0,Indexno,AGE,GENDER,RCRI score,Anemia category,PreopEGFRMDRD,GradeofKidneydisease,DaysbetweenDeathandoperation,@30daymortality,Preoptransfusionwithin30days,...,CVARCRICategory,IHDRCRICategory,CHFRCRICategory,DMinsulinRCRICategory,CreatinineRCRICategory,GradeofKidneyCategory,Anemiacategorybinned,RDW15.7,ASAcategorybinned,ICUAdmgt24h
90783,121198,63.0,FEMALE,0.0,,,BLANK,,NO,0.0,...,no,no,no,no,no,#NULL!,#NULL!,#NULL!,II,no
90784,121200,45.0,FEMALE,0.0,mild,125.902498,g1,,NO,0.0,...,no,no,no,no,no,G1,Mild,<= 15.7,II,no
90785,,,,,,,,,,,...,,,,,,,,,,
90786,,,,,,,,,,,...,,,,,,,,,,
90787,? Duplicates,,,,,,,,,,...,,,,,,,,,,


In [5]:
cares.shape

(90788, 32)

In [6]:
cares.columns

Index(['Indexno', 'AGE', 'GENDER', 'RCRI score', 'Anemia category',
       'PreopEGFRMDRD', 'GradeofKidneydisease', 'DaysbetweenDeathandoperation',
       '@30daymortality', 'Preoptransfusionwithin30days', 'Intraop',
       'Postopwithin30days', 'Transfusionintraandpostop', 'AnaestypeCategory',
       'PriorityCategory', 'TransfusionIntraandpostopCategory', 'AGEcategory',
       'AGEcategoryOriginal', 'Mortality', 'thirtydaymortality',
       'SurgRiskCategory', 'RaceCategory', 'CVARCRICategory',
       'IHDRCRICategory', 'CHFRCRICategory', 'DMinsulinRCRICategory',
       'CreatinineRCRICategory', 'GradeofKidneyCategory',
       'Anemiacategorybinned', 'RDW15.7', 'ASAcategorybinned', 'ICUAdmgt24h'],
      dtype='object')

## EDA

In [7]:
# remove the last 3 rows because there's a weird "? Duplicates" cell on the excel file
cares.drop(cares.index[-3:], axis=0, inplace=True)
cares.tail()

Unnamed: 0,Indexno,AGE,GENDER,RCRI score,Anemia category,PreopEGFRMDRD,GradeofKidneydisease,DaysbetweenDeathandoperation,@30daymortality,Preoptransfusionwithin30days,...,CVARCRICategory,IHDRCRICategory,CHFRCRICategory,DMinsulinRCRICategory,CreatinineRCRICategory,GradeofKidneyCategory,Anemiacategorybinned,RDW15.7,ASAcategorybinned,ICUAdmgt24h
90780,121192,66.0,FEMALE,2.0,mild,48.339582,G3a,,NO,0.0,...,no,no,no,yes,no,G3,Mild,<= 15.7,II,no
90781,121194,50.0,MALE,1.0,moderate,126.592489,g1,,NO,0.0,...,no,no,no,yes,no,G1,Moderate/Severe,<= 15.7,#NULL!,no
90782,121197,58.0,FEMALE,,none,86.306771,G2,,NO,0.0,...,#NULL!,#NULL!,#NULL!,#NULL!,#NULL!,G2,,<= 15.7,#NULL!,no
90783,121198,63.0,FEMALE,0.0,,,BLANK,,NO,0.0,...,no,no,no,no,no,#NULL!,#NULL!,#NULL!,II,no
90784,121200,45.0,FEMALE,0.0,mild,125.902498,g1,,NO,0.0,...,no,no,no,no,no,G1,Mild,<= 15.7,II,no


In [8]:
# drop index number
cares.drop("Indexno", axis=1, inplace=True)

In [9]:
cares.shape

(90785, 31)

In [10]:
cares.describe()

Unnamed: 0,AGE,RCRI score,PreopEGFRMDRD,DaysbetweenDeathandoperation,Preoptransfusionwithin30days,Intraop,Postopwithin30days,Transfusionintraandpostop
count,90785.0,63361.0,79955.0,5595.0,90785.0,90785.0,90785.0,90785.0
mean,52.253225,0.322296,96.426155,476.782127,0.042672,0.056276,0.018946,0.075222
std,17.087307,0.634789,33.954241,421.86674,0.40584,0.230455,0.260728,0.395623
min,18.0,0.0,2.541026,0.0,0.0,0.0,0.0,0.0
25%,39.0,0.0,79.078715,121.0,0.0,0.0,0.0,0.0
50%,54.0,0.0,96.398561,355.0,0.0,0.0,0.0,0.0
75%,65.0,1.0,114.310169,746.0,0.0,0.0,0.0,0.0
max,103.0,6.0,671.298147,1783.0,21.0,1.0,23.0,24.0


In [11]:
cares.isna().describe()

Unnamed: 0,AGE,GENDER,RCRI score,Anemia category,PreopEGFRMDRD,GradeofKidneydisease,DaysbetweenDeathandoperation,@30daymortality,Preoptransfusionwithin30days,Intraop,...,CVARCRICategory,IHDRCRICategory,CHFRCRICategory,DMinsulinRCRICategory,CreatinineRCRICategory,GradeofKidneyCategory,Anemiacategorybinned,RDW15.7,ASAcategorybinned,ICUAdmgt24h
count,90785,90785,90785,90785,90785,90785,90785,90785,90785,90785,...,90785,90785,90785,90785,90785,90785,90785,90785,90785,90785
unique,1,1,2,2,2,1,2,1,1,1,...,1,1,1,1,1,1,2,1,1,1
top,False,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,True,False,False,False
freq,90785,90785,63361,86747,79955,90785,85190,90785,90785,90785,...,90785,90785,90785,90785,90785,90785,62878,90785,90785,90785


Check Missing values

**Note**: the paper got 79914 rows after cleaning missing values but we seem to cannot replicate this number

In [12]:
for column_name in cares.columns:
    print(cares[column_name].value_counts())
    print(cares[column_name].isna().value_counts())
    print()

AGE
61.0     2104
65.0     2103
64.0     2033
63.0     1999
60.0     1989
         ... 
99.0        4
100.0       2
102.0       1
101.0       1
103.0       1
Name: count, Length: 86, dtype: int64
AGE
False    90785
Name: count, dtype: int64

GENDER
FEMALE    48708
MALE      42077
Name: count, dtype: int64
GENDER
False    90785
Name: count, dtype: int64

RCRI score
0.0    47385
1.0    12653
2.0     2441
3.0      679
4.0      168
5.0       33
6.0        2
Name: count, dtype: int64
RCRI score
False    63361
True     27424
Name: count, dtype: int64

Anemia category
none        62878
mild        13006
moderate    10439
severe        424
Name: count, dtype: int64
Anemia category
False    86747
True      4038
Name: count, dtype: int64

PreopEGFRMDRD
104.452527    50
105.027160    49
104.657013    46
111.268426    46
95.529518     45
              ..
54.267341      1
46.530834      1
14.005128      1
44.728352      1
38.424462      1
Name: count, Length: 14893, dtype: int64
PreopEGFRMDRD
False

In [13]:
from ydata_profiling import ProfileReport

In [14]:
# early profiling
profile = ProfileReport(cares)
profile.to_file(output_file='../output/pre_processed_data_profile.html')

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

## Data Cleaning

Drop the exact duplicates if exist

In [15]:
# cares.drop_duplicates(inplace=True)

In [16]:
cares.shape

(90785, 31)

### Labels
- @30daymortality
- thirtydaymortality
- Mortality
- DaysbetweenDeathandoperation
- ICUAdmgt24h

check if these 2 labels 30 days mortality are the same

In [17]:
cares["@30daymortality"] = cares["@30daymortality"] == "YES"
cares["thirtydaymortality"] = cares["thirtydaymortality"] == "Yes"

In [18]:
any(cares["@30daymortality"] == cares["thirtydaymortality"])

True

In [19]:
cares.drop("@30daymortality", axis=1, inplace=True)

^ Exactly the same. Can drop one

In [20]:
cares["DaysbetweenDeathandoperation"].describe()

count    5595.000000
mean      476.782127
std       421.866740
min         0.000000
25%       121.000000
50%       355.000000
75%       746.000000
max      1783.000000
Name: DaysbetweenDeathandoperation, dtype: float64

^ from 2012-01-01 to 2016-10-31 is only 1765 days. But some days are more than this number

In [21]:
cares["ICUAdmgt24h"].value_counts()

ICUAdmgt24h
no     89521
yes     1264
Name: count, dtype: int64

In [22]:
cares["ICUAdmgt24h"].isna().value_counts()

ICUAdmgt24h
False    90785
Name: count, dtype: int64

### Features

In [23]:
cares.columns

Index(['AGE', 'GENDER', 'RCRI score', 'Anemia category', 'PreopEGFRMDRD',
       'GradeofKidneydisease', 'DaysbetweenDeathandoperation',
       'Preoptransfusionwithin30days', 'Intraop', 'Postopwithin30days',
       'Transfusionintraandpostop', 'AnaestypeCategory', 'PriorityCategory',
       'TransfusionIntraandpostopCategory', 'AGEcategory',
       'AGEcategoryOriginal', 'Mortality', 'thirtydaymortality',
       'SurgRiskCategory', 'RaceCategory', 'CVARCRICategory',
       'IHDRCRICategory', 'CHFRCRICategory', 'DMinsulinRCRICategory',
       'CreatinineRCRICategory', 'GradeofKidneyCategory',
       'Anemiacategorybinned', 'RDW15.7', 'ASAcategorybinned', 'ICUAdmgt24h'],
      dtype='object')

#### Age
make binning on age, making sure match with the paper. Use the same binning as the paper suggest

In [24]:
cares["AGE"].describe()

count    90785.000000
mean        52.253225
std         17.087307
min         18.000000
25%         39.000000
50%         54.000000
75%         65.000000
max        103.000000
Name: AGE, dtype: float64

In [25]:
cares["AGEcategoryOriginal"].value_counts().sort_index()

AGEcategoryOriginal
18-29    11052
30-49    27078
50-69    37360
>=70     15295
Name: count, dtype: int64

In [26]:
cares["AGEcategory"].value_counts().sort_index()

AGEcategory
18-29    11052
30-49    27078
50-64    28227
65-74    15837
75-84     7256
>=85      1335
Name: count, dtype: int64

In [27]:
cares.drop(["AGE", "AGEcategoryOriginal"], axis=1, inplace=True)

#### Gender
No need to do anything about gender

In [28]:
cares["GENDER"].value_counts().sort_index()

GENDER
FEMALE    48708
MALE      42077
Name: count, dtype: int64

#### Race

In [29]:
cares["RaceCategory"].value_counts().sort_index()

RaceCategory
#NULL!         6
Chinese    64861
Indian      8012
Malay       8979
Others      8927
Name: count, dtype: int64

In [30]:
# change the NULL to NaN
cares["RaceCategory"].replace('#NULL!',np.nan,inplace=True)
cares["RaceCategory"].value_counts().sort_index()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cares["RaceCategory"].replace('#NULL!',np.nan,inplace=True)


RaceCategory
Chinese    64861
Indian      8012
Malay       8979
Others      8927
Name: count, dtype: int64

In [31]:
cares["RaceCategory"].isna().value_counts()

RaceCategory
False    90779
True         6
Name: count, dtype: int64

#### ASA Classification

In [32]:
cares["ASAcategorybinned"].value_counts().sort_index()

ASAcategorybinned
#NULL!     4819
I         22047
II        49435
III       13405
IV-VI      1079
Name: count, dtype: int64

In [33]:
# change the NULL to NaN
cares["ASAcategorybinned"].replace('#NULL!',np.nan,inplace=True)
cares["ASAcategorybinned"].value_counts().sort_index()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cares["ASAcategorybinned"].replace('#NULL!',np.nan,inplace=True)


ASAcategorybinned
I        22047
II       49435
III      13405
IV-VI     1079
Name: count, dtype: int64

In [34]:
cares["ASAcategorybinned"].isna().value_counts()

ASAcategorybinned
False    85966
True      4819
Name: count, dtype: int64

#### Anemia
There are 2 columns for anemia, make sure they're the same first

In [35]:
cares["Anemia category"].value_counts()

Anemia category
none        62878
mild        13006
moderate    10439
severe        424
Name: count, dtype: int64

In [36]:
cares["Anemiacategorybinned"].value_counts()

Anemiacategorybinned
Mild               13006
Moderate/Severe    10863
#NULL!              4038
Name: count, dtype: int64

In [37]:
print(all((cares["Anemia category"] == "mild").index == (cares["Anemiacategorybinned"] == "Mild").index))
print(all((cares["Anemia category"].isin(["moderate", "severe"])).index == (cares["Anemiacategorybinned"] == "Moderate/Severe").index))

True
True


After checking:
- make `moderate` and `severe` in one category
    - we agree on combining `moderate` and `severe` in 1 class because they equally need treatment in comparison to `none` and `mild`
- remove `Anemiacategorybinned` column

Because Anemia is ordinal data, can later convert them using labelencoding =
```JSON
{
    "none": 0,
    "mild": 1,
    "moderate/severe": 2
}
```

In [38]:
cares["Anemia category"].replace('moderate', 'moderate/severe', inplace=True)
cares["Anemia category"].replace('severe', 'moderate/severe', inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cares["Anemia category"].replace('moderate', 'moderate/severe', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cares["Anemia category"].replace('severe', 'moderate/severe', inplace=True)


In [39]:
cares["Anemia category"].value_counts()

Anemia category
none               62878
mild               13006
moderate/severe    10863
Name: count, dtype: int64

In [40]:
cares.drop("Anemiacategorybinned", axis=1, inplace=True)

#### RDW

In [41]:
cares["RDW15.7"].value_counts()

RDW15.7
<= 15.7    76069
>15.7       8478
#NULL!      6238
Name: count, dtype: int64

In [42]:
cares["RDW15.7"].replace('#NULL!',np.nan,inplace=True)
cares["RDW15.7"].value_counts()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cares["RDW15.7"].replace('#NULL!',np.nan,inplace=True)


RDW15.7
<= 15.7    76069
>15.7       8478
Name: count, dtype: int64

#### Grade of CKD

Source: https://www.kidney.org/professionals/explore-your-knowledge/how-to-classify-ckd

In [43]:
cares["GradeofKidneyCategory"].value_counts()

GradeofKidneyCategory
G1        47948
G2        23635
#NULL!    10830
G3         5114
G4-G5      3258
Name: count, dtype: int64

In [44]:
cares["GradeofKidneyCategory"].replace('#NULL!', np.nan, inplace=True)
cares["GradeofKidneyCategory"].value_counts()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cares["GradeofKidneyCategory"].replace('#NULL!', np.nan, inplace=True)


GradeofKidneyCategory
G1       47948
G2       23635
G3        5114
G4-G5     3258
Name: count, dtype: int64

In [45]:
cares[cares["GradeofKidneyCategory"].isna()]["AGEcategory"].value_counts().sort_index()

AGEcategory
18-29    3385
30-49    5073
50-64    1493
65-74     638
75-84     210
>=85       31
Name: count, dtype: int64

In [46]:
cares["GradeofKidneydisease"].value_counts()

GradeofKidneydisease
g1       47948
G2       23635
BLANK    10830
G3a       3425
G5        2059
G3b       1689
G4        1199
Name: count, dtype: int64

In [47]:
cares.drop("GradeofKidneydisease", axis=1, inplace=True)

#### CVA

RCRI stands for "Revised Cardiac Risk Index". According to the paper, NULL here means the same as no.


"NULL" here is assumed as "not tested". It is assumed as not tested because they are not susceptible of the risk, that's why we combine the "NULL" and the "no" class together

In [48]:
cares["CVARCRICategory"].value_counts()

CVARCRICategory
no        60917
#NULL!    28325
yes        1543
Name: count, dtype: int64

In [49]:
cares["CVARCRICategory"].replace('#NULL!', 'no', inplace=True)
cares["CVARCRICategory"].value_counts()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cares["CVARCRICategory"].replace('#NULL!', 'no', inplace=True)


CVARCRICategory
no     89242
yes     1543
Name: count, dtype: int64

#### IHD

In [50]:
cares["IHDRCRICategory"].value_counts()

IHDRCRICategory
no        57968
#NULL!    28572
yes        4245
Name: count, dtype: int64

In [51]:
cares["IHDRCRICategory"].replace('#NULL!', 'no', inplace=True)
cares["IHDRCRICategory"].value_counts()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cares["IHDRCRICategory"].replace('#NULL!', 'no', inplace=True)


IHDRCRICategory
no     86540
yes     4245
Name: count, dtype: int64

#### CHF

In [52]:
cares["CHFRCRICategory"].value_counts()

CHFRCRICategory
no        63739
#NULL!    26259
yes         787
Name: count, dtype: int64

In [53]:
cares["CHFRCRICategory"].replace('#NULL!', 'no', inplace=True)
cares["CHFRCRICategory"].value_counts()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cares["CHFRCRICategory"].replace('#NULL!', 'no', inplace=True)


CHFRCRICategory
no     89998
yes      787
Name: count, dtype: int64

#### DM on Insulin

In [54]:
cares["DMinsulinRCRICategory"].value_counts()

DMinsulinRCRICategory
no        61907
#NULL!    26875
yes        2003
Name: count, dtype: int64

In [55]:
cares["DMinsulinRCRICategory"].replace('#NULL!', 'no', inplace=True)
cares["DMinsulinRCRICategory"].value_counts()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cares["DMinsulinRCRICategory"].replace('#NULL!', 'no', inplace=True)


DMinsulinRCRICategory
no     88782
yes     2003
Name: count, dtype: int64

#### Creatinine

In [56]:
cares["CreatinineRCRICategory"].value_counts()

CreatinineRCRICategory
no        72760
#NULL!    15743
yes        2282
Name: count, dtype: int64

In [57]:
cares["CreatinineRCRICategory"].replace('#NULL!', 'no', inplace=True)
cares["CreatinineRCRICategory"].value_counts()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cares["CreatinineRCRICategory"].replace('#NULL!', 'no', inplace=True)


CreatinineRCRICategory
no     88503
yes     2282
Name: count, dtype: int64

#### Surgical Risk
No need to do anything

In [58]:
cares["SurgRiskCategory"].value_counts()

SurgRiskCategory
Low         48049
Moderate    39014
High         3722
Name: count, dtype: int64

In [59]:
cares["SurgRiskCategory"].isna().value_counts()

SurgRiskCategory
False    90785
Name: count, dtype: int64

#### Priority of Surgery
No need to do anything

In [60]:
cares["PriorityCategory"].value_counts()

PriorityCategory
Elective     72331
Emergency    18454
Name: count, dtype: int64

In [61]:
cares["PriorityCategory"].isna().value_counts()

PriorityCategory
False    90785
Name: count, dtype: int64

In [62]:
cares.head()

Unnamed: 0,GENDER,RCRI score,Anemia category,PreopEGFRMDRD,DaysbetweenDeathandoperation,Preoptransfusionwithin30days,Intraop,Postopwithin30days,Transfusionintraandpostop,AnaestypeCategory,...,RaceCategory,CVARCRICategory,IHDRCRICategory,CHFRCRICategory,DMinsulinRCRICategory,CreatinineRCRICategory,GradeofKidneyCategory,RDW15.7,ASAcategorybinned,ICUAdmgt24h
0,FEMALE,,,,,0.0,0.0,0.0,0.0,GA,...,Chinese,no,no,no,no,no,,,I,no
1,FEMALE,,none,,,0.0,0.0,0.0,0.0,GA,...,Chinese,no,no,no,no,no,,<= 15.7,I,no
2,FEMALE,,mild,152.53857,,0.0,0.0,0.0,0.0,GA,...,Chinese,no,no,no,no,no,G1,<= 15.7,I,no
3,MALE,,moderate/severe,117.231496,,0.0,1.0,0.0,1.0,GA,...,Chinese,no,no,no,no,no,G1,<= 15.7,I,no
4,MALE,0.0,mild,98.651255,59.0,0.0,0.0,0.0,0.0,GA,...,Chinese,no,no,no,no,no,G1,>15.7,II,no


In [64]:
# post-processing profiling
profile = ProfileReport(cares)
profile.to_file(output_file='../output/post_processed_data_profile.html')

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

## Save as CSV

In [65]:
cares.to_csv("../data/CARES_dataset_clean.csv", index=False)