# Only for the year 2019

In [43]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import io
import statsmodels.api as sm
from scipy import stats
import geopandas as gpd
from google.colab import drive

In [44]:
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [45]:
#Import the data on the infrastructure of the hospital (2008-2021)
url = "https://spitalstatistik.bagapps.ch/data/download/kzp21_KZ_TimeSerie.xlsx?v=1678279041"
sheet = 'KZ2008-KZ2021'
hospital_data = pd.read_excel(url, sheet_name=sheet)

#Only the 2021 data
hospital_data = hospital_data[hospital_data['JAHR'] == 2019]

In [46]:
#Import the data on the 2019 cases, cleaned by Lenja and in the shared drive under data
cases = pd.read_csv('/content/drive/MyDrive/Module1_CDR/DATA/2019_clean.csv', delimiter=';')


In [47]:
columns_to_drop = ['taux observE 2019', 'taux attendu 2019', 'SMR 2019']
cases = cases.drop(columns=columns_to_drop)

## Infrastructure of hospitals

In [48]:
hospital_data.tail()

Unnamed: 0,JAHR,KT,Status,Inst,Adr,Ort,Typ,Notfalldienst,Infrastruktur1,Infrastruktur2,...,ErlOKPAmbB,ErlStatB,ErlKVGStatB,ErlKVGStatVB,ErlZvOKPStatB,ErlZvOKPStatVB,ErlLangB,PTageLang,AustLang,KostLangT
3517,2019,ZH,,Geburtshaus Delphys,Badenerstrasse 177,8003 Zürich,K232,,,,...,281464.0,,1857869.0,44.999997,0.0,,,,,
3518,2019,ZH,,Universitäts-Kinderspital Zürich das Spital de...,Steinwiesstrasse 75,8032 Zürich,K233,,,,...,,,,,,,,,,
3519,2019,ZH,,Klinik Lengg AG,Bleulerstrasse 60,8008 Zürich,K235,,,,...,,,,,,,,,,
3520,2019,ZH,,Klinik Susenberg,Schreberweg 9,8044 Zürich,K235,,,,...,,,,,,,,,,
3521,2019,ZH,,Sune-Egge,Konradstrasse 62,8005 Zürich,K235,,,,...,,,,,,,,,,


In [49]:
# everyline is a unique hospital
duplicates = hospital_data.duplicated(['Inst'])
count_duplicates = duplicates.sum()
count_duplicates

0

## Cases

## Clean the "nombre de cas 2019"

In [50]:
cases.head()

Unnamed: 0,institution,indicator,nombre de cas 2019
0,Les HÙpitaux Universitaires de GenËve HUG - HU...,A Maladies cardiaques,
1,Les HÙpitaux Universitaires de GenËve HUG - HU...,A.1 Infarctus du myocarde,
2,Les HÙpitaux Universitaires de GenËve HUG - HU...,"A.1.1.M DP infarctus du myocarde (‚ge >19), mo...",572.0
3,Les HÙpitaux Universitaires de GenËve HUG - HU...,"A.1.2.M DP infarctus du myocarde, ‚ge 20-44, m...",37.0
4,Les HÙpitaux Universitaires de GenËve HUG - HU...,"A.1.3.M DP infarctus du myocarde, ‚ge 45-64, m...",220.0


Let's clean the "number_of_cases_2021" variable

In [51]:
## Information about the format of the data in the variable:
# From https://spitalstatistik.bagapps.ch/data/download/qip21_publikation.pdf?v=1680713881, page 4

# Verwendete Zeichen, wenn keine Zahlenangabe erfolgt
# ... Zahl unbekannt, weil (noch) nicht erhoben oder (noch) nicht berechnet
# * entfällt, weil trivial oder Begriffe nicht anwendbar
# – wird bei gerundeten Zahlen verwendet und steht hier für den Wert absolut null

# Signes utilisés en l’absence de chiffres
# ... chiffre inconnu (pas [encore] relevé ou pas [encore] calculé)
# * non indiqué car évident ou non pertinent
# – utilisé pour les chiffres arrondis ; signifie zéro

In [52]:
valeur_nb_cases=(cases['nombre de cas 2019'].unique())
#valeur_nb_cases.tolist()

In [53]:
# Chang thousands separator
#Creation of a new variable called "number_of_cases_2" which is the cleaned variable
cases['number_of_cases_2'] = cases['nombre de cas 2019'].apply(lambda x: int(x.replace("'", "")) if isinstance(x, str) and x.replace("'", "").isdigit() else x)

In [54]:
# Identify cells which start with a name (ex: Fallzahl 2021) and delete
# And creation of a new dataset

## Create a Boolean mask to filter the rows to keep
masque = ~(cases['number_of_cases_2'].str.startswith('nomb') | cases['number_of_cases_2'].str.startswith('nume') | cases['number_of_cases_2'].str.startswith('Fall'))

## Apply the mask to delete the corresponding lines and create a new dataset
cases_2 = cases[masque]

In [55]:
# Identify if some no digit value remain:
import re
non_digit_start = cases_2[cases_2['number_of_cases_2'].apply(lambda x: not re.match(r'^\d', str(x)))]
value_nb_cases_bizarre=(non_digit_start['number_of_cases_2'].unique())
value_nb_cases_bizarre
# there are still ' ', '-'

array([' ', '-'], dtype=object)

In [56]:
# For value "-", " " and empty, I delete the rows
masque = ~(cases_2['number_of_cases_2'].str.startswith(' ') | cases_2['number_of_cases_2'].str.startswith('-'))
cases_3 = cases_2[masque]

#cases_3.dropna(subset=['number_of_cases_2'], inplace=True)

In [57]:
# number_of_cases_2 should a float
float_value = cases_3['number_of_cases_2'].astype(float)

cases_3 = cases_3.assign(number_of_cases_2=float_value)

In [58]:
print('the shape of the initial dataset:', cases.shape)
print('the shape of the dataset after the cleaning:', cases_3.shape)

the shape of the initial dataset: (110879, 4)
the shape of the dataset after the cleaning: (91680, 4)


In [59]:
cases_3.drop(columns=['nombre de cas 2019'])

Unnamed: 0,institution,indicator,number_of_cases_2
2,Les HÙpitaux Universitaires de GenËve HUG - HU...,"A.1.1.M DP infarctus du myocarde (‚ge >19), mo...",572.0
3,Les HÙpitaux Universitaires de GenËve HUG - HU...,"A.1.2.M DP infarctus du myocarde, ‚ge 20-44, m...",37.0
4,Les HÙpitaux Universitaires de GenËve HUG - HU...,"A.1.3.M DP infarctus du myocarde, ‚ge 45-64, m...",220.0
5,Les HÙpitaux Universitaires de GenËve HUG - HU...,"A.1.4.M DP infarctus du myocarde, ‚ge 65-84, m...",253.0
6,Les HÙpitaux Universitaires de GenËve HUG - HU...,"A.1.5.M DP infarctus du myocarde, ‚ge >84, mor...",62.0
...,...,...,...
110874,Stiftung Ostschweizer Kinderspital - Romerhuus...,F.2.13.V Anteil Verlegungen bei Resektion/Ersa...,0.0
110875,Stiftung Ostschweizer Kinderspital - Romerhuus...,F.3.1.V Anteil Verlegungen bei OP an Becken≠/B...,0.0
110876,Stiftung Ostschweizer Kinderspital - Romerhuus...,I.1.8.V Anteil Verlegungen bei H¸ft≠Endoprothe...,0.0
110877,Stiftung Ostschweizer Kinderspital - Romerhuus...,J.1.1.V Anteil Verlegungen bei Beatmungsf‰llen...,0.0


In [60]:
duplicates = cases_3.duplicated(subset=['institution', 'indicator'])
count_duplicates = duplicates.sum()
count_duplicates

0

## Clean the indicators of the cases

In [61]:
valeur_nb_cases=(cases_3['indicator'].unique())
valeur_nb_cases.tolist()

['A.1.1.M DP infarctus du myocarde (‚ge >19), mortalitE',
 'A.1.2.M DP infarctus du myocarde, ‚ge 20-44, mortalitE',
 'A.1.3.M DP infarctus du myocarde, ‚ge 45-64, mortalitE',
 'A.1.4.M DP infarctus du myocarde, ‚ge 65-84, mortalitE',
 'A.1.5.M DP infarctus du myocarde, ‚ge >84, mortalitE',
 'A.1.7.M DP infarctus du myocarde (‚ge >19), admissions directes, non transfErE, mortalitE',
 "A.1.8.M DP infarctus du myocarde (‚ge >19), transfErE d'un autre hÙpital, mortalitE",
 'A.1.14.P DP infarctus du myocarde avec cathEtErisme des vaisseaux coronaires (‚ge >19), pourcentage',
 'A.1.15.P DP infarctus du myocarde avec cathEtErisme des vaisseaux coronaires (‚ge >19), admissions directes, pourcentage',
 "A.1.16.P DP infarctus du myocarde avec cathEtErisme des vaisseaux coronaires (‚ge >19), transfErE d'un autre hÙpital, pourcentage",
 'A.1.17.P DP infarctus du myocarde avec cathEtErisme cardiaque ou OP coronarienne (‚ge >19), admissions directes, pourcentage',
 'A.1.9.P Infarctus aigu du myocar

In [62]:
# Get only the codes: so keep everything before the first space
string = cases_3['indicator'].str.split(' ').str[0]
cases_3 = cases_3.assign(indicator_3=string)


In [63]:
cases_3

Unnamed: 0,institution,indicator,nombre de cas 2019,number_of_cases_2,indicator_3
2,Les HÙpitaux Universitaires de GenËve HUG - HU...,"A.1.1.M DP infarctus du myocarde (‚ge >19), mo...",572,572.0,A.1.1.M
3,Les HÙpitaux Universitaires de GenËve HUG - HU...,"A.1.2.M DP infarctus du myocarde, ‚ge 20-44, m...",37,37.0,A.1.2.M
4,Les HÙpitaux Universitaires de GenËve HUG - HU...,"A.1.3.M DP infarctus du myocarde, ‚ge 45-64, m...",220,220.0,A.1.3.M
5,Les HÙpitaux Universitaires de GenËve HUG - HU...,"A.1.4.M DP infarctus du myocarde, ‚ge 65-84, m...",253,253.0,A.1.4.M
6,Les HÙpitaux Universitaires de GenËve HUG - HU...,"A.1.5.M DP infarctus du myocarde, ‚ge >84, mor...",62,62.0,A.1.5.M
...,...,...,...,...,...
110874,Stiftung Ostschweizer Kinderspital - Romerhuus...,F.2.13.V Anteil Verlegungen bei Resektion/Ersa...,0,0.0,F.2.13.V
110875,Stiftung Ostschweizer Kinderspital - Romerhuus...,F.3.1.V Anteil Verlegungen bei OP an Becken≠/B...,0,0.0,F.3.1.V
110876,Stiftung Ostschweizer Kinderspital - Romerhuus...,I.1.8.V Anteil Verlegungen bei H¸ft≠Endoprothe...,0,0.0,I.1.8.V
110877,Stiftung Ostschweizer Kinderspital - Romerhuus...,J.1.1.V Anteil Verlegungen bei Beatmungsf‰llen...,0,0.0,J.1.1.V


In [64]:
cases_3.drop(columns=["indicator", 'nombre de cas 2019'])

Unnamed: 0,institution,number_of_cases_2,indicator_3
2,Les HÙpitaux Universitaires de GenËve HUG - HU...,572.0,A.1.1.M
3,Les HÙpitaux Universitaires de GenËve HUG - HU...,37.0,A.1.2.M
4,Les HÙpitaux Universitaires de GenËve HUG - HU...,220.0,A.1.3.M
5,Les HÙpitaux Universitaires de GenËve HUG - HU...,253.0,A.1.4.M
6,Les HÙpitaux Universitaires de GenËve HUG - HU...,62.0,A.1.5.M
...,...,...,...
110874,Stiftung Ostschweizer Kinderspital - Romerhuus...,0.0,F.2.13.V
110875,Stiftung Ostschweizer Kinderspital - Romerhuus...,0.0,F.3.1.V
110876,Stiftung Ostschweizer Kinderspital - Romerhuus...,0.0,I.1.8.V
110877,Stiftung Ostschweizer Kinderspital - Romerhuus...,0.0,J.1.1.V


In [65]:
cases_3.columns

Index(['institution', 'indicator', 'nombre de cas 2019', 'number_of_cases_2',
       'indicator_3'],
      dtype='object')

In [66]:
cases_3.columns

Index(['institution', 'indicator', 'nombre de cas 2019', 'number_of_cases_2',
       'indicator_3'],
      dtype='object')

## Pivot the dataset

In [67]:
df_case = cases_3.pivot(index='institution', columns='indicator_3', values='number_of_cases_2')

In [68]:
cases_4=df_case.reset_index()
cases_4

indicator_3,institution,A.1.1.M,A.1.1.V,A.1.10.M,A.1.10.P,A.1.11.M,A.1.12.X,A.1.13.M,A.1.14.P,A.1.15.P,...,L.6.2.F,L.6.3.F,L.6.4.F,L.6.5.F,L.6.8.F,L.7.1.F,L.7.2.F,M.1.1.F,M.1.2.F,Z.1.1.X
0,"Adus Medica AG - Adus Medica AG, Breitestrasse...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,73.0
1,"Andreas Klinik - Andreas Klinik, Rigistrasse 1...",22.0,17.0,6.0,6.0,15.0,22.0,7.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0
2,Asana Spital Leuggern AG - Asana Spital Leugge...,28.0,18.0,10.0,10.0,16.0,28.0,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,46.0
3,Asana Spital Menziken AG - Asana Spital Menzik...,23.0,8.0,11.0,11.0,11.0,23.0,3.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Berit Klinik AG - Berit Klinik AG Rehabiliatio...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,Universit‰tsspital Z¸rich - Universit‰tsSpital...,830.0,166.0,356.0,356.0,466.0,819.0,313.0,731.0,465.0,...,7.0,1.0,162.0,102.0,0.0,22.0,0.0,302.0,251.0,174.0
236,Universit‰ts≠Kinderspital beider Basel (UKBB) ...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,304.0
237,"Uroviva Klinik AG - Uroviva Klinik AG, Z¸richs...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
238,Venenklinik Bellevue AG - Venenklinik Bellevue...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Include the correspondance name in the cases dataframe

In [69]:
names_hosp = pd.read_csv('/content/drive/MyDrive/Module1_CDR/2018-2019/correspondances_19.csv')

In [70]:
names_hosp

Unnamed: 0.1,Unnamed: 0,Nom_df1,Nom_df2
0,0,Les HÙpitaux Universitaires de GenËve HUG - HU...,Les Hôpitaux Universitaires de Genève HUG
1,1,Les HÙpitaux Universitaires de GenËve HUG - HU...,Les Hôpitaux Universitaires de Genève HUG
2,2,Les HÙpitaux Universitaires de GenËve HUG - HU...,Les Hôpitaux Universitaires de Genève HUG
3,3,Institution,Institution de Lavigny
4,4,"Insel Gruppe AG (universit‰r) - Inselspital, F...",Insel Gruppe AG (universitär)
...,...,...,...
143,143,Ita Wegman Geburtshaus - Ita Wegman Geburtshau...,Ita Wegman Geburtshaus
144,144,Universit‰ts-Kinderspital Z¸rich das Spital de...,Universitäts-Kinderspital Zürich das Spital de...
145,145,Universit‰ts-Kinderspital Z¸rich das Spital de...,Universitäts-Kinderspital Zürich das Spital de...
146,146,Stiftung Ostschweizer Kinderspital - Ostschwei...,Stiftung Ostschweizer Kinderspital


In [71]:
# Merge to obtain in the dataframe cases the right names of hospitals
cases_F = cases_4.merge(names_hosp, left_on='institution', right_on='Nom_df1', how='left')

In [72]:
cases_F

Unnamed: 0.1,institution,A.1.1.M,A.1.1.V,A.1.10.M,A.1.10.P,A.1.11.M,A.1.12.X,A.1.13.M,A.1.14.P,A.1.15.P,...,L.6.5.F,L.6.8.F,L.7.1.F,L.7.2.F,M.1.1.F,M.1.2.F,Z.1.1.X,Unnamed: 0,Nom_df1,Nom_df2
0,"Adus Medica AG - Adus Medica AG, Breitestrasse...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,73.0,,,
1,"Andreas Klinik - Andreas Klinik, Rigistrasse 1...",22.0,17.0,6.0,6.0,15.0,22.0,7.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,13.0,,,
2,Asana Spital Leuggern AG - Asana Spital Leugge...,28.0,18.0,10.0,10.0,16.0,28.0,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,46.0,89.0,Asana Spital Leuggern AG - Asana Spital Leugge...,Asana Spital Leuggern AG
3,Asana Spital Menziken AG - Asana Spital Menzik...,23.0,8.0,11.0,11.0,11.0,23.0,3.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,91.0,Asana Spital Menziken AG - Asana Spital Menzik...,Asana Spital Menziken AG
4,Berit Klinik AG - Berit Klinik AG Rehabiliatio...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
237,Universit‰tsspital Z¸rich - Universit‰tsSpital...,830.0,166.0,356.0,356.0,466.0,819.0,313.0,731.0,465.0,...,102.0,0.0,22.0,0.0,302.0,251.0,174.0,,,
238,Universit‰ts≠Kinderspital beider Basel (UKBB) ...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,304.0,,,
239,"Uroviva Klinik AG - Uroviva Klinik AG, Z¸richs...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,121.0,"Uroviva Klinik AG - Uroviva Klinik AG, Z¸richs...",Uroviva Klinik AG
240,Venenklinik Bellevue AG - Venenklinik Bellevue...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,


In [73]:
# Here, before we merge the cases dataset with the institutions dataset, we need to be sure that there is only one row per hospital
# we can see that there are many duplicates
duplicates = cases_F.duplicated(['Nom_df2'])
count_duplicates = duplicates.sum()
count_duplicates

143

In [74]:
cases_F_0=cases_F.drop(columns=["institution", 'Nom_df1'])

In [75]:
# We need to group_by the name of institution and sum
result = cases_F_0.groupby('Nom_df2').agg('sum').reset_index()

In [76]:
# We need to group_by the name of institution and sum
result = cases_F_0.groupby('Nom_df2').agg('sum').reset_index()

In [77]:
# No more duplicate, the dataset is now ready to be merged with the institution dataset
duplicates = result.duplicated(['Nom_df2'])
count_duplicates = duplicates.sum()
count_duplicates

0

## Merge with the infrastructure dataset

In [78]:
df = cases_F.merge(hospital_data, left_on='Nom_df2', right_on='Inst', how='left')

In [79]:
df

Unnamed: 0,institution,A.1.1.M,A.1.1.V,A.1.10.M,A.1.10.P,A.1.11.M,A.1.12.X,A.1.13.M,A.1.14.P,A.1.15.P,...,ErlOKPAmbB,ErlStatB,ErlKVGStatB,ErlKVGStatVB,ErlZvOKPStatB,ErlZvOKPStatVB,ErlLangB,PTageLang,AustLang,KostLangT
0,"Adus Medica AG - Adus Medica AG, Breitestrasse...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
1,"Andreas Klinik - Andreas Klinik, Rigistrasse 1...",22.0,17.0,6.0,6.0,15.0,22.0,7.0,0.0,0.0,...,,,,,,,,,,
2,Asana Spital Leuggern AG - Asana Spital Leugge...,28.0,18.0,10.0,10.0,16.0,28.0,5.0,0.0,0.0,...,,,,,,,,,,
3,Asana Spital Menziken AG - Asana Spital Menzik...,23.0,8.0,11.0,11.0,11.0,23.0,3.0,0.0,0.0,...,,,,,,,,,,
4,Berit Klinik AG - Berit Klinik AG Rehabiliatio...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
237,Universit‰tsspital Z¸rich - Universit‰tsSpital...,830.0,166.0,356.0,356.0,466.0,819.0,313.0,731.0,465.0,...,,,,,,,,,,
238,Universit‰ts≠Kinderspital beider Basel (UKBB) ...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
239,"Uroviva Klinik AG - Uroviva Klinik AG, Z¸richs...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
240,Venenklinik Bellevue AG - Venenklinik Bellevue...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,


# Explore the C-section data

In [80]:
df[['G.1.4.P', 'G.1.5.P', 'G.1.6.P', 'G.1.7.P']]

# G.1.4.P: césariennes
# G.1.5.P: Césariennes en cas d'accouchement à faible risque
# G.1.6.P: Césariennes en cas d'accouchement à faible risque, âge <35
# G.1.7.P: Césariennes en cas d'accouchement à faible risque, âge >34
#  G.1.7.P +  G.1.6.P = G.1.5.P
# Therefore, G.1.4.P is the total amont of c-section, the most important variable!

Unnamed: 0,G.1.4.P,G.1.5.P,G.1.6.P,G.1.7.P
0,0.0,0.0,0.0,0.0
1,236.0,197.0,126.0,71.0
2,226.0,187.0,125.0,62.0
3,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0
...,...,...,...,...
237,1212.0,686.0,380.0,306.0
238,0.0,0.0,0.0,0.0
239,0.0,0.0,0.0,0.0
240,0.0,0.0,0.0,0.0


In [81]:
# Which hospitals haven't perform any c-section?
test=df[df['G.1.4.P']==0]
#print(test)

# We can observe that by doing this operation we are deleting automatically the maison de naissances, etc.

In [82]:
# let's delete then
df_2=df[df['G.1.4.P']!=0]

In [83]:
df_2.shape

(99, 602)

In [84]:
df_2.to_csv('/content/drive/MyDrive/Module1_CDR/2018-2019/DF_2019_clean.csv')
