### Imports

In [2]:
import pandas as pd

### Read data from 'xlsx' file

In [4]:
# Load the 'base_limpia' sheet of the workbook into a dataframe
df = pd.read_excel(
    '../data/processed/data_cuba.xlsx',
    sheet_name='base_limpia',
    index_col=None,
)
# Keep the dataframe's column labels (a pandas Index) so later cells can
# pick columns by position
cols = df.columns

### Create pivot table from dataframe

In [5]:
# Build a pivot table that counts the non-null entries of cols[0] for each
# combination of cols[27] (rows) and cols[74] (columns).
# NOTE(review): these positional lookups depend on the sheet's column order;
# presumably cols[27] is the hospital name and cols[74] the consent answer —
# confirm against the workbook.
consent = df.pivot_table(
    values=cols[0],
    index=cols[27],
    columns=cols[74],
    aggfunc='count',
)

### Clean data

In [6]:
# Move the pivot's row index into a regular 'Hospital' column and blank out
# the name of the column axis.
consent = consent.rename_axis('Hospital').reset_index().rename_axis('', axis='columns')

# Clean typos in the hospital names with regular expressions.
# The result is assigned back to the column instead of calling
# `replace(..., inplace=True)` on `consent['Hospital']`: the in-place form
# operates on an intermediate Series and, with pandas Copy-on-Write, leaves
# `consent` itself unchanged (pandas 2.x already warns about it).
# Pass 1 removes every newline and one leading whitespace character;
# pass 2 removes one further leading whitespace character if present.
consent['Hospital'] = (
    consent['Hospital']
    .replace(r'\n|^\s', '', regex=True)
    .replace(r'^\s', '', regex=True)
)

consent.shape

(49, 4)

### Merge two dataframes based on a column

In [7]:
# Load the hospital directory; the converters make 'Long' and 'Lat' come in
# as strings
directorio = pd.read_excel(
    r'../data/processed/directorio_hospitales.xlsx',
    sheet_name='base_limpia',
    index_col=None,
    converters={'Long': str, 'Lat': str},
)

# Left join on 'Hospital': every row of `consent` is kept and the directory
# columns are attached where the hospital name matches
df_join = consent.merge(directorio, how='left', on='Hospital')

df_join.shape

(49, 6)

In [8]:
# Keep only the rows that have a value in 'Long', then renumber the index
# from 0
# NOTE(review): in the displayed result 'Long' holds values around 20-23 and
# 'Lat' values around -75 to -83, which looks like swapped labels for Cuban
# coordinates — confirm against the directory workbook.
df_join = df_join.dropna(subset=['Long']).reset_index(drop=True)

# Optional exports of the result (left disabled)
# df_join.to_excel('../data/processed/consentimiento_hospitales.xlsx', index = False)
# df_join.to_json('../data/processed/json/consentimiento_hospitales.json', orient='index', force_ascii=False)

df_join

Unnamed: 0,Hospital,"No, para ninguno","Sí, para algunos","Sí, para todos",Long,Lat
0,Hospital Guillermo Domínguez (Puerto Padre),1.0,,1.0,21.1873904251164,-76.58723572553677
1,Hospital General Docente 26 de Diciembre de Re...,1.0,,,22.492379052036,-79.55278177
2,Hospital Clínico Quirúrgico Dr. Luis Aldama Pa...,,,2.0,21.540445925169404,-77.40405268174251
3,Hospital General Docente Dr. Agostinho Neto,,1.0,1.0,20.1577360164937,-75.21762997
4,Hospital General Docente Vladimir Ilich Lenin,2.0,,,20.8882183791154,-76.26923029
5,Hospital General Municipal 27 de Noviembre,,,6.0,22.509655349554393,-83.50877625329261
6,Hospital Ginecobstétrico Materno Sur Mariana G...,7.0,,13.0,21.617746757370714,-75.92184000604436
7,Hospital Ginecobstétrico Ramón González Coro,40.0,6.0,30.0,23.1309224772831,-82.39780127
8,Hospital Materno Infantil provincial Isabel Ma...,2.0,,1.0,21.936654254801294,-79.4394423371609
9,Hospital Militar Luis Díaz Soto (Naval),2.0,1.0,5.0,23.153176268189828,-82.33258355
