In [1]:
!python --version

Python 3.7.4


# COVID-19 Ph Geocoding for Data Preparation

In [2]:
import numpy as np
import pandas as pd
# Show full cell contents. None means "no limit"; the old -1 sentinel is
# deprecated and rejected by current pandas releases.
pd.set_option('display.max_colwidth', None)

This Jupyter Notebook was last re-run at:

In [3]:
pd.to_datetime('today')

Timestamp('2020-03-31 20:19:21.696974')

The [Coronavirus-ph-api](https://coronavirus-ph-api.herokuapp.com/#/) that was used here was created by [Mr. Robert Soriano](https://robsoriano.com/).

Please note that the results of the code in this notebook depend on when the notebook was re-run.

In [4]:
df = pd.read_json("https://coronavirus-ph-api.herokuapp.com/cases")
df

Unnamed: 0,case_no,date,age,gender,nationality,hospital_admitted_to,had_recent_travel_history_abroad,status,other_information,resident_of
0,1.0,2020-01-30,38,F,Chinese,San Lazaro Hospital,Yes,Recovered,First case of COVID-19 in PH,
1,2.0,2020-01-30,44,M,Chinese,San Lazaro Hospital,Yes,Died,First COVID-19-related death in PH; first outside mainland China,
2,3.0,2020-01-30,60,F,Chinese,ACE Medical Center - Bohol,Yes,Recovered,Left the Philippines to China before was reported positive,
3,4.0,2020-03-05,48,M,Filipino,University of the East Ramon Magsaysay Memorial Medical Center,Yes,Recovered,First case involving a Filipino in the Philippines; employee of Deloitte PH in BGC,
4,5.0,2020-03-05,62,M,Filipino,Research Institute for Tropical Medicine (Muntinlupa),No,Died,Resident of Cainta; frequented Greenhills Mall and a prayer room in San Juan,
...,...,...,...,...,...,...,...,...,...,...
2079,2080.0,TBA,TBA,TBA,TBA,TBA,TBA,TBA,TBA,TBA
2080,2081.0,TBA,TBA,TBA,TBA,TBA,TBA,TBA,TBA,TBA
2081,2082.0,TBA,TBA,TBA,TBA,TBA,TBA,TBA,TBA,TBA
2082,2083.0,TBA,TBA,TBA,TBA,TBA,TBA,TBA,TBA,TBA


Preferably, ```case_no``` would be set as the index, but ```case_no``` sometimes contains ```NaN``` values. To solve this:

In [5]:
# Discard the API's own case_no column; the index is rebuilt from row position in the next cell.
df = df.drop(columns=['case_no'])

In [6]:
# Number the cases 1..N by row position and label the index 'case_no'.
# Building the index outright (instead of `df.index += 1`) keeps this cell
# idempotent: re-running it no longer shifts every case number by one again.
df.index = pd.RangeIndex(start=1, stop=len(df) + 1, name='case_no')

In [7]:
df.columns

Index(['date', 'age', 'gender', 'nationality', 'hospital_admitted_to',
       'had_recent_travel_history_abroad', 'status', 'other_information',
       'resident_of'],
      dtype='object')

# Before we begin...

Save this df then use it.

In [8]:
# Timestamp of this run, formatted day-month-year_hours-minutes-seconds for use in file names.
run_moment = pd.to_datetime('today')
today_stamp = run_moment.strftime('%d-%m-%Y_%Hh%Mm%Ss')
today_stamp

'31-03-2020_20h19m25s'

In [9]:
# Save a timestamped snapshot of the frame, then reload it so the rest of the
# notebook works from the saved file rather than the live API response.
snapshot_path = f'COVID19PHforMapping_{today_stamp}.csv'
df.to_csv(snapshot_path, index_label='case_no')
df = pd.read_csv(snapshot_path, index_col='case_no')
df

Unnamed: 0_level_0,date,age,gender,nationality,hospital_admitted_to,had_recent_travel_history_abroad,status,other_information,resident_of
case_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,2020-01-30,38,F,Chinese,San Lazaro Hospital,Yes,Recovered,First case of COVID-19 in PH,
2,2020-01-30,44,M,Chinese,San Lazaro Hospital,Yes,Died,First COVID-19-related death in PH; first outside mainland China,
3,2020-01-30,60,F,Chinese,ACE Medical Center - Bohol,Yes,Recovered,Left the Philippines to China before was reported positive,
4,2020-03-05,48,M,Filipino,University of the East Ramon Magsaysay Memorial Medical Center,Yes,Recovered,First case involving a Filipino in the Philippines; employee of Deloitte PH in BGC,
5,2020-03-05,62,M,Filipino,Research Institute for Tropical Medicine (Muntinlupa),No,Died,Resident of Cainta; frequented Greenhills Mall and a prayer room in San Juan,
...,...,...,...,...,...,...,...,...,...
2080,TBA,TBA,TBA,TBA,TBA,TBA,TBA,TBA,TBA
2081,TBA,TBA,TBA,TBA,TBA,TBA,TBA,TBA,TBA
2082,TBA,TBA,TBA,TBA,TBA,TBA,TBA,TBA,TBA
2083,TBA,TBA,TBA,TBA,TBA,TBA,TBA,TBA,TBA


Remove rows in which every column except the last one (```resident_of```) is ```TBA```.

In [10]:
df.columns[0:-1]

Index(['date', 'age', 'gender', 'nationality', 'hospital_admitted_to',
       'had_recent_travel_history_abroad', 'status', 'other_information'],
      dtype='object')

In [11]:
# Flag rows whose every column except the last equals 'TBA', then drop them by index label.
tba_everywhere = df.iloc[:, :-1].eq('TBA').all(axis=1)
df = df.drop(index=df.index[tba_everywhere])

In [12]:
len(df)

1075

### Some data exploration.

In [13]:
df['status'].value_counts()

Admitted     962
Died         71 
Recovered    42 
Name: status, dtype: int64

In [14]:
df['nationality'].value_counts()

For Validation    541
Filipino          520
Chinese           4  
American          2  
Thai              2  
Indian            2  
Australian        1  
Taiwanese         1  
Japanese          1  
Indonesian        1  
Name: nationality, dtype: int64

In [15]:
df['hospital_admitted_to'].value_counts()

(For Validation at DOH Epidemiology Bureau)          323
St. Luke's Medical Center - Global City              72 
St. Luke's Medical Center - Quezon City              60 
The Medical City - Ortigas                           57 
Cardinal Santos Medical Center                       57 
                                                     .. 
St. Joseph Hospital and Medical Center               1  
Lucena United Doctors Hospital                       1  
Batangas Health Care Hospital - Jesus of Nazareth    1  
ACE Dumaguete Doctors, Inc.                          1  
Dagupan Doctors Villaflor Memorial Hospital          1  
Name: hospital_admitted_to, Length: 140, dtype: int64

For our mapping purposes, we prefer only those rows without ```(For Validation at DOH Epidemiology Bureau)``` as their column value for ```df['hospital_admitted_to']```.

In [16]:
# Keep only cases with a concrete facility name.
# `.copy()` makes the result an independent frame: later cells add columns to
# it and edit them, which on a bare `.loc` selection triggers pandas'
# SettingWithCopy warnings and may not behave as intended.
df = df.loc[df['hospital_admitted_to'] != '(For Validation at DOH Epidemiology Bureau)'].copy()

In [17]:
len(df)

752

# Part I: Regex and Geocoding

Remove words that start with a lowercase letter, keep only the text to the right of the first ";", and finally strip the leading whitespace from the resulting strings.

reference:

https://stackoverflow.com/questions/56845457/how-to-remove-words-starting-with-lowercase-from-a-sentence-using-regex

https://www.datacamp.com/community/tutorials/python-regular-expression-tutorial


In [18]:
import re

def _strip_lowercase_words(raw_name):
    """Clean one ``hospital_admitted_to`` value.

    Deletes every match of the pattern (a run of lowercase letters starting at
    a word boundary, one further word/apostrophe/dot character, and trailing
    whitespace), then returns the text after the first ';' -- or the whole
    string when there is no ';'.
    """
    no_lowercase = re.sub(r"\b[a-z]+[\w'.]\s*", "", str(raw_name))
    return no_lowercase.split(';', 1)[-1]


df['hospital'] = df['hospital_admitted_to'].apply(_strip_lowercase_words).str.lstrip()
df['hospital']

case_no
1      San Lazaro Hospital                                    
2      San Lazaro Hospital                                    
3      ACE Medical Center - Bohol                             
4      University East Ramon Magsaysay Memorial Medical Center
5      Research Institute Tropical Medicine (Muntinlupa)      
                             ...                              
837    Cagayan Valley Medical Center                          
838    Cagayan Valley Medical Center                          
866    Chinese General Hospital Medical Center                
935    St. Luke's Medical Center - Global City                
988    New Era General Hospital                               
Name: hospital, Length: 752, dtype: object

Remove ```', Inc.'```

Reference:

https://stackoverflow.com/questions/55533962/removing-specific-word-in-a-string-in-pandas

In [19]:
# Drop the standalone token 'Inc.' from each name (re-joining the remaining
# tokens with single spaces), then trim any trailing comma.
without_inc = df['hospital'].apply(
    lambda name: ' '.join(token for token in name.split() if token != 'Inc.')
)
df['hospital'] = without_inc.str.rstrip(',')

How many unique hospital names are there?

In [20]:
hosp_names = df['hospital'].unique()
len(hosp_names)

139

### Geocoding

reference:
    
https://towardsdatascience.com/geocode-with-python-161ec1e62b89

In [21]:
df['hospital'].value_counts()

St. Luke's Medical Center - Global City        72
St. Luke's Medical Center - Quezon City        60
Cardinal Santos Medical Center                 57
The Medical City - Ortigas                     57
Makati Medical Center                          48
                                               ..
Gentri Medical Center Hospital                 1 
Amang Rodriguez Memorial Medical Center        1 
Lucena United Doctors Hospital                 1 
Metro North Medical Center Hospital            1 
Dagupan Doctors Villaflor Memorial Hospital    1 
Name: hospital, Length: 139, dtype: int64

In [22]:
import ssl
import certifi
import geopy.geocoders
ctx = ssl.create_default_context(cafile=certifi.where())
geopy.geocoders.options.default_ssl_context = ctx

from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# Nominatim client. geopy's default request timeout is only 1 second
# (geopy.geocoders.options.default_timeout), a frequent cause of
# GeocoderTimedOut, so allow each request more time.
locator = Nominatim(user_agent="myGeocoder", timeout=10)
# Rate-limited wrapper: waits at least one second between consecutive calls.
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)

### In case ```GeocoderTimedOut``` occurs:

reference:

https://gis.stackexchange.com/questions/173569/avoid-time-out-error-nominatim-geopy-open-street-maps

In [23]:
from geopy.exc import GeocoderTimedOut

def do_geocode(address, max_attempts=5):
    """Geocode ``address`` through the rate-limited Nominatim wrapper.

    Parameters
    ----------
    address : str
        Free-text place name or address to look up.
    max_attempts : int, default 5
        Maximum number of tries when a request times out (at least one try
        is always made).

    Returns
    -------
    geopy.location.Location or None
        Whatever ``geocode`` returns for the query.

    Raises
    ------
    GeocoderTimedOut
        If the request still times out on the final attempt.
    """
    attempts = max(1, max_attempts)
    for attempt in range(1, attempts + 1):
        try:
            # Use the RateLimiter-wrapped `geocode` from the previous cell,
            # not `locator.geocode` directly, so the one-second minimum delay
            # between requests is actually enforced.
            # NOTE(review): RateLimiter may retry and swallow service errors
            # itself depending on its settings -- confirm against geopy docs.
            return geocode(address)
        except GeocoderTimedOut:
            # A bounded loop replaces the original unbounded recursion, which
            # could only end in a RecursionError during a prolonged outage.
            if attempt == attempts:
                raise

Apply ```do_geocode``` to the unique values only, which saves a lot of time:

Reference:

https://stackoverflow.com/questions/31414481/new-column-with-coordinates-using-geopy-pandas

In [24]:
%%time

# Geocode each unique hospital name once.
hosp_locs = pd.Series(hosp_names).apply(do_geocode)

# Pull latitude / longitude out of each result; a missing result is passed through unchanged.
hosp_lats = hosp_locs.apply(lambda loc: loc.latitude if np.all(pd.notnull(loc)) else loc)
hosp_longs = hosp_locs.apply(lambda loc: loc.longitude if np.all(pd.notnull(loc)) else loc)

# Lookup tables: hospital name -> coordinate.
geodict_lat = {name: lat for name, lat in zip(hosp_names, hosp_lats)}
geodict_long = {name: lon for name, lon in zip(hosp_names, hosp_longs)}

Wall time: 2min 21s


In [25]:
df['hosp_lat'] = df['hospital'].map(geodict_lat)

In [26]:
df['hosp_long'] = df['hospital'].map(geodict_long)

What hospitals were not successfully geocoded?

In [27]:
df['hospital'][np.isnan(df['hosp_lat']) & np.isnan(df['hosp_long'])].value_counts()

University East Ramon Magsaysay Memorial Medical Center     19
Our Lady Lourdes Hospital                                   7 
Dr. Paulino J. Garcia Memorial Research Medical Hospital    4 
DOH Epidemiology Bureau                                     4 
Dr. Jose N. Rodriguez Memorial Hospital Sanitarium          3 
Perpetual Health Medical Center - Biñan                     2 
City General Trias Doctors Medical Center                   2 
Quezon City Health Department (Reporting Facility)          2 
Dr. Pablo O. Torre Memorial Hospital                        2 
Makati Health Department                                    2 
Southern Isabela Medical Center                             1 
Gerona Hospital Sacred Heart                                1 
St. Joseph Hospital Medical Center                          1 
Ramon Magsaysay Memorial Hospital                           1 
Our Lady Mt. Carmel Medical Center                          1 
Hospital Infant Jesus Medical Center                   

# Part II: Manual Data Cleaning

#### ```San Juan City Epidemiology Surveillance Unit```

Try:

In [28]:
do_geocode('San Juan City Health Office')

Location(San Juan City Health Office Ⅰ, F. Sevilla, Pipeline, Pedro Cruz, San Juan, 1st District, San Juan, Metro Manila, 1500, Philippines, (14.603709550000001, 121.0271937340311, 0.0))

Then replace:

In [29]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('San Juan City Epidemiology Surveillance Unit',
                                        'San Juan City Health Office')

Rinse and repeat.

#### ```Our Lady Lourdes Hospital```      

In [30]:
do_geocode('Our Lady of Lourdes Hospital')

Location(Our Lady Of Lourdes Hospital, Vinzons Avenue, Barangay IV, Daet, Camarines Norte, Bicol, 4600, Philippines, (14.121015700000001, 122.9470513, 0.0))

In [31]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Our Lady Lourdes Hospital',
                                        'Our Lady of Lourdes Hospital')

#### ```Makati Health Department```

Consider: Health Department Division Chiefs Office - City Government of Makati

In [32]:
do_geocode('Makati City Hall')

Location(Makati, Makati 1st District, Makati, Metro Manila, 1226, Philippines, (14.5568853, 121.023532, 0.0))

In [33]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Makati Health Department',
                                        'Makati City Hall')

#### ```University East Ramon Magsaysay Memorial Medical Center, Inc.``` and other iterations

In [34]:
do_geocode('University of the East Ramon Magsaysay Memorial Medical Center')

Location(University of the East - Ramon Magsaysay Memorial Medical Center, Aurora Boulevard, Pipeline, Doña Imelda, Galas, 4th District, Quezon City, Metro Manila, 1016, Philippines, (14.6066064, 121.02067860955255, 0.0))

In [35]:
# Collapse every observed spelling of UERM into the one name that geocodes.
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace(
    ['University The East Ramon -  Magsaysay Memorial Medical Center',
     'University The East Ramon Magsaysay Memorial Medical Center',
     'University East Ramon Magsaysay Memorial Medical Center, Inc',
     'University East Ramon Magsaysay Memorial Medical Center, Inc.',
     'University East–Ramon Magsaysay Memorial Medical Center',
     'University East–Ramon Magsaysay Memorial Hospital',
     'University of the East Ramon - Magsaysay Memorial Medical Center',
     'University of The East Ramon Magsaysay Memorial Medical Center',
     'University East Ramon Magsaysay Memorial Medical Center',
     'Ramon Magsaysay Memorial Hospital'],
    'University of the East Ramon Magsaysay Memorial Medical Center',
)

#### ```UP–Philippine General Hospital```

In [36]:
do_geocode('Philippine General Hospital')

Location(Philippine General Hospital, Apacible, Barangay 676, Ermita, Fifth District, Manila, Metro Manila, 1000, Philippines, (14.5780333, 120.9855392067257, 0.0))

In [37]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('UP–Philippine General Hospital',
                                        'Philippine General Hospital')

#### ```Taguig CESU```

In [38]:
do_geocode('Taguig City Hall')

Location(Taguig City Hall, Col. P. Cruz, Tuktukan, Taguig, Metro Manila, 1637, Philippines, (14.528892599999999, 121.06994669784399, 0.0))

In [39]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Taguig CESU',
                                        'Taguig City Hall')

#### ```Far Eastern University–Dr. Nicanor Reyes Medical Foundation```

In [40]:
do_geocode('Far Eastern University – Nicanor Reyes Medical Foundation')

Location(Far Eastern University - Nicanor Reyes Medical Foundation, Regalado Avenue, Fairview, 5th District, Quezon City, Metro Manila, 1121, Philippines, (14.69890385, 121.06719218653689, 0.0))

In [41]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Far Eastern University–Dr. Nicanor Reyes Medical Foundation',
                                        'Far Eastern University – Nicanor Reyes Medical Foundation')

#### ```Quezon City Health Department ()``` and other iterations

In [42]:
do_geocode('Health Dept., Quezon City')

Location(Health Dept., Makatarungan, Central, Quezon City, 4th District, Quezon City, Metro Manila, 1100, Philippines, (14.64610395, 121.05234535, 0.0))

In [43]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace(
    ['Quezon City Health Department ()',
     'Quezon City Health Department',
     'Quezon City Health Department (Reporting Facility)'],
    'Health Dept., Quezon City',
)

#### ```Fe Del Mundo Medical Center ```

In [44]:
do_geocode('Dr. Fe Del Mundo Medical Center')

Location(Fe Del Mundo Medical Center, 11, Banawe, Doña Aurora, Galas, 4th District, Quezon City, Metro Manila, 1113, Philippines, (14.62060365, 121.0092301561046, 0.0))

In [45]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Fe Del Mundo Medical Center',
                                        'Dr. Fe Del Mundo Medical Center')

#### ```Dr. Pablo O. Torre Memorial Hospital```

In [46]:
do_geocode('Riverside Hospital Bacolod')

Location(Riverside, B.S. Aquino Drive, Lacson Tourism Strip, Bacolod, Negros Occidental, Western Visayas, 6100, Philippines, (10.68310545, 122.95768201600865, 0.0))

In [47]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Dr. Pablo O. Torre Memorial Hospital',
                                        'Riverside Hospital Bacolod')

#### ```Nueva Ecija Doctors Hospital, Inc.```

In [48]:
do_geocode('Nueva Ecija Doctors Hospital')

Location(Nueva Ecija Doctors Hospital, Maharlika Highway, Shell Select, Primavera Homes, San Juan Accfa, Nueva Ecija, Central Luzon, 3100, Philippines, (15.4578272, 120.94883314561189, 0.0))

In [49]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Nueva Ecija Doctors Hospital, Inc.',
                                        'Nueva Ecija Doctors Hospital')

#### ```Perpetual Health Medical Center–Biñan```

In [50]:
do_geocode('Perpetual Help Medical Center - Biñan')

Location(Perpetual Help Medical Center - Biñan, National Highway, South Springville, San Antonio, Biñan, Laguna, Calabarzon, 4024, Philippines, (14.33043835, 121.08533919092724, 0.0))

In [51]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Perpetual Health Medical Center–Biñan',
                                        'Perpetual Help Medical Center - Biñan')

#### ```City General Trias Doctors Medical Center```

In [52]:
do_geocode('City of General Trias Doctors Medical Center')

Location(City Of General Trias Doctors Medical Center, Governor's Drive, Gentri Heghts, Manggahan, General Trias, Cavite, Calabarzon, 4107, Philippines, (14.2912185, 120.90387619553523, 0.0))

In [53]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('City General Trias Doctors Medical Center',
                                        'City of General Trias Doctors Medical Center')

#### ```Divine Grace Medical Center```

In [54]:
do_geocode('Divine Grace Hospital')

Location(Alfamart, Antero Soriano Highway, Tejero, General Trias, Cavite, Calabarzon, 4107, Philippines, (14.3973674, 120.8680362, 0.0))

In [55]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Divine Grace Medical Center',
                                        'Divine Grace Hospital')

#### ```New Clark City (Grand Princess)```

reference: 
    
https://globalnation.inquirer.net/186134/fwd-444-repatriates-from-mv-grand-princess-arrive

In [56]:
do_geocode("Athlete's Village, New Clark City")

Location(Athlete's Village, Cristo Rey, Tarlac, Central Luzon, 2023, Philippines, (15.34300985, 120.533940575, 0.0))

In [57]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('New Clark City (Grand Princess)',
                                        "Athlete's Village, New Clark City")

#### ```DOH–Epidemiology Bureau, RESU-NCR (), Regional Epidemiology Surveillance Unit–NCR```

In [58]:
do_geocode('Department of Health')

Location(Department of Health, Santa Cruz, Third District, Manila, Metro Manila, Philippines, (14.615603799999999, 120.98128128126282, 0.0))

In [59]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace(
    ['DOH–Epidemiology Bureau', 'RESU-NCR ()', 'Regional Epidemiology Surveillance Unit–NCR'],
    "Department of Health",
)

#### ```ACE Medical Center Tagbilaran, Bohol```

In [60]:
do_geocode('ACE Medical Center Bohol')

Location(ACE Medical Center, Carlos P. Garcia East Avenue, Camella Homes, Sitio Ubos, Bool, Bohol, Central Visayas, 6390, Philippines, (9.6344977, 123.8692981, 0.0))

In [61]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('ACE Medical Center Tagbilaran, Bohol',
                                        "ACE Medical Center Bohol")

#### ```Las Piñas Doctors Hospital, Inc.```

In [62]:
do_geocode('Las Piñas Doctors Hospital')

Location(Las Piñas Doctors Hospital, CAA Avenue, Belisario, Pulanglupa Dos, Las Piñas, 1st District, Las Piñas, Metro Manila, 1742, Philippines, (14.455615300000002, 120.99371706729627, 0.0))

In [63]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Las Piñas Doctors Hospital, Inc.',
                                        "Las Piñas Doctors Hospital")

#### ```San Pedro Doctors Hospital, Inc.```

In [64]:
do_geocode('San Pedro Doctors Hospital')

Location(San Pedro Doctor's Hospital, National Highway, Purok 6, Landayan, San Pedro, Laguna, Calabarzon, 4023, Philippines, (14.3488642, 121.0649328, 0.0))

In [65]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('San Pedro Doctors Hospital, Inc.',
                                        "San Pedro Doctors Hospital")

#### ```Batangas Health Care Hospital - Jesus Nazareth```

In [66]:
do_geocode('Jesus of Nazareth Hospital')

Location(Jesus of Nazareth Hospital, Dr. Ramon Road, Gulod Labac, Batangas City, Batangas, Calabarzon, 4200, Philippines, (13.7596952, 121.07689673232872, 0.0))

In [67]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Batangas Health Care Hospital - Jesus Nazareth',
                                        "Jesus of Nazareth Hospital")

#### ```Our Lady Mt. Carmel Medical Center ```

In [68]:
do_geocode('Our Lady of Mt. Carmel Medical Center')

Location(Our Lady of Mt. Carmel Medical Center, MacArthur Highway, Rich Town I Subdivision, Marlboro, Sindalan, Pampanga, Central Luzon, 2000, Philippines, (15.09639535, 120.62385982698073, 0.0))

In [69]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Our Lady Mt. Carmel Medical Center',
                                        "Our Lady of Mt. Carmel Medical Center")

#### ```Southern Isabela Medical Center```

In [70]:
do_geocode('Southern Isabela General Hospital')

Location(Southern Isabela General Hospital, Recto Street, Metro Ville Subdivision, Rosario, Isabela, Cagayan Valley, 3311, Philippines, (16.680094, 121.54669799009702, 0.0))

In [71]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Southern Isabela Medical Center',
                                        "Southern Isabela General Hospital")

#### ```Golden Gate Batangas Hospital, Inc.```

In [72]:
do_geocode('Golden Gate General Hospital')

Location(Golden Gate General Hospital, P. Prieto Street, 5, Poblacion, Batangas City, Batangas, Calabarzon, 4200, Philippines, (13.75746355, 121.06178834701583, 0.0))

In [73]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace(
    ['Golden Gate Batangas Hospital, Inc.',
     'Golden Gate Batangas Hospital'],
    "Golden Gate General Hospital",
)

#### ```Hospital Infant Jesus Medical Center```

In [74]:
do_geocode('Hospital of the Infant Jesus Medical Center')

Location(Hospital of the Infant Jesus Medical Center, Laon Laan Street, Sampaloc, 4th District, Manila, Metro Manila, 1015, Philippines, (14.614006150000002, 120.98905680835242, 0.0))

In [75]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace(
    ['Hospital Infant Jesus Medical Center', 'Hospital of the Infant Jesus'],
    "Hospital of the Infant Jesus Medical Center",
)

#### ```RHS 4A Camp Vicente Lim```

In [76]:
do_geocode('Camp Vicente Lim')

Location(Camp Vicente Lim, Mayapa, Calamba, Laguna, Calabarzon, 4029, Philippines, (14.2156332, 121.12148261443787, 0.0))

In [77]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('RHS 4A Camp Vicente Lim',
                                        "Camp Vicente Lim")

#### ```Cainta Rural Health Unit```

Note: There are four (4) [Cainta Rural Health Units](https://itisdoh.pbsp.org.ph/facilities_list.php?scope=040000000&scope_level=1&typ=1&ac=1&facility1=26&Submit=Search). Since the specific Health Unit cannot be verified upon checking on Google, we will use the coordinates of Cainta, Rizal instead.

In [78]:
do_geocode('Cainta, Rizal')

Location(Cainta, Rizal, Calabarzon, Philippines, (14.592419, 121.12351813800596, 0.0))

In [79]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Cainta Rural Health Unit',
                                        "Cainta, Rizal")

#### ```General Trias City Health Office```

In [80]:
do_geocode('Health Center General Trias')

Location(General Trias, Cavite, Calabarzon, 4106, Philippines, (14.38629, 120.8802961, 0.0))

In [81]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('General Trias City Health Office',
                                        "Health Center General Trias")

#### ```Qualimed–Santa Rosa Hospital```

In [82]:
do_geocode('QualiMed Santa Rosa')

Location(QualiMed Hospital Nuvali, West Nature Avenue, Lakeside Evozone, Santo Domingo, Santa Rosa, Laguna, Calabarzon, 4026, Philippines, (14.232012600000001, 121.05271221787301, 0.0))

In [83]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace(
    ['Qualimed–Santa Rosa Hospital', 'Qualimed - Sta. Rosa Hospital'],
    "QualiMed Santa Rosa",
)

#### ```Sacred Heart Hospital Malolos```

In [84]:
do_geocode('Sacred Heart Hospital of Malolos')

Location(Sacred Heart Hospital of Malolos, Dr. Peralta, Guinhawa Subdivision, Villa Tierra Subdivision, Malolos, Bulacan, Central Luzon, 3000, Philippines, (14.8522512, 120.81768247237855, 0.0))

In [85]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Sacred Heart Hospital Malolos',
                                        "Sacred Heart Hospital of Malolos")

#### ```UHBI–Parañaque Doctors Hospital, Inc.```

In [86]:
do_geocode('Parañaque Doctors Hospital')

Location(Parañaque Doctors' Hospital, Doña Soledad Avenue, Aeropark, Don Bosco, Parañaque, Metro Manila, 1711, Philippines, (14.485656899999999, 121.02867842425692, 0.0))

In [87]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('UHBI–Parañaque Doctors Hospital, Inc.',
                                        "Parañaque Doctors Hospital")

#### ```Uni–Health Southwoods Hospital And Medical Center, Inc.```

In [88]:
do_geocode('UniHealth Southwoods Hospital And Medical Center')

Location(Unihealth Southwoods Hospital and Medical Center, Ecocentrum Avenue, Southwoods Ecocentrum, San Francisco, Biñan, Laguna, Calabarzon, 4024, Philippines, (14.33162575, 121.04759467589082, 0.0))

In [89]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Uni–Health Southwoods Hospital And Medical Center, Inc.',
                                        "UniHealth Southwoods Hospital And Medical Center")

#### ```San Juan Dios Education Foundation Inc. Hospital```

In [90]:
do_geocode('San Juan de Dios Educational Foundation Inc. - Hospital')

Location(San Juan de Dios Educational Foundation Inc. - Hospital, 2772, Roxas Boulevard, Metropolitan Park, Barangay 76, Zone 10, Pasay, District 1, Pasay, Metro Manila, 1300, Philippines, (14.53854745, 120.9933039111655, 0.0))

In [91]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('San Juan Dios Education Foundation Inc. Hospital',
                                        "San Juan de Dios Educational Foundation Inc. - Hospital")

#### ```Our Lady Pillar Medical Center```

In [92]:
do_geocode('Our Lady of the Pillar Medical Center')

Location(Our Lady of the Pillar Medical Center, Tamsui Avenue, Treelane 2, Bayan Luma IV, Imus, Cavite, Calabarzon, 4103, Philippines, (14.4191702, 120.93924976550068, 0.0))

In [93]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Our Lady Pillar Medical Center',
                                        "Our Lady of the Pillar Medical Center")

#### ```Lucena United Doctors Hospital```

In [94]:
do_geocode('Lucena United Doctors, Quezon')

Location(Lucena United Doctors General Hospital, Old Manila South Road, Isabang, Quezon, Calabarzon, 4301, Philippines, (13.946984050000001, 121.58508706872695, 0.0))

In [95]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Lucena United Doctors Hospital',
                                        "Lucena United Doctors, Quezon")

# ---

#### ```DOH Epidemiology Bureau```

In [96]:
do_geocode('Department of Health (Philippines)')

Location(Department of Health, Santa Cruz, Third District, Manila, Metro Manila, Philippines, (14.615603799999999, 120.98128128126282, 0.0))

In [97]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('DOH Epidemiology Bureau',
                                        "Department of Health (Philippines)")

#### ```Dr. Paulino J. Garcia Memorial Research Medical Hospital```

In [98]:
do_geocode('Dr. Paulino J. Garcia Memorial Research and Medical Center')

Location(Dr. Paulino J. Garcia Memorial Research and Medical Center, Gen Tinio Street, Melojavilla, Ciudad de Real, Cabanatuan, Nueva Ecija, Central Luzon, 3100, Philippines, (15.489462849999999, 120.97275970736415, 0.0))

In [99]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Dr. Paulino J. Garcia Memorial Research Medical Hospital',
                                        "Dr. Paulino J. Garcia Memorial Research and Medical Center")

#### ```Dr. Jose N. Rodriguez Memorial Hospital Sanitarium```

In [100]:
do_geocode('Dr. Jose N. Rodriguez Memorial Hospital')

Location(Dr. Jose N. Rodriguez Memorial Hospital, St. Joseph Avenue, Nhc-tala Estate Subdivision, Sto. Nino Village, 186, Caloocan, Metro Manila, 1427, Philippines, (14.76667625, 121.06468840472849, 0.0))

In [101]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Dr. Jose N. Rodriguez Memorial Hospital Sanitarium',
                                        "Dr. Jose N. Rodriguez Memorial Hospital")

#### ```Perpetual Health Medical Center - Biñan```

In [102]:
do_geocode('Perpetual Help Medical Center - Biñan')

Location(Perpetual Help Medical Center - Biñan, National Highway, South Springville, San Antonio, Biñan, Laguna, Calabarzon, 4024, Philippines, (14.33043835, 121.08533919092724, 0.0))

In [103]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Perpetual Health Medical Center - Biñan',
                                        "Perpetual Help Medical Center - Biñan")

#### ```Gerona Hospital Sacred Heart```

In [104]:
do_geocode('Gerona Hospital of the Sacred Heart')

Location(Gerona Hospital of the Sacred Heart, Gerona - Pura Road, Singat, Tarlac, Central Luzon, 2302, Philippines, (15.6085847, 120.60561125193583, 0.0))

In [105]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Gerona Hospital Sacred Heart',
                                        "Gerona Hospital of the Sacred Heart")

#### `Uni-Health Southwoods Hospital Medical Center`

In [106]:
do_geocode('Unihealth Southwoods Hospital Medical Center')

Location(Unihealth Southwoods Hospital and Medical Center, Ecocentrum Avenue, Southwoods Ecocentrum, San Francisco, Biñan, Laguna, Calabarzon, 4024, Philippines, (14.33162575, 121.04759467589082, 0.0))

In [107]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Uni-Health Southwoods Hospital Medical Center',
                                        "Unihealth Southwoods Hospital Medical Center")

#### `St. Joseph Hospital Medical Center`

In [108]:
do_geocode('Bataan St. Joseph Hospital Medical Center')

Location(Bataan St. Joseph Hospital and Medical Center, Don Manuel Banzon Avenue, Poblacion, Balanga, Bataan, Central Luzon, 2100, Philippines, (14.6814951, 120.5416046, 0.0))

In [109]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('St. Joseph Hospital Medical Center',
                                        "Bataan St. Joseph Hospital Medical Center")

#### `Uhbi-Parañaque Doctors Hospital`

In [110]:
do_geocode('Parañaque Doctors Hospital')

Location(Parañaque Doctors' Hospital, Doña Soledad Avenue, Aeropark, Don Bosco, Parañaque, Metro Manila, 1711, Philippines, (14.485656899999999, 121.02867842425692, 0.0))

In [111]:
# Assign back rather than using inplace=True on a column selection: chained
# in-place edits are not guaranteed to reach `df` in newer pandas.
df['hospital'] = df['hospital'].replace('Uhbi-Parañaque Doctors Hospital',
                                        "Parañaque Doctors Hospital")

Let's view the revised ```hospital``` string values that we weren't able to geocode previously.

In [112]:
df['hospital'][np.isnan(df['hosp_lat']) & np.isnan(df['hosp_long'])].unique()

array(['University of the East Ramon Magsaysay Memorial Medical Center',
       'Dr. Jose N. Rodriguez Memorial Hospital',
       'Bataan St. Joseph Hospital Medical Center',
       'Department of Health (Philippines)',
       'Our Lady of Lourdes Hospital',
       'Our Lady of Mt. Carmel Medical Center', 'QualiMed Santa Rosa',
       'Riverside Hospital Bacolod',
       'Dr. Paulino J. Garcia Memorial Research and Medical Center',
       'Sacred Heart Hospital of Malolos', 'Health Dept., Quezon City',
       'Perpetual Help Medical Center - Biñan',
       'Unihealth Southwoods Hospital Medical Center',
       'City of General Trias Doctors Medical Center', 'Makati City Hall',
       'Lucena United Doctors, Quezon', 'Jesus of Nazareth Hospital',
       'Hospital of the Infant Jesus Medical Center',
       'Golden Gate General Hospital',
       'Gerona Hospital of the Sacred Heart',
       'Our Lady of the Pillar Medical Center',
       'Parañaque Doctors Hospital', 'Camp Vicente Lim',


In [113]:
df['hospital'].index[np.isnan(df['hosp_lat']) & np.isnan(df['hosp_long'])].unique()

Int64Index([  4,  21,  61,  64,  66,  73,  89,  94,  99, 100, 131, 142, 169,
            195, 224, 267, 285, 286, 387, 388, 391, 392, 393, 394, 469, 470,
            472, 473, 475, 476, 486, 495, 519, 528, 560, 584, 628, 629, 648,
            651, 652, 656, 670, 688, 689, 690, 691, 692, 693, 694, 714, 716,
            735, 758, 785, 795, 796, 797, 798, 801, 802, 803],
           dtype='int64', name='case_no')

How many from each 'hospital'?

In [114]:
df['hospital'][np.isnan(df['hosp_lat']) & np.isnan(df['hosp_long'])].value_counts()

University of the East Ramon Magsaysay Memorial Medical Center    20
Our Lady of Lourdes Hospital                                      7 
Department of Health (Philippines)                                4 
Dr. Paulino J. Garcia Memorial Research and Medical Center        4 
Dr. Jose N. Rodriguez Memorial Hospital                           3 
City of General Trias Doctors Medical Center                      2 
Health Dept., Quezon City                                         2 
Makati City Hall                                                  2 
Riverside Hospital Bacolod                                        2 
Perpetual Help Medical Center - Biñan                             2 
Camp Vicente Lim                                                  1 
Bataan St. Joseph Hospital Medical Center                         1 
Lucena United Doctors, Quezon                                     1 
Parañaque Doctors Hospital                                        1 
Our Lady of Mt. Carmel Medical Cen

Now let's geocode these:

In [115]:
# Unique (already corrected) hospital names whose latitude and longitude are both still missing.
still_missing = df['hosp_lat'].isna() & df['hosp_long'].isna()
nan_hosp_names = df.loc[still_missing, 'hospital'].unique()

In [116]:
%%time

# Geocode each corrected name once.
nan_hosp_locs = pd.Series(nan_hosp_names).apply(do_geocode)

# Pull latitude / longitude out of each result; a missing result is passed through unchanged.
nan_lats = nan_hosp_locs.apply(lambda loc: loc.latitude if np.all(pd.notnull(loc)) else loc)
nan_longs = nan_hosp_locs.apply(lambda loc: loc.longitude if np.all(pd.notnull(loc)) else loc)

# Lookup tables: corrected hospital name -> coordinate.
nan_geodict_lat = {name: lat for name, lat in zip(nan_hosp_names, nan_lats)}
nan_geodict_long = {name: lon for name, lon in zip(nan_hosp_names, nan_longs)}

Wall time: 24 s


In [117]:
nan_hosp_locs_lat = df['hospital'].map(nan_geodict_lat)

In [118]:
nan_hosp_locs_long = df['hospital'].map(nan_geodict_long)

Fill in the ```NaN``` values:

In [119]:
df['hosp_lat'] = df['hosp_lat'].fillna(nan_hosp_locs_lat)

In [120]:
df['hosp_long'] = df['hosp_long'].fillna(nan_hosp_locs_long)

Check if there are still ```NaN``` values. There should be 0.

In [121]:
df['hosp_lat'].isna().sum()

0

In [122]:
df['hosp_long'].isna().sum()

0

In [123]:
# Sanity check: list hospitals whose latitude and longitude are both still missing.
df.loc[df['hosp_lat'].isna() & df['hosp_long'].isna(), 'hospital'].value_counts()

Series([], Name: hospital, dtype: int64)

##### (Repeat the manual data cleaning every time the dataset is refreshed.)

### Out of Place Points

These are locations that should have been within Philippine Territory but were found outside because of the limitations of geocoding.

#### ```St. Louis Hospital```

In [124]:
# Look up the name exactly as it is recorded in the dataset.
do_geocode("St. Louis Hospital")

Location(St.Louis Hospital, NH44;48, Krishnagiri, Tamil Nadu, 635001, India, (12.5252135, 78.2149263, 0.0))

In [125]:
# Retry with a more specific name.
do_geocode("St. Louis Hospital Tacurong")

Location(Saint Louis Hospital, Tacurong - Isulan National Highway, New Isabela, Purok Sampaguita, Tacurong, Sultan Kudarat, Soccsksargen, 9800, Philippines, (6.67852345, 124.66699554588106, 0.0))

In [126]:
# Rename both spellings found in the data to the name that geocodes correctly.
# Assign the result back rather than using replace(..., inplace=True) on the
# df['hospital'] selection; the chained in-place form is not guaranteed to
# modify df under pandas Copy-on-Write.
df['hospital'] = df['hospital'].replace(['St. Louis Hospital', 'St. Loius Hospital'],
                                        value="St. Louis Hospital Tacurong")

In [127]:
# Geocode the corrected name once and use that single result for both
# coordinates, so latitude and longitude always come from the same lookup.
st_louis_loc = do_geocode('St. Louis Hospital Tacurong')
st_louis_rows = df['hospital'] == "St. Louis Hospital Tacurong"
df.loc[st_louis_rows, 'hosp_lat'] = st_louis_loc.latitude
df.loc[st_louis_rows, 'hosp_long'] = st_louis_loc.longitude

#### ```The Medical City```

In [128]:
# Look up the name exactly as it is recorded in the dataset.
do_geocode("The Medical City")

Location(Medical City, Rusafa, بغداد, Al Resafa, محافظة بغداد, Iraq, (33.3486967, 44.37779385218179, 0.0))

In [129]:
# Retry with a more specific name.
do_geocode("The Medical City - Ortigas")

Location(The Medical City, Ortigas Avenue, Ugong, Pasig, Metro Manila, 1604, Philippines, (14.589964, 121.06906339610711, 0.0))

In [130]:
# Rename to the more specific name that geocodes correctly.
# Assign the result back rather than using replace(..., inplace=True) on the
# df['hospital'] selection; the chained in-place form is not guaranteed to
# modify df under pandas Copy-on-Write.
df['hospital'] = df['hospital'].replace('The Medical City',
                                        value="The Medical City - Ortigas")

In [131]:
# Geocode the corrected name once and use that single result for both
# coordinates, so latitude and longitude always come from the same lookup.
medical_city_loc = do_geocode('The Medical City - Ortigas')
medical_city_rows = df['hospital'] == "The Medical City - Ortigas"
df.loc[medical_city_rows, 'hosp_lat'] = medical_city_loc.latitude
df.loc[medical_city_rows, 'hosp_long'] = medical_city_loc.longitude

#### ```Lung Center Philippine```

In [132]:
# Look up the name exactly as it is recorded in the dataset.
do_geocode("Lung Center Philippine")

Location(Embassy of the Philippines, 6-11, Suffolk Street, St. James's, Covent Garden, City of Westminster, London, Greater London, England, SW1Y 4HG, United Kingdom, (51.5086102, -0.13067022021816063, 0.0))

In [133]:
# Retry with the hospital's full name.
do_geocode("Lung Center of the Philippines")

Location(Lung Center of the Philippines, Quezon Avenue, East Triangle, Pinyahan, Quezon City, 4th District, Quezon City, Metro Manila, 1100, Philippines, (14.64748145, 121.04587919677965, 0.0))

In [134]:
# Rename to the full name that geocodes correctly.
# Assign the result back rather than using replace(..., inplace=True) on the
# df['hospital'] selection; the chained in-place form is not guaranteed to
# modify df under pandas Copy-on-Write.
df['hospital'] = df['hospital'].replace('Lung Center Philippine',
                                        value="Lung Center of the Philippines")

In [135]:
# Geocode the corrected name once and use that single result for both
# coordinates, so latitude and longitude always come from the same lookup.
lung_center_loc = do_geocode('Lung Center of the Philippines')
lung_center_rows = df['hospital'] == "Lung Center of the Philippines"
df.loc[lung_center_rows, 'hosp_lat'] = lung_center_loc.latitude
df.loc[lung_center_rows, 'hosp_long'] = lung_center_loc.longitude

#### ```CDJ Hospital```

In [136]:
# Look up the name exactly as it is recorded in the dataset.
do_geocode("CDJ Hospital")

Location(Aeroporto de Conceição do Araguaia - Bendito Roque, PA-447, Cruzeiro, Conceição do Araguaia, Microrregião de Conceição do Araguaia, Mesorregião Sudeste Paraense, Pará, Região Norte, Brasil, (-8.349763800000002, -49.304938509872315, 0.0))

No 'CDJ Hospital' could be located within the Philippines, so its rows are relabeled as 'TBA' and then dropped together with any other 'TBA' rows.

In [137]:
# Relabel the hospital that cannot be located as "TBA".
# Assign the result back rather than using replace(..., inplace=True) on the
# df['hospital'] selection; the chained in-place form is not guaranteed to
# modify df under pandas Copy-on-Write.
df['hospital'] = df['hospital'].replace('CDJ Hospital',
                                        value="TBA")

In [138]:
# Remove every row whose hospital is "TBA" (dropped by index label).
tba_index = df.loc[df['hospital'] == 'TBA'].index
df = pd.DataFrame(df.drop(index=tba_index))

#### ```Victoriano Luna Medical Center```

In [139]:
# Look up the name exactly as it is recorded in the dataset.
do_geocode("Victoriano Luna Medical Center")

Location(Ing. Alberto Victoriano Luna, Polo, Grünbein, Partido de Bahía Blanca, Buenos Aires, Argentina, (-38.7496219, -62.1953897, 0.0))

In [140]:
# Retry with an alternative name for the hospital.
do_geocode("AFP Medical Center")

Location(Armed Forces of the Philippines Medical Center, V. Luna Avenue, East Triangle, Pinyahan, Quezon City, 4th District, Quezon City, Metro Manila, 1100, Philippines, (14.63468575, 121.05210336549419, 0.0))

In [141]:
# Rename to the alternative name that geocodes correctly.
# Assign the result back rather than using replace(..., inplace=True) on the
# df['hospital'] selection; the chained in-place form is not guaranteed to
# modify df under pandas Copy-on-Write.
df['hospital'] = df['hospital'].replace('Victoriano Luna Medical Center',
                                        value="AFP Medical Center")

In [142]:
# Geocode the corrected name once and use that single result for both
# coordinates, so latitude and longitude always come from the same lookup.
afp_medical_loc = do_geocode('AFP Medical Center')
afp_medical_rows = df['hospital'] == "AFP Medical Center"
df.loc[afp_medical_rows, 'hosp_lat'] = afp_medical_loc.latitude
df.loc[afp_medical_rows, 'hosp_long'] = afp_medical_loc.longitude

#### ```La Union Medical Center```

In [143]:
# Look up the name exactly as it is recorded in the dataset.
do_geocode("La Union Medical Center")

Location(Union Medical Center, 322, West South Street, Union, Union County, South Carolina, 29379, United States of America, (34.7123931, -81.62929901086534, 0.0))

In [144]:
# Retry with the region appended to the name.
do_geocode("La Union Medical Center Ilocos")

Location(La Union Medical Center (LUMC), Manila North Road, Nazareno, La Union, Ilocos, 2504, Philippines, (16.3466222, 120.3635465, 0.0))

In [145]:
# Rename to the more specific name that geocodes correctly.
# Assign the result back rather than using replace(..., inplace=True) on the
# df['hospital'] selection; the chained in-place form is not guaranteed to
# modify df under pandas Copy-on-Write.
df['hospital'] = df['hospital'].replace('La Union Medical Center',
                                        value="La Union Medical Center Ilocos")

In [146]:
# Geocode the corrected name once and use that single result for both
# coordinates, so latitude and longitude always come from the same lookup.
la_union_loc = do_geocode('La Union Medical Center Ilocos')
la_union_rows = df['hospital'] == "La Union Medical Center Ilocos"
df.loc[la_union_rows, 'hosp_lat'] = la_union_loc.latitude
df.loc[la_union_rows, 'hosp_long'] = la_union_loc.longitude

#### ```Providence Hospital```

In [147]:
# Look up the name exactly as it is recorded in the dataset.
do_geocode("Providence Hospital")

Location(Providence Hospital, 1150, Varnum Street Northeast, Washington, District of Columbia, 20017, United States of America, (38.9445417, -76.99340684192414, 0.0))

In [148]:
# Retry with a more specific name.
do_geocode("Providence Hospital Quezon")

Location(Providence Hospital, Quezon Avenue, West Triangle, Quezon City, 1st District, Quezon City, Metro Manila, 1104, Philippines, (14.641224650000002, 121.03167331232544, 0.0))

In [149]:
# Rename to the more specific name that geocodes correctly.
# Assign the result back rather than using replace(..., inplace=True) on the
# df['hospital'] selection; the chained in-place form is not guaranteed to
# modify df under pandas Copy-on-Write.
df['hospital'] = df['hospital'].replace('Providence Hospital',
                                        value="Providence Hospital Quezon")

In [150]:
# Geocode the corrected name once and use that single result for both
# coordinates, so latitude and longitude always come from the same lookup.
providence_loc = do_geocode('Providence Hospital Quezon')
providence_rows = df['hospital'] == "Providence Hospital Quezon"
df.loc[providence_rows, 'hosp_lat'] = providence_loc.latitude
df.loc[providence_rows, 'hosp_long'] = providence_loc.longitude

#### ```Manila Medical Center```

In [151]:
# Look up the name exactly as it is recorded in the dataset.
do_geocode("Manila Medical Center")

Location(Manila, Humboldt County, California, United States of America, (40.8517929, -124.1622856, 0.0))

In [152]:
# Retry with the words of the name reordered.
do_geocode("Medical Center Manila")

Location(Medical Center Manila, General Luna Street, Barangay 676, Ermita, Fifth District, Manila, Metro Manila, 1000, Philippines, (14.58248, 120.98543971329903, 0.0))

In [153]:
# Rename to the reordered name that geocodes correctly.
# Assign the result back rather than using replace(..., inplace=True) on the
# df['hospital'] selection; the chained in-place form is not guaranteed to
# modify df under pandas Copy-on-Write.
df['hospital'] = df['hospital'].replace('Manila Medical Center',
                                        value="Medical Center Manila")

In [154]:
# Geocode the corrected name once and use that single result for both
# coordinates, so latitude and longitude always come from the same lookup.
medical_center_manila_loc = do_geocode('Medical Center Manila')
medical_center_manila_rows = df['hospital'] == "Medical Center Manila"
df.loc[medical_center_manila_rows, 'hosp_lat'] = medical_center_manila_loc.latitude
df.loc[medical_center_manila_rows, 'hosp_long'] = medical_center_manila_loc.longitude

#### ```Adventist Medical Center```

In [155]:
# Look up the name exactly as it is recorded in the dataset.
do_geocode("Adventist Medical Center")

Location(Adventist Medical Center, 10123, Southeast Market Street, Russellville, Hazelwood, Portland, Multnomah County, Oregon, 97216, United States of America, (45.5126758, -122.55906204416371, 0.0))

In [156]:
# Retry with a more specific name.
do_geocode("Adventist Medical Center Manila")

Location(Adventist Medical Center Manila, 1945, Donada, Barangay 36, Zone 3, Pasay, District 1, Pasay, Metro Manila, 1300, Philippines, (14.55602185, 120.9953013915066, 0.0))

In [157]:
# Rename to the more specific name that geocodes correctly.
# Assign the result back rather than using replace(..., inplace=True) on the
# df['hospital'] selection; the chained in-place form is not guaranteed to
# modify df under pandas Copy-on-Write.
df['hospital'] = df['hospital'].replace('Adventist Medical Center',
                                        value="Adventist Medical Center Manila")

In [158]:
# Geocode the corrected name once and use that single result for both
# coordinates, so latitude and longitude always come from the same lookup.
adventist_loc = do_geocode('Adventist Medical Center Manila')
adventist_rows = df['hospital'] == "Adventist Medical Center Manila"
df.loc[adventist_rows, 'hosp_lat'] = adventist_loc.latitude
df.loc[adventist_rows, 'hosp_long'] = adventist_loc.longitude

#### ```National Center Mental Health```

In [159]:
# Look up the name exactly as it is recorded in the dataset.
do_geocode("National Center Mental Health")

Location(Mental Health, Heather Street, South Cambie, Vancouver, Metro Vancouver Regional District, British Columbia, V5Z, Canada, (49.244127649999996, -123.12149095596945, 0.0))

In [160]:
# Retry with the hospital's full name.
do_geocode("National Center for Mental Health")

Location(National Center for Mental Health, Nueve de Febrero, Block 41 Zone 8, Mauway, Mandaluyong, Metro Manila, 1553, Philippines, (14.5816602, 121.04126559258651, 0.0))

In [161]:
# Rename to the full name that geocodes correctly.
# Assign the result back rather than using replace(..., inplace=True) on the
# df['hospital'] selection; the chained in-place form is not guaranteed to
# modify df under pandas Copy-on-Write.
df['hospital'] = df['hospital'].replace('National Center Mental Health',
                                        value="National Center for Mental Health")

In [162]:
# Geocode the corrected name once and use that single result for both
# coordinates, so latitude and longitude always come from the same lookup.
ncmh_loc = do_geocode('National Center for Mental Health')
ncmh_rows = df['hospital'] == "National Center for Mental Health"
df.loc[ncmh_rows, 'hosp_lat'] = ncmh_loc.latitude
df.loc[ncmh_rows, 'hosp_long'] = ncmh_loc.longitude

#### `Sinai Hospital`

In [163]:
# Look up the name exactly as it is recorded in the dataset.
do_geocode("Sinai Hospital")

Location(سيناء, جنوب سيناء, Egypt / مصر, (29.52316965, 33.80547257650801, 0.0))

In [164]:
# Retry with the hospital's full name.
do_geocode("New Sinai MDI Hospital")

Location(New Sinai MDI Hospital, National Highway, Tagapo, Platero, Biñan, Laguna, Calabarzon, 4026, Philippines, (14.31764895, 121.09872571949757, 0.0))

In [165]:
# Rename to the full name that geocodes correctly.
# Assign the result back rather than using replace(..., inplace=True) on the
# df['hospital'] selection; the chained in-place form is not guaranteed to
# modify df under pandas Copy-on-Write.
df['hospital'] = df['hospital'].replace('Sinai Hospital',
                                        value="New Sinai MDI Hospital")

In [166]:
# Geocode the corrected name once and use that single result for both
# coordinates, so latitude and longitude always come from the same lookup.
new_sinai_loc = do_geocode('New Sinai MDI Hospital')
new_sinai_rows = df['hospital'] == "New Sinai MDI Hospital"
df.loc[new_sinai_rows, 'hosp_lat'] = new_sinai_loc.latitude
df.loc[new_sinai_rows, 'hosp_long'] = new_sinai_loc.longitude

### Save ```df``` to ```.csv```

Finally.

In [167]:
# Stamp the output file name with the time of this run (day-month-year_HhMmSs).
today_stamp = pd.to_datetime('today').strftime('%d-%m-%Y_%Hh%Mm%Ss')
output_csv = 'COVID19PHforMappingModified_{0}.csv'.format(today_stamp)

# Write the cleaned frame, labelling the index column as case_no.
df.to_csv(output_csv, index_label='case_no')

This Jupyter Notebook finished its re-run today at:

In [168]:
# Show the time at which the re-run finished.
pd.to_datetime("today")

Timestamp('2020-03-31 20:23:52.040312')