In [40]:
#Import required libraries
import sys
import pandas as pd
import numpy as np
import urllib
import sqlalchemy

In [41]:
#Import project specific functions
#sys.path.append('..')
from column_map import column_map
from yesno_functions import *
from format_datetime import *

In [42]:
#Import shared functions
#Add the parent directory to the module search path before importing the
#IPM_Shared_Code_public package (presumably the package lives one level up -- TODO confirm)
sys.path.append('..')
#sys.path.append('..\..')
from IPM_Shared_Code_public.Python.utils import get_config
#NOTE(review): the wildcard imports below hide where the helpers come from. hash_rows,
#check_deltas, sql_update and read_google_sheet are not defined in this notebook, so they
#presumably arrive through these imports -- confirm and consider importing them by name.
from IPM_Shared_Code_public.Python.delta_functions import *
from IPM_Shared_Code_public.Python.sql_functions import *
from IPM_Shared_Code_public.Python.google_functions import *

### Use the config file to set up connections

In [43]:
#Load the connection settings from the local config file.
#The path is written as a raw string so the Windows backslashes are taken literally
#instead of being parsed as (invalid) escape sequences such as \P and \c.
config = get_config(r'c:\Projects\config.ini')

#Values read from the [srv] section: ODBC driver name and server name
driver = config['srv']['driver']
server = config['srv']['server']
#Database name stored under the 'crowdsdb' key of the [db] section
dwh = config['db']['crowdsdb']
#File path from the [google] section (presumably the credentials file for the Sheets API -- confirm)
cred_file = config['google']['path_to_file']

In [44]:
#Assemble the ODBC connection string from the config values (trusted connection, no password)
con_string = ''.join(['Driver={', driver, '};Server=', server, ';Database=', dwh, ';Trusted_Connection=Yes;'])
#URL-encode the string so it can be passed through the odbc_connect query parameter
params = urllib.parse.quote_plus(con_string)
#Create the SQLAlchemy engine used by every SQL read and write in this notebook
engine = sqlalchemy.create_engine("mssql+pyodbc:///?odbc_connect=%s" % (params,))

### Execute the function to get the renamed columns for this sheet

In [45]:
#Call the column map function to get the dictionary to be used for renaming and subsetting the columns
#NOTE(review): the inversion applied afterwards reads v[0] of every value, so each value is
#presumably a sequence whose first item identifies the sheet column -- confirm against column_map
col_rename = column_map('ambassador_dpr')

In [46]:
#The sheet contains duplicate column names, so columns are renamed by column index instead.
#Invert the mapping: the first element of each value becomes the key and the original key
#becomes the value.
#Note: col_rename is overwritten with its own inverse, so re-running this cell on its own
#would operate on the already-inverted dictionary.
col_rename = dict((positions[0], name) for name, positions in col_rename.items())

In [47]:
#Target column names, in mapping order; used later to subset the sheet data
cols = [*col_rename.values()]

### Read the site reference list from SQL

In [63]:
#Keep a single active row per (site_desc, park_borough) pair by numbering the rows of each
#group and keeping only n = 1.
#NOTE(review): the window is ordered by site_desc, which is also a partition column, so all
#rows in a group tie and the query does not determine which duplicate receives n = 1.
#Consider ordering by a tie-breaking column (e.g. site_id) after confirming the effect on
#keys that are already stored.
sql = '''select *
         from (select *,
                      row_number() over(partition by site_desc, park_borough order by site_desc) as n
               from crowdsdb.dbo.tbl_ref_park_sites
               where active = 1) as t
         where n = 1'''

In [64]:
#Run the site reference query
site_ref = pd.read_sql(sql = sql, con = engine)
#Rename park_borough to borough so it lines up with the borough column of the sheet data
site_ref = site_ref.rename(columns = {'park_borough': 'borough'})
#Only the id and the two lookup columns are needed for the join
site_ref = site_ref[['site_id', 'site_desc', 'borough']]

### Read the current data from SQL

In [50]:
#Query every column of the ambassador table currently stored in SQL (overwrites the previous sql string)
sql = 'select * from crowdsdb.dbo.tbl_dpr_ambassador'

In [51]:
#Load the rows that already exist in SQL
ambass_sql = pd.read_sql(sql = sql, con = engine)
#Leave out ambassador_id and patroncount
ambass_sql = ambass_sql.drop(columns = ['ambassador_id', 'patroncount'])
#Represent missing values as NaN
ambass_sql = ambass_sql.fillna(value = np.nan, axis = 1)

In [52]:
#Remember the SQL column layout; the sheet data is reordered/subset to match it later
sql_cols = [*ambass_sql.columns.values]

In [53]:
#Only format the datetime columns when the SQL table returned at least one row.
#DataFrame.shape is a (rows, columns) tuple property, not a method, so it is indexed
#directly; calling it raised "TypeError: 'tuple' object is not callable".
if ambass_sql.shape[0] > 0:
    #The return value is not used, so format_datetime presumably modifies the column in place -- confirm
    format_datetime(ambass_sql, 'encounter_timestamp')
    format_datetime(ambass_sql, 'encounter_datetime')

In [54]:
#Columns cast to float below; the same four names are later passed to yesno_cols for the sheet data
float_cols = ['sd_pdcontact', 'closed_approach', 'closed_outcome', 'closed_pdcontact']
#Cast one column at a time; each assignment replaces the column with its float version
for c in float_cols:
    ambass_sql[c] = ambass_sql[c].astype(float)

In [55]:
#Hash the SQL rows under the name 'row_hash', leaving the three key columns out of the hash.
#The return value is not used, so hash_rows presumably adds the column in place -- confirm.
hash_rows(ambass_sql, exclude_cols = ['site_id', 'encounter_timestamp', 'encounter_datetime'], hash_name = 'row_hash')

### Read the latest data from Google Sheets

In [56]:
#Download the 'Form Responses 1' tab of the reporting sheet; header = None is passed so the
#header row comes back as ordinary data (row 0)
ambass = read_google_sheet(cred_file, '_Combined INTERNAL PARKS Ambassador COVID-19 Reporting (Responses)',
                           'Form Responses 1', drop_empty_cols = False, evaluate_formulas = True, header = None)
#Always remove row 0 with the column headers
ambass = ambass.iloc[1:]
#Rename the columns with the inverted column map and represent missing values as NaN
ambass = ambass.rename(columns = col_rename).fillna(value = np.nan, axis = 1)
#Keep only the mapped columns
ambass = ambass[cols]

In [57]:
#Preview the first rows of the renamed and subset sheet data
ambass.head()

Unnamed: 0,encounter_timestamp,encounter_datetime,site_desc,location_adddesc,park_division,firstname_1,lastname_1,firstname_2,lastname_2,firstname_3,...,sd_amenity,sd_pdcontact,sd_comments,closed_amenity,closed_patroncount,closed_approach,closed_outcome,closed_pdcontact,closed_comments,borough
1,6/30/2020 13:46:58,6/30/2020 13:00:00,St. John's Park,Troy & Schenectady Avenues,Recreation,Andrea,Williams,Clinton,Alston,,...,,,,,,,,,,Brooklyn
2,6/30/2020 12:54:28,6/30/2020 12:40:00,Tanahey Playground,,Aquatics,Brian,Hyman,George,Giraldo,Luke,...,,,,,,,,,,Manhattan
3,6/30/2020 12:52:57,6/30/2020 12:30:00,Alfred E. Smith Playground,,Aquatics,Brian,Hyman,George,Giraldo,Luke,...,,,,,,,,,,Manhattan
4,6/30/2020 12:14:15,6/30/2020 10:10:00,John Jay Park,Children's playground,Recreation,David,Velesquez,Matthew,McEnerey,Alisa,...,Playground,,Camp or daycare with 3 adults and 12 children ...,,,,,,,Manhattan
5,6/30/2020 12:06:34,6/30/2020 13:00:00,Midland Field,Midland Beach,Aquatics,Candace,Senior,Crystal,Teoh,Junbin,...,Bench/sitting area,,,,,,,,,Staten Island


In [58]:
#Columns handed to yesno_cols; these are the same four columns that are cast to float on the SQL side
yesno = ['sd_pdcontact', 'closed_approach', 'closed_outcome', 'closed_pdcontact']

In [59]:
#Apply the project's yes/no conversion to the listed columns.
#The return value is not used, so yesno_cols presumably modifies ambass in place -- confirm.
yesno_cols(ambass, yesno)

In [60]:
#Rows without a timestamp carry no data, so only rows with a non-null encounter_timestamp are kept
ambass = ambass.loc[ambass['encounter_timestamp'].notna()]

In [61]:
#Look up the site_id for each response by matching on site description and borough;
#a left join keeps responses that have no match in the reference list
ambass = pd.merge(ambass, site_ref, how = 'left', on = ['site_desc', 'borough'])
#Reduce and reorder the columns to the layout of the SQL table
ambass = ambass[sql_cols]

In [26]:
#Format both datetime columns of the sheet data with the same helper that is used for the SQL data.
#The return values are not assigned, so format_datetime presumably works in place -- confirm.
format_datetime(ambass, 'encounter_timestamp')
format_datetime(ambass, 'encounter_datetime')

In [27]:
#Preview the sheet data after the site lookup and datetime formatting
ambass.head()

Unnamed: 0,encounter_timestamp,encounter_datetime,site_id,location_adddesc,park_division,firstname_1,lastname_1,firstname_2,lastname_2,firstname_3,...,sd_amenity,sd_pdcontact,sd_comments,closed_amenity,closed_patroncount,closed_approach,closed_outcome,closed_pdcontact,closed_comments,borough
0,06-15-2020 08:25:09,06-15-2020 08:00:00,X104,,Recreation,jeantheia,Hampton,,,,...,Open field/multi-purpose play area,,,,,,,,,Bronx
1,06-15-2020 08:08:27,06-15-2020 15:00:00,X045,,Recreation,jabor,forde,,,,...,Bench/sitting area,,there were over 200 people in the park,,,,,,,Bronx
2,06-14-2020 18:28:53,06-14-2020 18:30:00,R046,,Park Administrator Ambassador,eric,means,sean,capers,toosdhi,...,Parking Lot,,,,,,,,,Staten Island
3,06-14-2020 18:27:33,06-14-2020 18:30:00,R046,,Park Administrator Ambassador,Dane,Ambrose,Moses,Peters,,...,Walking path,,,,,,,,,Staten Island
4,06-14-2020 18:26:39,06-14-2020 18:30:00,R046,,Park Administrator Ambassador,Waynette,Saunders,,,,...,Baseball field,,,,,,,,,Staten Island


In [28]:
#Hash the sheet rows with the same excluded key columns and hash name as the SQL rows,
#so that the two hashes can be compared when looking for deltas
hash_rows(ambass, exclude_cols = ['site_id', 'encounter_timestamp', 'encounter_datetime'], hash_name = 'row_hash')

### Find the deltas based on the row hashes

In [29]:
#Compare the sheet rows (new) with the SQL rows (old) on the three key columns; the
#resulting verb for each row is stored in the dml_verb column
ambass_deltas = check_deltas(new_df = ambass, old_df = ambass_sql,
                             on = ['encounter_timestamp', 'site_id', 'encounter_datetime'],
                             hash_name = 'row_hash', dml_col = 'dml_verb')
#Keep the SQL columns plus the verb
ambass_deltas = ambass_deltas[[*sql_cols, 'dml_verb']]

In [30]:
#Preview the deltas together with their dml_verb
ambass_deltas.head()

Unnamed: 0,encounter_timestamp,encounter_datetime,site_id,location_adddesc,park_division,firstname_1,lastname_1,firstname_2,lastname_2,firstname_3,...,sd_pdcontact,sd_comments,closed_amenity,closed_patroncount,closed_approach,closed_outcome,closed_pdcontact,closed_comments,borough,dml_verb
0,06-15-2020 08:25:09,06-15-2020 08:00:00,X104,,Recreation,jeantheia,Hampton,,,,...,,,,,,,,,Bronx,I
1,06-15-2020 08:08:27,06-15-2020 15:00:00,X045,,Recreation,jabor,forde,,,,...,,there were over 200 people in the park,,,,,,,Bronx,I
2,06-14-2020 18:28:53,06-14-2020 18:30:00,R046,,Park Administrator Ambassador,eric,means,sean,capers,toosdhi,...,,,,,,,,,Staten Island,I
3,06-14-2020 18:27:33,06-14-2020 18:30:00,R046,,Park Administrator Ambassador,Dane,Ambrose,Moses,Peters,,...,,,,,,,,,Staten Island,I
4,06-14-2020 18:26:39,06-14-2020 18:30:00,R046,,Park Administrator Ambassador,Waynette,Saunders,,,,...,,,,,,,,,Staten Island,I


In [31]:
#Rows flagged 'I' are the inserts; select them with only the SQL table columns
ambass_inserts = ambass_deltas.loc[ambass_deltas['dml_verb'] == 'I', sql_cols]

In [32]:
#Preview the rows that are about to be inserted
ambass_inserts.head()

Unnamed: 0,encounter_timestamp,encounter_datetime,site_id,location_adddesc,park_division,firstname_1,lastname_1,firstname_2,lastname_2,firstname_3,...,sd_amenity,sd_pdcontact,sd_comments,closed_amenity,closed_patroncount,closed_approach,closed_outcome,closed_pdcontact,closed_comments,borough
0,06-15-2020 08:25:09,06-15-2020 08:00:00,X104,,Recreation,jeantheia,Hampton,,,,...,Open field/multi-purpose play area,,,,,,,,,Bronx
1,06-15-2020 08:08:27,06-15-2020 15:00:00,X045,,Recreation,jabor,forde,,,,...,Bench/sitting area,,there were over 200 people in the park,,,,,,,Bronx
2,06-14-2020 18:28:53,06-14-2020 18:30:00,R046,,Park Administrator Ambassador,eric,means,sean,capers,toosdhi,...,Parking Lot,,,,,,,,,Staten Island
3,06-14-2020 18:27:33,06-14-2020 18:30:00,R046,,Park Administrator Ambassador,Dane,Ambrose,Moses,Peters,,...,Walking path,,,,,,,,,Staten Island
4,06-14-2020 18:26:39,06-14-2020 18:30:00,R046,,Park Administrator Ambassador,Waynette,Saunders,,,,...,Baseball field,,,,,,,,,Staten Island


In [33]:
#Append the insert rows to tbl_dpr_ambassador; the DataFrame index is not written
ambass_inserts.to_sql('tbl_dpr_ambassador', engine, index = False, if_exists = 'append')

In [34]:
#Rows flagged 'U' are the updates; select them with only the SQL table columns
ambass_updates = ambass_deltas.loc[ambass_deltas['dml_verb'] == 'U', sql_cols]

In [35]:
#Preview the rows that are about to be updated
ambass_updates.head()

Unnamed: 0,encounter_timestamp,encounter_datetime,site_id,location_adddesc,park_division,firstname_1,lastname_1,firstname_2,lastname_2,firstname_3,...,sd_amenity,sd_pdcontact,sd_comments,closed_amenity,closed_patroncount,closed_approach,closed_outcome,closed_pdcontact,closed_comments,borough


In [36]:
#Send the update rows to tbl_dpr_ambassador through the shared sql_update helper.
#The list presumably names the key columns used to match existing rows -- confirm against sql_update.
sql_update(ambass_updates, 'tbl_dpr_ambassador', engine, ['encounter_timestamp', 'encounter_datetime', 'site_id'])