In [1]:
#Import required libraries
import sys
import urllib
# Imported explicitly: "import urllib" alone does not load the parse submodule.
import urllib.parse

import gspread
import numpy as np
import pandas as pd
import sqlalchemy
from gspread_dataframe import get_as_dataframe
from gspread_dataframe import set_with_dataframe
from oauth2client.service_account import ServiceAccountCredentials

In [2]:
#Import project specific functions
from column_map import column_map
from yesno_functions import *

In [3]:
#Import shared functions
# '../..' is a relative entry, so it is resolved against the kernel's current
# working directory; presumably that must be this notebook's own folder for
# IPM_Shared_Code_public to be importable - confirm before running elsewhere.
sys.path.append('../..')
from IPM_Shared_Code_public.Python.google_creds_functions import create_assertion_session
from IPM_Shared_Code_public.Python.utils import get_config
# NOTE(review): star import - hash_rows used further down is presumably
# provided by this module; confirm and import it by name.
from IPM_Shared_Code_public.Python.delta_functions import *

It will be compatible before version 1.0.
Read more <https://git.io/Jeclj#file-rn-md>
  deprecate('Deprecate "authlib.client", USE "authlib.integrations.requests_client" instead.', '1.0', 'Jeclj', 'rn')


### Use the config file to setup connections

In [4]:
# Load the connection settings. The path is written as a raw string so the
# backslashes are literal; the previous plain literal relied on "\P" and "\c"
# being unrecognised escapes, which Python reports as deprecated.
# NOTE(review): this is an absolute, machine-specific path.
config = get_config(r'c:\Projects\config.ini')

# ODBC driver and server name for the SQL connection.
driver = config['srv']['driver']
server = config['srv']['server']
# Database name stored under the 'crowdsdb' key of the [db] section.
dwh = config['db']['crowdsdb']
# Path to the Google service-account credentials file.
cred_file = config['google']['path_to_file']

In [5]:
# Build the ODBC connection string (Windows trusted connection, no password)
# from the configured driver, server and database. The doubled braces in the
# template produce the literal "{" and "}" that wrap the driver name.
con_string = 'Driver={{{}}};Server={};Database={};Trusted_Connection=Yes;'.format(
    driver, server, dwh)

# The whole ODBC string is URL-encoded and handed to pyodbc through the
# odbc_connect query parameter. urllib.parse is imported explicitly in the
# import cell rather than relying on another package having loaded it.
params = urllib.parse.quote_plus(con_string)
engine = sqlalchemy.create_engine("mssql+pyodbc:///?odbc_connect={}".format(params))

### Execute the function to get the renamed columns for this sheet

In [6]:
# Fetch the column dictionary for this sheet; it drives both the renaming and
# the subsetting of the Google Sheet columns in the cells below.
sheet_key = 'ambassador_dpr'
col_rename = column_map(sheet_key)

In [7]:
# The sheet contains duplicate column names, so (per the original note) its
# columns are renamed by column index: the map is inverted so that the first
# element of each value becomes the key and the original key becomes the value.
#
# The map is rebuilt from column_map() rather than from the current value of
# col_rename, so re-running this cell gives the same result instead of
# inverting an already-inverted dictionary.
# NOTE(review): assumes column_map returns the same mapping on every call.
col_rename = {v[0]: k for k, v in column_map('ambassador_dpr').items()}

In [8]:
# Target column names (the values of the rename map), used to subset the sheet.
cols = [*col_rename.values()]

### Read the site reference list from SQL

In [10]:
# Only site_id and site_desc are kept from this table when it is read, so
# request just those two columns instead of every column in the table.
sql = 'select site_id, site_desc from crowdsdb.dbo.tbl_ref_sites'

In [11]:
# Site lookup table: keep only the id and the description used as join key.
site_ref_cols = ['site_id', 'site_desc']
site_ref = pd.read_sql(sql, engine)[site_ref_cols]

### Read the current data from SQL

In [12]:
# Query for the ambassador rows currently stored in SQL (all columns).
sql = ('select * '
       'from crowdsdb.dbo.tbl_dpr_ambassador')

In [13]:
# Current contents of the ambassador table.
ambass_sql_raw = pd.read_sql(sql, engine)

# ambassador_id is dropped (presumably a database-generated key with no
# counterpart in the sheet - confirm), and missing values are filled with NaN.
ambass_sql = (
    ambass_sql_raw
    .drop(['ambassador_id'], axis='columns')
    .fillna(value=np.nan, axis=1)
)

In [14]:
# Column names of the SQL table, later used to order/subset the sheet data.
# NOTE(review): this runs before hash_rows(ambass_sql, ...); if that call adds
# a column in place, re-running this cell afterwards would pick it up too.
sql_cols = ambass_sql.columns.tolist()

In [15]:
# Hash the SQL rows under the name 'row_hash', leaving site_id and
# encounter_timestamp out of the hash. The return value is not assigned, so
# this presumably updates ambass_sql in place - confirm against hash_rows.
hash_rows(
    ambass_sql,
    exclude_cols=['site_id', 'encounter_timestamp'],
    hash_name='row_hash',
)

### Read the latest data from Google Sheets

In [16]:
# OAuth scopes requested for the service account (Sheets feed and Drive).
scope = [
    'https://spreadsheets.google.com/feeds',
    'https://www.googleapis.com/auth/drive',
]

# Build credentials from the key file named in the config and authorise gspread.
creds = ServiceAccountCredentials.from_json_keyfile_name(cred_file, scope)
client = gspread.authorize(creds)

In [17]:
# Open the responses spreadsheet by its title.
sheet_title = '_Combined INTERNAL PARKS Ambassador COVID-19 Reporting (Responses)'
sheet = client.open(sheet_title)

In [18]:
# Select the tab that holds the form responses.
worksheet_title = 'Form Responses 1'
ws = sheet.worksheet(worksheet_title)

In [19]:
# Read the sheet without a header row so that columns can be renamed through
# col_rename (needed because the sheet has duplicate header names).
responses_raw = get_as_dataframe(ws, evaluate_formulas=True, header=None)

# Row 0 holds the sheet's own column headers and is always discarded; the
# remaining rows are renamed, missing values filled with NaN, and only the
# mapped columns are kept.
responses_renamed = (
    responses_raw
    .iloc[1:]
    .rename(columns=col_rename)
    .fillna(value=np.nan, axis=1)
)
ambass = responses_renamed[cols]

In [20]:
# Preview the first five rows read from the sheet.
ambass.head(5)

Unnamed: 0,encounter_timestamp,encounter_datetime,site_desc,location_adddesc,park_division,firstname_1,lastname_1,firstname_2,lastname_2,firstname_3,...,sd_amenity,sd_pdcontact,sd_comments,closed_amenity,closed_patroncount,closed_approach,closed_outcome,closed_pdcontact,closed_comments,borough
1,5/26/2020 20:06:28,5/25/2020 15:30:00,Rockaway Beach Boardwalk,B102nd street.,AQUATICS,Steve,Nicola,Ben,Peters,,...,Walking path,No,The patron without a mask was offered a mask a...,,,,,,,Queens
2,5/26/2020 19:13:22,5/26/2020 16:31:00,Walker Park,,Recreation,Dana,Vitolo,,,,...,,,,Tennis courts,4.0,Yes,No,No,,Staten Island
3,5/26/2020 17:57:13,5/26/2020 12:00:00,Lost Battalion Hall Recreation Center,In the park,Recreation,Chris,Wong,,,,...,Bench/sitting area,No,,,,,,,,Queens
4,5/26/2020 17:40:13,5/26/2020 17:39:00,Chelsea Park,,Recreation,Bobby,Vangelatos,Dana,Brown,,...,,,,Adult fitness equipment,5.0,Yes,Yes,No,All but 1 patron left the area,Manhattan
5,5/26/2020 17:36:27,5/26/2020 15:00:00,John Paul Jones Park,,Recreation,Peter,Lovett,,,,...,,,,,,,,,,Brooklyn


In [21]:
# Columns holding Yes/No answers that are recoded by yesno_cols.
yesno = [
    'sd_pdcontact',
    'closed_approach',
    'closed_outcome',
    'closed_pdcontact',
]

In [22]:
# Recode the Yes/No answer columns listed in `yesno`. The return value is not
# assigned, so this relies on yesno_cols changing `ambass` in place
# (NOTE(review): presumably defined in yesno_functions - confirm).
# The displayed previews show Yes/No before this cell and 1.0/0.0 after it.
yesno_cols(ambass, yesno)

In [23]:
# Look up site_id for every response by joining on the site description.
ambass_sites = ambass.merge(site_ref, how='left', on='site_desc')

# A left join keeps rows whose site_desc has no counterpart in site_ref and
# leaves their site_id null without any error. Report those descriptions so a
# failed lookup is visible here instead of surfacing later as null site_ids.
unmatched_sites = (ambass_sites.loc[ambass_sites['site_id'].isna(), 'site_desc']
                   .dropna()
                   .unique())
if len(unmatched_sites) > 0:
    print('%d distinct site_desc value(s) have no site_id after the lookup, e.g. %s'
          % (len(unmatched_sites), sorted(map(str, unmatched_sites))[:20]))

# Keep the SQL table's columns, in the SQL table's order (site_desc is replaced
# by site_id at this point).
# NOTE(review): `ambass` is overwritten, so this cell cannot be re-run without
# first re-running the cells that build `ambass` from the sheet.
ambass = ambass_sites[sql_cols]

In [24]:
# Preview the first five rows after the site lookup and column reordering.
ambass.head(5)

Unnamed: 0,encounter_timestamp,encounter_datetime,site_id,location_adddesc,park_division,firstname_1,lastname_1,firstname_2,lastname_2,firstname_3,...,sd_amenity,sd_pdcontact,sd_comments,closed_amenity,closed_patroncount,closed_approach,closed_outcome,closed_pdcontact,closed_comments,borough
0,5/26/2020 20:06:28,5/25/2020 15:30:00,,B102nd street.,AQUATICS,Steve,Nicola,Ben,Peters,,...,Walking path,0.0,The patron without a mask was offered a mask a...,,,,,,,Queens
1,5/26/2020 19:13:22,5/26/2020 16:31:00,,,Recreation,Dana,Vitolo,,,,...,,,,Tennis courts,4.0,1.0,0.0,0.0,,Staten Island
2,5/26/2020 17:57:13,5/26/2020 12:00:00,,In the park,Recreation,Chris,Wong,,,,...,Bench/sitting area,0.0,,,,,,,,Queens
3,5/26/2020 17:40:13,5/26/2020 17:39:00,,,Recreation,Bobby,Vangelatos,Dana,Brown,,...,,,,Adult fitness equipment,5.0,1.0,1.0,0.0,All but 1 patron left the area,Manhattan
4,5/26/2020 17:36:27,5/26/2020 15:00:00,,,Recreation,Peter,Lovett,,,,...,,,,,,,,,,Brooklyn


In [None]:
# Hash the sheet rows with the same settings used for the SQL rows: hash name
# 'row_hash', with site_id and encounter_timestamp left out of the hash.
hash_rows(
    ambass,
    exclude_cols=['site_id', 'encounter_timestamp'],
    hash_name='row_hash',
)