In [1]:
#Import required libraries
import gspread
import sys
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import numpy as np
import urllib
import sqlalchemy
from gspread_dataframe import set_with_dataframe
from gspread_dataframe import get_as_dataframe

In [2]:
#Import project specific functions
from column_map import column_map
from yesno_functions import *

In [3]:
#Import shared functions
sys.path.append('../..')
from IPM_Shared_Code_public.Python.google_creds_functions import create_assertion_session
from IPM_Shared_Code_public.Python.utils import get_config
from IPM_Shared_Code_public.Python.delta_functions import *
from IPM_Shared_Code_public.Python.sql_functions import *

It will be compatible before version 1.0.
Read more <https://git.io/Jeclj#file-rn-md>
  deprecate('Deprecate "authlib.client", USE "authlib.integrations.requests_client" instead.', '1.0', 'Jeclj', 'rn')


### Use the config file to setup connections

In [4]:
config = get_config('c:\Projects\config.ini')

driver = config['srv']['driver']
server = config['srv']['server']
dwh = config['db']['crowdsdb']
cred_file = config['google']['path_to_file']

In [5]:
con_string = 'Driver={' + driver + '};Server=' + server +';Database=' + dwh + ';Trusted_Connection=Yes;'
params = urllib.parse.quote_plus(con_string)
engine = sqlalchemy.create_engine("mssql+pyodbc:///?odbc_connect=%s" % params)

### Execute the function to get the renamed columns for this sheet

In [6]:
#Call the column map function to get the dictionary to be used for renaming and subsetting the columns
col_rename = column_map('patrol_dpr')

In [7]:
#Because of duplicate column names these columns are renamed based on the column index and the keys and 
#values need to be switched
col_rename = {v[0]: k for k, v in col_rename.items()}

In [8]:
cols = list(col_rename.values())

### Read the current data from SQL

In [9]:
sql = 'select * from crowdsdb.dbo.tbl_dpr_patrol'

In [10]:
patrol_sql = (pd.read_sql(con = engine, sql = sql)
              .drop(columns = ['patrol_id', 'patroncount'])
              .fillna(value = np.nan, axis = 1))

In [11]:
sql_cols = list(patrol_sql.columns.values)

In [12]:
patrol_sql.head()

Unnamed: 0,encounter_timestamp,encounter_datetime,site_id,location_adddesc,park_division,visit_reason,firstname_1,lastname_1,firstname_2,lastname_2,...,sd_patronscomplied,sd_patronsnocomply,sd_amenity,summonscount_a01,summonscount_a03,summonscount_a04,summonscount_a22,other_summonstype,other_summonscount,borough


In [13]:
hash_rows(patrol_sql, exclude_cols = ['encounter_timestamp'], hash_name = 'row_hash')

### Read the site reference list from SQL

In [14]:
sql = 'select * from crowdsdb.dbo.tbl_ref_sites'

In [15]:
site_ref = pd.read_sql(con = engine, sql = sql)[['site_id', 'site_desc', 'borough']]

### Read the latest data from Google Sheets

In [16]:
scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name(cred_file, scope)
client = gspread.authorize(creds)

In [17]:
sheet = client.open('COMBINED Patrol Reporting Responses')

In [18]:
ws = sheet.worksheet('MASTER')

In [35]:
#Read the worksheet as a data frame, rename the columns and subset the columns to only include those
#in the column list
patrol = (get_as_dataframe(ws, evaluate_formulas = True, header= None)
          #Always remove row 0 with the column headers
          .iloc[1:]
          .rename(columns = col_rename)
          .fillna(value = np.nan, axis = 1))[cols]

In [37]:
patrol.head()

Unnamed: 0,encounter_timestamp,encounter_datetime,site_desc,location_adddesc,park_division,firstname_1,lastname_1,firstname_2,lastname_2,firstname_3,...,sd_pdassist,sd_pdcontact,sd_comments,summonscount_a01,summonscount_a03,summonscount_a04,summonscount_a22,other_summonstype,other_summonscount,borough
1,5/28/2020 21:21:03,5/28/2020 21:26:00,Riverside Park South,,PEP,Arm,Zaman,Zuriel,Lara,,...,,,,,,,,,,Manhattan
2,5/28/2020 20:42:30,5/28/2020 20:41:00,Central Park-East 96th St Playground,locked and closed.,PEP,C. 596,h. 596,t.,B.,,...,,,,,,,,,,Manhattan
3,5/28/2020 20:32:07,5/28/2020 20:32:00,Central Park-James Michael Levin Playground,locked and closed.,PEP,C. 596,h. 596,t.,B.,,...,,,,,,,,,,Manhattan
4,5/28/2020 20:31:48,5/28/2020 18:00:00,Ferry Point Park,,PEP,Rayquan,piper,Anthony,Valentin,,...,No,,,,,,,,,Bronx
5,5/28/2020 20:31:14,5/28/2020 20:30:00,Central Park-Pat Hoffman Friedman Playground,locked and closed,PEP,c. 596,h. 596,t.,B.,,...,,,,,,,,,,Manhattan


In [38]:
yesno = ['closed_education', 'closed_outcome', 'closed_summonsissued', 'closed_pdassist',
         'closed_pdcontact', 'sd_summonsissued', 'sd_pdassist', 'sd_pdcontact']

In [40]:
yesno_cols(patrol, yesno)

In [41]:
#Remove any rows with no data, presumably these are rows with no timestamp
patrol = patrol[patrol['encounter_timestamp'].notna()]

In [42]:
patrol = patrol.merge(site_ref, how = 'left', on = ['site_desc', 'borough'])[sql_cols]

In [44]:
hash_rows(patrol, exclude_cols = ['site_id', 'encounter_timestamp'], hash_name = 'row_hash')

In [45]:
patrol_deltas = (check_deltas(new_df = patrol, old_df = patrol_sql, on = ['site_id', 'encounter_timestamp'], 
                              hash_name = 'row_hash', dml_col = 'dml_verb'))[sql_cols + ['dml_verb']]

In [46]:
patrol_inserts = patrol_deltas[patrol_deltas['dml_verb'] == 'I'][sql_cols]

In [47]:
patrol_inserts.head()

Unnamed: 0,encounter_timestamp,encounter_datetime,site_id,location_adddesc,park_division,visit_reason,firstname_1,lastname_1,firstname_2,lastname_2,...,sd_patronscomplied,sd_patronsnocomply,sd_amenity,summonscount_a01,summonscount_a03,summonscount_a04,summonscount_a22,other_summonstype,other_summonscount,borough
0,5/28/2020 21:21:03,5/28/2020 21:26:00,M353,,PEP,Standard Patrol,Arm,Zaman,Zuriel,Lara,...,,,,,,,,,,Manhattan
1,5/28/2020 20:42:30,5/28/2020 20:41:00,M010-176,locked and closed.,PEP,Standard Patrol,C. 596,h. 596,t.,B.,...,,,,,,,,,,Manhattan
2,5/28/2020 20:32:07,5/28/2020 20:32:00,M010-103,locked and closed.,PEP,Standard Patrol,C. 596,h. 596,t.,B.,...,,,,,,,,,,Manhattan
3,5/28/2020 20:31:48,5/28/2020 18:00:00,X126,,PEP,Fixed Post,Rayquan,piper,Anthony,Valentin,...,13.0,0.0,Soccer field,,,,,,,Bronx
4,5/28/2020 20:31:14,5/28/2020 20:30:00,M010-143,locked and closed,PEP,Standard Patrol,c. 596,h. 596,t.,B.,...,,,,,,,,,,Manhattan


In [48]:
patrol_inserts.to_sql('tbl_dpr_patrol', engine, index = False, if_exists = 'append')

DataError: (pyodbc.DataError) ('22007', '[22007] [Microsoft][ODBC SQL Server Driver][SQL Server]The conversion of a nvarchar data type to a datetime data type resulted in an out-of-range value. (242) (SQLExecDirectW); [22007] [Microsoft][ODBC SQL Server Driver][SQL Server]The statement has been terminated. (3621)')
[SQL: INSERT INTO tbl_dpr_patrol (encounter_timestamp, encounter_datetime, site_id, location_adddesc, park_division, visit_reason, firstname_1, lastname_1, firstname_2, lastname_2, firstname_3, lastname_3, patrol_method, encounter_type, closed_amenity, closed_patroncount, closed_education, closed_outcome, closed_summonsissued, closed_pdassist, closed_pdcontact, closed_comments, sd_summonsissued, sd_pdassist, sd_pdcontact, sd_comments, sd_patronscomplied, sd_patronsnocomply, sd_amenity, summonscount_a01, summonscount_a03, summonscount_a04, summonscount_a22, other_summonstype, other_summonscount, borough) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)]
[parameters: (('5/28/2020 21:21:03', '5/28/2020 21:26:00', 'M353', None, 'PEP', 'Standard Patrol', 'Arm', 'Zaman', 'Zuriel', 'Lara', None, None, 'Mobile', 'No encounter', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'Manhattan'), ('5/28/2020 20:42:30', '5/28/2020 20:41:00', 'M010-176', 'locked and closed. ', 'PEP', 'Standard Patrol', 'C. 596', 'h. 596', 't. ', 'B. ', None, None, 'Mobile', 'No encounter', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'Manhattan'), ('5/28/2020 20:32:07', '5/28/2020 20:32:00', 'M010-103', 'locked and closed. ', 'PEP', 'Standard Patrol', 'C. 596', 'h. 596', 't. ', 'B. ', None, None, 'Mobile', 'No encounter', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'Manhattan'), ('5/28/2020 20:31:48', '5/28/2020 18:00:00', 'X126', None, 'PEP', 'Fixed Post', 'Rayquan ', 'piper ', 'Anthony ', 'Valentin ', None, None, 'Mobile', 'Yes, patrons educated on social distancing (not trespassing)', None, None, None, None, None, None, None, None, None, None, None, None, 13.0, 0.0, 'Soccer field', None, None, None, None, None, None, 'Bronx'), ('5/28/2020 20:31:14', '5/28/2020 20:30:00', 'M010-143', 'locked and closed', 'PEP', 'Standard Patrol', 'c. 596', 'h. 596', 't. ', 'B. ', None, None, 'Mobile', 'No encounter', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'Manhattan'), ('5/28/2020 20:29:55', '5/28/2020 20:20:00', 'M010-031', 'locked and closed. ', 'PEP', 'Standard Patrol', 'C. 596', 'h. 596', 't. ', 'B. ', None, None, 'Mobile', 'No encounter', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'Manhattan'), ('5/28/2020 20:14:27', '5/28/2020 8:07:00', 'B131', None, 'PEP', 'Standard Patrol', 'I', 'Gubarev ', 'Z', 'Hamilton ', 'S', 'Johnson ', 'Mobile', 'Yes, patrons who trespassed/violated rules', 'Skate Park', 9.0, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'Brooklyn'), ('5/28/2020 20:12:53', '5/28/2020 20:05:00', 'X058', None, 'PEP', 'Standard Patrol', 'kevin ', 'Feliciano ', 'naji', 'Williams ', None, None, 'Mobile', 'No encounter', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'Bronx')  ... displaying 10 of 17994 total bound parameter sets ...  ('4/14/2020 18:05:59', '4/14/2020 19:24:00', 'R088', 'Altamont Ave Trails', 'PEP', 'Standard Patrol', 'Mercedes', 'Velilla', None, None, None, None, 'Mobile', 'Yes, patrons educated on social distancing (not trespassing)', None, None, None, None, None, None, None, None, None, None, None, 'a young adult couple male and female walking their bicycles on the path', 2.0, 0.0, 'Walking path', None, None, None, None, None, None, 'Staten Island'), ('4/14/2020 17:54:43', '4/14/2020 20:00:00', 'M056', ' Did not encounter anyone', 'PEP', 'Standard Patrol', 'Officer Martinez', 'Off Medina', None, None, None, None, 'On foot', 'No encounter', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'Manhattan'))]
(Background on this error at: http://sqlalche.me/e/9h9h)

In [None]:
patrol_updates = patrol_deltas[patrol_deltas['dml_verb'] == 'U'][sql_cols]

In [None]:
patrol_updates.head()

In [None]:
sql_update(patrol_updates, 'tbl_dpr_patrol', engine, ['encounter_timestamp', 'site_id'])