In [2]:
import gspread
import sys
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import numpy as np
import urllib
import sqlalchemy
from gspread_dataframe import set_with_dataframe
from gspread_dataframe import get_as_dataframe

In [7]:
from column_map import column_map

In [77]:
# Extend the module search path two directory levels up so the shared-code
# package imported below can be found (presumably that is where
# IPM_Shared_Code_public lives relative to the working directory — confirm).
sys.path.append('../..')
from IPM_Shared_Code_public.Python.google_creds_functions import create_assertion_session
from IPM_Shared_Code_public.Python.utils import get_config
# NOTE(review): hash_rows and check_deltas, used further down, are not imported
# by name anywhere visible, so they presumably come from this star import.
from IPM_Shared_Code_public.Python.delta_functions import *

### Use the config file to set up connections

In [86]:
# Load the shared settings file. The path is written as a raw string so the
# Windows backslashes are taken literally rather than parsed as escape
# sequences (the plain literal only worked because the sequences it contained
# are not recognised escapes, which newer Python versions warn about).
config = get_config(r'c:\Projects\config.ini')

# Components of the SQL Server connection string
driver = config['srv']['driver']
server = config['srv']['server']
dwh = config['db']['crowdsdb']
# Path to the JSON key file used to authorize the Google Sheets client
cred_file = config['google']['path_to_file']

### Execute the function to get the columns for this sheet

In [97]:
# Call column_map to get the dictionary used for renaming and subsetting the sheet columns
col_rename = column_map('patrol_dpr')

In [98]:
# Renamed (target) column names, i.e. the values of the rename map; used to subset the sheet data
cols = [*col_rename.values()]

### Read the current data from SQL

In [100]:
# Assemble the ODBC connection string (trusted connection), URL-encode it and hand
# it to SQLAlchemy through the mssql+pyodbc odbc_connect parameter
con_string = f'Driver={{{driver}}};Server={server};Database={dwh};Trusted_Connection=Yes;'
params = urllib.parse.quote_plus(con_string)
engine = sqlalchemy.create_engine(f"mssql+pyodbc:///?odbc_connect={params}")

In [101]:
# Query that returns every column of the existing patrol table
sql = 'select * from crowdsdb.dbo.tbl_dpr_patrol'

In [113]:
# Read the current table contents, discard the patrol_id column and fill
# missing values with NaN
patrol_sql = (
    pd.read_sql(sql, engine)
    .drop(columns='patrol_id')
    .fillna(np.nan, axis=1)
)

In [115]:
# Preview the first rows of the data read from SQL
patrol_sql.head()

Unnamed: 0,encounter_timestamp,encounter_datetime,site_id,location_adddesc,park_division,firstname_1,lastname_1,firstname_2,lastname_2,firstname_3,...,sd_patronscomplied,sd_patronsnocomply,sd_amenity,summonscount_a01,summonscount_a03,summonscount_a04,summonscount_a22,other_summonstype,other_summonscount,borough


In [90]:
# Hash the SQL rows with encounter_timestamp excluded. The result is not assigned,
# so hash_rows presumably adds the 'row_hash' column to patrol_sql in place —
# confirm against delta_functions.
hash_rows(patrol_sql, exclude_cols = ['encounter_timestamp'], hash_name = 'row_hash')

### Read the latest data from Google Sheets

In [6]:
# Authorize a gspread client from the JSON key file named in the config,
# requesting the Sheets feed and Drive scopes
scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name(cred_file, scope)
client = gspread.authorize(creds)

In [8]:
# Open the spreadsheet by its title
sheet = client.open('COMBINED Patrol Reporting Responses')

In [9]:
# Select the MASTER tab of the spreadsheet
ws = sheet.worksheet('MASTER')

In [84]:
#patrol_hist = client.open_by_url('https://docs.google.com/spreadsheets/d/name/edit#gid=0/revisions')

In [85]:
#patrol_hist = (get_as_dataframe(hist.worksheet('MASTER'), evaluate_formulas = True, header= 0)
#               .rename(columns = col_rename))[list(col_rename.values())]

In [65]:
#patrol_hist = patrol_hist[patrol_hist['encounter_timestamp'].notna()]

In [69]:
# Load the worksheet into a data frame, apply the column rename map, fill missing
# values with NaN and keep only the columns named in the column list
patrol = (
    get_as_dataframe(ws, evaluate_formulas=True, header=0)
    .rename(columns=col_rename)
    .fillna(np.nan, axis=1)[cols]
)

In [70]:
# Remove any rows with no data, presumably these are rows with no timestamp.
# Take an explicit copy so the filtered frame is independent of the frame it was
# sliced from: hash_rows is called on it next and appears to add a column in
# place, which on a boolean-mask slice can raise pandas' SettingWithCopyWarning.
patrol = patrol[patrol['encounter_timestamp'].notna()].copy()

In [78]:
# Hash the sheet rows with encounter_timestamp excluded, using the same arguments
# as for the SQL data. The preview of patrol that follows shows a row_hash column
# although nothing is reassigned here, so the helper appears to modify the frame
# in place.
hash_rows(patrol, exclude_cols = ['encounter_timestamp'], hash_name = 'row_hash')

In [79]:
# Preview the first rows of the sheet data, now including row_hash
patrol.head()

Unnamed: 0,encounter_timestamp,encounter_datetime,property,location_adddesc,parks_division,firstname_1,lastname_1,firstname_2,lastname_2,firstname_3,...,sd_patronsnocomply,sd_amenity,summonscount_a01,summonscount_a03,summonscount_a04,summonscount_a22,other_summonstype,other_summonscount,borough,row_hash
0,5/19/2020 20:37:55,5/19/2020 20:36:00,J.J. Byrne Playground,,PEP,K,Romero,R,Anderson,,...,,,,,,,,,Brooklyn,77160e7a67215d2734a361fe5a45b436252c9dc30af78d...
1,5/19/2020 20:34:58,5/19/2020 19:55:00,Inwood Hill Park - Dyckman Ballfield,,PEP,Ariel,Junco,McTerry,Obioha,,...,,,,,,,,,Manhattan,14957cc4dc27b4d61800c0adb76ef16ce015ae813ba0a9...
2,5/19/2020 20:25:18,5/19/2020 18:30:00,Battery Park-Battery Park,,PEP,daniel,palladino,a,mota,,...,,,,,,,,,Manhattan,031558724f154509734ea4b2dfaf09d901c0ed286486de...
3,5/19/2020 20:13:33,5/19/2020 20:12:00,Fort Greene Park,,PEP,Daniel,Gitel,,,,...,,,,,,,,,Brooklyn,26afcb5c3042047ab9e70faf24e6e7745cb97b6cea5910...
4,5/19/2020 20:12:57,5/19/2020 20:15:00,Josephine Caminiti Playground,,PEP,j,hengber,b,jospeh,,...,,,,,,,,,Queens,0be26e167788a86cfc36f497e615bc15cd3bdc0aa61ac6...


In [91]:
# Compare the sheet data (new) against the SQL data (old), matching rows on
# encounter_timestamp and using the row_hash columns; judging by the filters
# applied afterwards, each row is labelled in dml_verb ('I' / 'U').
# NOTE(review): the previews of the two frames show different column names
# (site_id / park_division vs property / parks_division) — confirm the hashes
# are computed over like-for-like columns.
patrol_deltas = check_deltas(
    new_df=patrol,
    old_df=patrol_sql,
    on='encounter_timestamp',
    hash_name='row_hash',
    dml_col='dml_verb',
)

In [92]:
# Rows flagged 'I' in dml_verb (presumably records to insert)
patrol_inserts = patrol_deltas[patrol_deltas['dml_verb'].eq('I')]

In [94]:
# Preview the first rows flagged 'I'
patrol_inserts.head()

Unnamed: 0,encounter_timestamp,encounter_datetime,property,location_adddesc,parks_division,firstname_1,lastname_1,firstname_2,lastname_2,firstname_3,...,sd_amenity_old,summonscount_a01_old,summonscount_a03_old,summonscount_a04_old,summonscount_a22_old,other_summonstype_old,other_summonscount_old,borough_old,row_hash_old,dml_verb
0,5/19/2020 20:37:55,5/19/2020 20:36:00,J.J. Byrne Playground,,PEP,K,Romero,R,Anderson,,...,,,,,,,,,,I
1,5/19/2020 20:34:58,5/19/2020 19:55:00,Inwood Hill Park - Dyckman Ballfield,,PEP,Ariel,Junco,McTerry,Obioha,,...,,,,,,,,,,I
2,5/19/2020 20:25:18,5/19/2020 18:30:00,Battery Park-Battery Park,,PEP,daniel,palladino,a,mota,,...,,,,,,,,,,I
3,5/19/2020 20:13:33,5/19/2020 20:12:00,Fort Greene Park,,PEP,Daniel,Gitel,,,,...,,,,,,,,,,I
4,5/19/2020 20:12:57,5/19/2020 20:15:00,Josephine Caminiti Playground,,PEP,j,hengber,b,jospeh,,...,,,,,,,,,,I


In [93]:
# Rows flagged 'U' in dml_verb (presumably records to update)
patrol_updates = patrol_deltas[patrol_deltas['dml_verb'].eq('U')]

In [95]:
# Preview the first rows flagged 'U'
patrol_updates.head()

Unnamed: 0,encounter_timestamp,encounter_datetime,property,location_adddesc,parks_division,firstname_1,lastname_1,firstname_2,lastname_2,firstname_3,...,sd_amenity_old,summonscount_a01_old,summonscount_a03_old,summonscount_a04_old,summonscount_a22_old,other_summonstype_old,other_summonscount_old,borough_old,row_hash_old,dml_verb
