In [1]:
#Import required libraries
import datetime
import sys
import urllib
import urllib.parse

import gspread
import numpy as np
import pandas as pd
import sqlalchemy
from gspread_dataframe import get_as_dataframe
from gspread_dataframe import set_with_dataframe
from oauth2client.service_account import ServiceAccountCredentials

In [2]:
#Import project specific functions
from column_map import column_map
from yesno_functions import *
from format_datetime import *

In [3]:
#Import shared functions
sys.path.append('..')
from IPM_Shared_Code_public.Python.google_creds_functions import create_assertion_session
from IPM_Shared_Code_public.Python.utils import get_config
from IPM_Shared_Code_public.Python.delta_functions import *
from IPM_Shared_Code_public.Python.sql_functions import *

It will be compatible before version 1.0.
Read more <https://git.io/Jeclj#file-rn-md>
  deprecate('Deprecate "authlib.client", USE "authlib.integrations.requests_client" instead.', '1.0', 'Jeclj', 'rn')


### Use the config file to set up connections

In [4]:
#Load the shared settings file. The path is written as a raw string so the Windows backslashes
#are taken literally instead of being parsed as (invalid) escape sequences
config = get_config(r'c:\Projects\config.ini')

#Values used to build the SQL Server connection string
driver = config['srv']['driver']
server = config['srv']['server']
dwh = config['db']['crowdsdb']
#Path to the Google credentials file used when the session is created
cred_file = config['google']['path_to_file']

In [5]:
#Assemble the ODBC connection string from the config values. Trusted_Connection=Yes is used,
#so no user name or password is placed in the string
con_string = 'Driver={' + driver + '};Server=' + server + ';Database=' + dwh + ';Trusted_Connection=Yes;'
#URL-encode the string so it can be passed as the odbc_connect query parameter.
#urllib.parse is a submodule that has to be imported explicitly (see the imports cell);
#"import urllib" on its own does not guarantee that it is loaded
params = urllib.parse.quote_plus(con_string)
#Engine shared by every read from and write to SQL in this notebook
engine = sqlalchemy.create_engine("mssql+pyodbc:///?odbc_connect=%s" % params)

### Execute the function to get the renamed columns for this sheet

In [6]:
#Call the column map function to get the dictionary to be used for renaming and subsetting the columns
#NOTE(review): 'crowds_dpr' is presumably the key that selects this sheet's mapping -- confirm in column_map
col_rename = column_map('crowds_dpr')

In [7]:
#Because of duplicate column names these columns are renamed based on the column index, so the
#keys and values need to be switched: the first element of each mapped value becomes the key
#and the column name becomes the value.
#NOTE(review): the first element is presumably the column position in the sheet -- confirm in column_map
#The dictionary is rebuilt directly from column_map so that running this cell more than once
#always gives the same result instead of inverting an already inverted dictionary
col_rename = {v[0]: k for k, v in column_map('crowds_dpr').items()}

In [8]:
#Column names to keep from the sheet, in the order they appear in the rename dictionary
cols = [name for name in col_rename.values()]

### Read the site reference list from SQL

In [9]:
#Only the two columns that are kept for the site lookup are requested instead of the whole table
sql = 'select site_id, desc_location from crowdsdb.dbo.tbl_ref_sites'

In [10]:
#Load the site reference rows, keeping only the site id and the location description
site_ref = pd.read_sql(sql, engine).loc[:, ['site_id', 'desc_location']]

### Read the current data from SQL

In [11]:
#Query returning every row and column currently stored in tbl_dpr_crowds
sql = 'select * from crowdsdb.dbo.tbl_dpr_crowds'

In [12]:
#Pull the stored rows, drop the crowds_id column and fill missing values with NaN
crowds_sql = (
    pd.read_sql(sql, engine)
    .drop('crowds_id', axis = 'columns')
    .fillna(np.nan, axis = 'columns')
)

In [13]:
#Format the encounter_timestamp column of the SQL data
#NOTE(review): the return value is discarded, so format_datetime presumably changes crowds_sql in place -- confirm
format_datetime(crowds_sql, 'encounter_timestamp')

In [14]:
#Cast the patron count and the playground flag to float in a single astype call
float_cols = ['patroncount', 'in_playground']
crowds_sql = crowds_sql.astype({col: float for col in float_cols})

In [15]:
#Preview the first rows of the data read from SQL
crowds_sql.head()

Unnamed: 0,encounter_timestamp,park_district,patroncount,in_playground,action_taken,amenity,comments,site_id,borough
0,06-01-2020 10:54:14,Q-12,12.0,1.0,Approached the crowd; they ignored the employee,Playground,,Q005-11,Queens
1,06-01-2020 10:22:51,X-14,11.0,0.0,Did not approach the crowd; the crowd remains,Soccer field,Playing soccer,X147A-ZN01,Bronx
2,05-31-2020 16:31:20,Q-13,13.0,0.0,Approached the crowd; they ignored the employee,Skate Park,,Q108,Queens
3,05-31-2020 15:04:12,Q-07,30.0,1.0,Approached the crowd; they complied with instr...,Basketball court,,Q397,Queens
4,05-31-2020 14:38:52,X-02,15.0,0.0,Did not approach the crowd; the crowd remains,Baseball field,Central communication was called,X255,Bronx


In [16]:
#Keep the SQL column names as a plain list; it is reused below to subset and order the sheet data
sql_cols = crowds_sql.columns.tolist()

In [17]:
#Hash each row of the SQL data under the name 'row_hash', leaving out site_id and encounter_timestamp
#(the same two columns are passed as the match columns when the deltas are checked)
#NOTE(review): the return value is discarded, so hash_rows presumably adds the column in place -- confirm
hash_rows(crowds_sql, exclude_cols = ['site_id', 'encounter_timestamp'], hash_name = 'row_hash')

### Read the latest data from Google Sheets

In [18]:
#Google API scopes requested for the session: the Sheets feed and Drive
scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']
#Earlier oauth2client-based authorization, left commented out for reference
#creds = ServiceAccountCredentials.from_json_keyfile_name(cred_file, scope)
#client = gspread.authorize(creds)

In [None]:
#Create the session from the credentials file and the requested scopes
session = create_assertion_session(cred_file, scope)

In [None]:
#Wrap the session in a gspread client. Client is referenced through the gspread module because
#only "import gspread" is in scope; the bare name Client is never imported explicitly
client = gspread.Client(None, session)

In [19]:
#Open the spreadsheet by its title
sheet = client.open('Crowds_Combined')

In [20]:
#Select the worksheet named 'Sheet1'
ws = sheet.worksheet('Sheet1')

In [21]:
#Read the worksheet without a header row so the columns are identified by position
sheet_values = get_as_dataframe(ws, evaluate_formulas = True, header = None)
#Always remove row 0 with the column headers, rename the columns, fill missing values with NaN
#and keep only the mapped columns
crowds = (sheet_values.iloc[1:]
          .rename(columns = col_rename)
          .fillna(value = np.nan, axis = 1)
          .loc[:, cols])

In [22]:
#Remove the rows where the timestamp is null because these sheets have extra rows full of nulls.
#The explicit copy makes the filtered frame independent of the frame it was taken from, so the
#later calls that are made on crowds without using a return value do not trigger pandas'
#SettingWithCopyWarning
crowds = crowds[crowds['encounter_timestamp'].notnull()].copy()

In [23]:
#Format the encounter_timestamp column of the sheet data, the same call that is made on the SQL data
#NOTE(review): the return value is discarded, so format_datetime presumably changes crowds in place -- confirm
format_datetime(crowds, 'encounter_timestamp')

In [24]:
#Preview the first rows read from the sheet
crowds.head()

Unnamed: 0,encounter_timestamp,park_district,patroncount,in_playground,action_taken,amenity,comments,desc_location,borough
1,06-14-2020 20:14:59,Q-13,15.0,Yes,Did not approach the crowd; the crowd remains,Skate Park,PATRONS ARE CUTTING HOLES IN THE FENCING TO TH...,Q108 | Laurelton Playground,Queens
2,06-14-2020 20:09:47,Q-13,20.0,No,Did not approach the crowd; the crowd remains,Bench/sitting area,"BROOKVILLE PK. BBQ AREA,,,TRESSPASSING IN A CL...",Q008 | Brookville Park,Queens
3,06-14-2020 20:03:57,Q-13,6.0,No,Did not approach the crowd; the crowd remains,Handall court,PATRONS ARE TRESSPASSING IN A CLOSED HBC AR...,Q008 | Brookville Park,Queens
4,06-14-2020 19:58:37,Q-13,10.0,No,Did not approach the crowd; the crowd remains,Adult fitness equipment,PATRONS ARE TRESSPASSING IN THE CLOSED EXERCIS...,Q008 | Brookville Park,Queens
5,06-14-2020 16:27:01,X-15,500.0,No,Did not approach the crowd; the crowd remains,Open field/multi-purpose play area,,X039-ZN01 | Pelham Bay Park-Aileen Ryan Rec. C...,Bronx


In [25]:
#Columns that hold Yes/No answers in the sheet
yesno = ['in_playground']

In [26]:
#Convert the Yes/No columns listed above
#NOTE(review): the later preview shows in_playground as 1.0/0.0, so yesno_cols presumably recodes
#the values in place -- confirm in yesno_functions
yesno_cols(crowds, yesno)

In [27]:
#Look up the site_id for each location description; the left join keeps sheet rows that have no
#matching site. Afterwards keep the same columns, in the same order, as the SQL data
crowds = pd.merge(crowds, site_ref, on = 'desc_location', how = 'left')
crowds = crowds[sql_cols]

In [28]:
#Hash each row of the sheet data with the same excluded columns and hash name used for the SQL data
#NOTE(review): the return value is discarded, so hash_rows presumably adds 'row_hash' in place -- confirm
hash_rows(crowds, exclude_cols = ['site_id', 'encounter_timestamp'], hash_name = 'row_hash')

In [29]:
#Preview the sheet data after the site lookup and hashing
crowds.head()

Unnamed: 0,encounter_timestamp,park_district,patroncount,in_playground,action_taken,amenity,comments,site_id,borough,row_hash
0,06-14-2020 20:14:59,Q-13,15.0,1.0,Did not approach the crowd; the crowd remains,Skate Park,PATRONS ARE CUTTING HOLES IN THE FENCING TO TH...,Q108,Queens,f106cdb413144d450f520cf55ba2da427b4dbc78167095...
1,06-14-2020 20:09:47,Q-13,20.0,0.0,Did not approach the crowd; the crowd remains,Bench/sitting area,"BROOKVILLE PK. BBQ AREA,,,TRESSPASSING IN A CL...",Q008,Queens,6b90acb801d252e78959c0060146a8604daed89fa30995...
2,06-14-2020 20:03:57,Q-13,6.0,0.0,Did not approach the crowd; the crowd remains,Handall court,PATRONS ARE TRESSPASSING IN A CLOSED HBC AR...,Q008,Queens,bd2c80c48fb38b5f44657c53339dee7632af3f6fb755a6...
3,06-14-2020 19:58:37,Q-13,10.0,0.0,Did not approach the crowd; the crowd remains,Adult fitness equipment,PATRONS ARE TRESSPASSING IN THE CLOSED EXERCIS...,Q008,Queens,de4f1120700333d63e47e247eeba751c05299ac74b8ab4...
4,06-14-2020 16:27:01,X-15,500.0,0.0,Did not approach the crowd; the crowd remains,Open field/multi-purpose play area,,X039-ZN01,Bronx,0acfd5ec3408e1eecea269af04b2322f2f5fc5c09f7f59...


### Find the deltas based on the row hashes

In [30]:
#Compare the sheet rows (new) with the stored rows (old), matching on timestamp and site and
#using the row hashes; the DML verb for each row is returned in the 'dml_verb' column
delta_cols = sql_cols + ['dml_verb']
crowds_deltas = check_deltas(new_df = crowds,
                             old_df = crowds_sql,
                             on = ['encounter_timestamp', 'site_id'],
                             hash_name = 'row_hash',
                             dml_col = 'dml_verb')
#Keep only the SQL columns plus the verb
crowds_deltas = crowds_deltas[delta_cols]

In [31]:
#Preview the detected deltas together with their DML verb
crowds_deltas.head()

Unnamed: 0,encounter_timestamp,park_district,patroncount,in_playground,action_taken,amenity,comments,site_id,borough,dml_verb
0,06-14-2020 20:14:59,Q-13,15.0,1.0,Did not approach the crowd; the crowd remains,Skate Park,PATRONS ARE CUTTING HOLES IN THE FENCING TO TH...,Q108,Queens,I
1,06-14-2020 20:09:47,Q-13,20.0,0.0,Did not approach the crowd; the crowd remains,Bench/sitting area,"BROOKVILLE PK. BBQ AREA,,,TRESSPASSING IN A CL...",Q008,Queens,I
2,06-14-2020 20:03:57,Q-13,6.0,0.0,Did not approach the crowd; the crowd remains,Handall court,PATRONS ARE TRESSPASSING IN A CLOSED HBC AR...,Q008,Queens,I
3,06-14-2020 19:58:37,Q-13,10.0,0.0,Did not approach the crowd; the crowd remains,Adult fitness equipment,PATRONS ARE TRESSPASSING IN THE CLOSED EXERCIS...,Q008,Queens,I
4,06-14-2020 16:27:01,X-15,500.0,0.0,Did not approach the crowd; the crowd remains,Open field/multi-purpose play area,,X039-ZN01,Bronx,I


In [32]:
#Rows flagged 'I' are the ones to insert; only the SQL columns are kept
is_insert = crowds_deltas['dml_verb'] == 'I'
crowds_inserts = crowds_deltas.loc[is_insert, sql_cols]

In [33]:
#Preview the rows that will be inserted
crowds_inserts.head()

Unnamed: 0,encounter_timestamp,park_district,patroncount,in_playground,action_taken,amenity,comments,site_id,borough
0,06-14-2020 20:14:59,Q-13,15.0,1.0,Did not approach the crowd; the crowd remains,Skate Park,PATRONS ARE CUTTING HOLES IN THE FENCING TO TH...,Q108,Queens
1,06-14-2020 20:09:47,Q-13,20.0,0.0,Did not approach the crowd; the crowd remains,Bench/sitting area,"BROOKVILLE PK. BBQ AREA,,,TRESSPASSING IN A CL...",Q008,Queens
2,06-14-2020 20:03:57,Q-13,6.0,0.0,Did not approach the crowd; the crowd remains,Handall court,PATRONS ARE TRESSPASSING IN A CLOSED HBC AR...,Q008,Queens
3,06-14-2020 19:58:37,Q-13,10.0,0.0,Did not approach the crowd; the crowd remains,Adult fitness equipment,PATRONS ARE TRESSPASSING IN THE CLOSED EXERCIS...,Q008,Queens
4,06-14-2020 16:27:01,X-15,500.0,0.0,Did not approach the crowd; the crowd remains,Open field/multi-purpose play area,,X039-ZN01,Bronx


In [34]:
#Append the new rows to tbl_dpr_crowds; the dataframe index is not written to the table
crowds_inserts.to_sql('tbl_dpr_crowds', engine, index = False, if_exists = 'append')

In [35]:
#Rows flagged 'U' are the ones to update; only the SQL columns are kept
is_update = crowds_deltas['dml_verb'] == 'U'
crowds_updates = crowds_deltas.loc[is_update, sql_cols]

In [36]:
#Preview the rows that will be updated
crowds_updates.head()

Unnamed: 0,encounter_timestamp,park_district,patroncount,in_playground,action_taken,amenity,comments,site_id,borough


In [37]:
#Send the changed rows to tbl_dpr_crowds
#NOTE(review): the list presumably names the columns used to match existing rows -- confirm in sql_update
sql_update(crowds_updates, 'tbl_dpr_crowds', engine, ['encounter_timestamp', 'site_id'])