# Implementing CPDB categorization on Checkbook NYC 

In [34]:
# Capital Spending Project: CPDB categorization
import pandas as pd
import numpy as np
import re
import time
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

### Establish the SQLite database

In [35]:
# Load the grouped Checkbook NYC capital-project export and append an empty
# (all-None) 'typecategory' column as the last column.
checkbook = pd.read_csv(
    'checkbook_nyc_groupby_capital_project_fy.csv'
).assign(typecategory=None)

In [36]:
# Point SQLAlchemy at the local SQLite file, then open a single session on it
# through a session factory bound to that engine.
db_url = 'sqlite:///checkbook.db'
engine = create_engine(db_url)
Session = sessionmaker(engine)
session = Session()

In [37]:
# Write the DataFrame to the database, replacing the table if it already
# exists and leaving the DataFrame index out of the stored columns.
table_name = 'capital_projects'
checkbook.to_sql(name=table_name, con=engine, index=False, if_exists='replace')

80932

In [38]:
# Sanity check: read the whole table back and display its (rows, columns) shape.
query_test = "SELECT * FROM capital_projects;"
result = pd.read_sql(sql=query_test, con=engine)
result.shape

(80932, 22)

## Running categorization process on Checkbook DB

In [39]:
# Group projects into the ITT, Vehicles, and Equipment category
#
# SQL UPDATE that sets typecategory to 'ITT, Vehicles, and Equipment' on every
# capital_projects row whose upper-cased [Budget Code] matches at least one of
# the LIKE patterns below, unless the value also contains GARAGE.
# This statement has no `typecategory IS NULL` condition, so it overwrites any
# value already stored in typecategory for the rows it matches.
# Pattern conventions: '%WORD%' matches WORD anywhere in the value; a pattern
# padded with spaces (e.g. '% IT %') only matches WORD as a space-delimited
# token; a pattern without a leading or trailing % (e.g. 'IT %', '%BUS') is
# anchored to the start or end of the value.
# Lines starting with `--` inside the string are SQL comments.

query_itt_vehicles_equipment = """
UPDATE capital_projects SET typecategory = 'ITT, Vehicles, and Equipment'
WHERE (
upper([Budget Code]) LIKE '%VEHICLE%'
OR upper([Budget Code]) LIKE '%AMBULANCE%'
OR upper([Budget Code]) LIKE '%BUSES%'
OR upper([Budget Code]) LIKE '%TRUCK%'
OR upper([Budget Code]) LIKE '%TRAILER%'
OR upper([Budget Code]) LIKE '%VANS%'

OR upper([Budget Code]) LIKE '%EQUIPMENT%'
OR upper([Budget Code]) LIKE '%CRANE%'
OR upper([Budget Code]) LIKE '%FURNITURE%'
OR upper([Budget Code]) LIKE '%PORTABLE%'

OR upper([Budget Code]) LIKE '% IT %'
OR upper([Budget Code]) LIKE '%AUDIO%'
OR upper([Budget Code]) LIKE '%CAMERA%'
OR upper([Budget Code]) LIKE '%COMPUTERS%'
OR upper([Budget Code]) LIKE '%DATA%'
OR upper([Budget Code]) LIKE '%DOITT%'
OR upper([Budget Code]) LIKE '%FISA%'
OR upper([Budget Code]) LIKE '%GIS%'
OR upper([Budget Code]) LIKE '%HARDWARE%'
OR upper([Budget Code]) LIKE '%MAINFRAME%'
OR upper([Budget Code]) LIKE '%MOBILE%'
OR upper([Budget Code]) LIKE '%PHONE%'
OR upper([Budget Code]) LIKE '%PRINTER%'
OR upper([Budget Code]) LIKE '%SERVER%'
OR upper([Budget Code]) LIKE '%SOFTWARE%'
OR upper([Budget Code]) LIKE '%RADIO %'
OR upper([Budget Code]) LIKE '%RADIOS%'
OR upper([Budget Code]) LIKE '%VIDEO%'
OR upper([Budget Code]) LIKE '%VOIP%'
OR upper([Budget Code]) LIKE '%WIFI%'

-- New trigger words
OR upper([Budget Code]) LIKE '%A/V%'
OR upper([Budget Code]) LIKE '% AV %'
OR upper([Budget Code]) LIKE '%ACCESS%UPGRADE%'
OR upper([Budget Code]) LIKE '%AIMS%'
OR upper([Budget Code]) LIKE '%ANALYTICS%'
OR upper([Budget Code]) LIKE '%ANTENA%'
OR upper([Budget Code]) LIKE '%APPARATUS%'
OR upper([Budget Code]) LIKE '%ARREST%PROCESSING%'
OR upper([Budget Code]) LIKE '%ARRESTOR%'
OR upper([Budget Code]) LIKE '%ASSET%TRACKER%'
OR upper([Budget Code]) LIKE '%AUTOMATIC%'
OR upper([Budget Code]) LIKE '%AUTOMATION%'
OR upper([Budget Code]) LIKE '%BARGE%'
OR upper([Budget Code]) LIKE '%BED%'
OR upper([Budget Code]) LIKE '% BMS %'
OR upper([Budget Code]) LIKE '%BOATS%'
OR upper([Budget Code]) LIKE '% BUS %'
OR upper([Budget Code]) LIKE '%BUS'
OR upper([Budget Code]) LIKE '%CART%'
OR upper([Budget Code]) LIKE '%CATHETERIZATION%'
OR upper([Budget Code]) LIKE '%CCTV%'
OR upper([Budget Code]) LIKE '%CHECKBOOK%NYC%'
OR upper([Budget Code]) LIKE '%CISCO%'
OR upper([Budget Code]) LIKE '%CITYTIME%'
OR upper([Budget Code]) LIKE '%COMMUNICATIONS%'
OR upper([Budget Code]) LIKE '%COMMUNICATION%EQUI%'
OR upper([Budget Code]) LIKE '%COMPRESSOR%'
OR upper([Budget Code]) LIKE '%COMPUTER%EQUIP%'
OR upper([Budget Code]) LIKE '%COMPUTER%REPL%'
OR upper([Budget Code]) LIKE '%COMPUTER%SYS%'
OR upper([Budget Code]) LIKE '%COMP%UPGRADE%'
OR upper([Budget Code]) LIKE '%COMPUTERIZED%'
OR upper([Budget Code]) LIKE '%COPIER%'
OR upper([Budget Code]) LIKE '%CT%SCAN%'
OR upper([Budget Code]) LIKE '%CURTAIN%'
OR upper([Budget Code]) LIKE '%CYTOMETER%'
OR upper([Budget Code]) LIKE '%DCTV%'
OR upper([Budget Code]) LIKE '%DESKTOP%'
OR upper([Budget Code]) LIKE '%DEVICE%'
OR upper([Budget Code]) LIKE '%DIGITAL%'
OR upper([Budget Code]) LIKE '%DISINFECTION%'
OR upper([Budget Code]) LIKE '%E-TICKET%'
OR upper([Budget Code]) LIKE '%ECTP%'
OR upper([Budget Code]) LIKE '%ELECTRONIC%'
OR upper([Budget Code]) LIKE '%EMAIL%'
OR upper([Budget Code]) LIKE '%ENCRYPT%'
OR upper([Budget Code]) LIKE '%ENGINE%'
OR upper([Budget Code]) LIKE '%ENTERPRISE%SOLUTIONS%'
OR upper([Budget Code]) LIKE '%EQ%PURCHASE%'
OR upper([Budget Code]) LIKE '%EQUIP%'
OR upper([Budget Code]) LIKE '%FERRY%BO%'
OR upper([Budget Code]) LIKE '%FFE%'
OR upper([Budget Code]) LIKE '%FILM%'
OR upper([Budget Code]) LIKE '%FIREBOAT%'
OR upper([Budget Code]) LIKE '%FLAT%BED%'
OR upper([Budget Code]) LIKE '%FLEET%'
OR upper([Budget Code]) LIKE '%FMS%'
OR upper([Budget Code]) LIKE '%FORKLIFT%'
OR upper([Budget Code]) LIKE '%GENERATOR%'
OR upper([Budget Code]) LIKE '%GPS%'
OR upper([Budget Code]) LIKE '%HDTV%'
OR upper([Budget Code]) LIKE '%HELICOPTER%'
OR upper([Budget Code]) LIKE '%HHS%ACC%'
OR upper([Budget Code]) LIKE '%INFO%MANAGE%'
OR upper([Budget Code]) LIKE '%INFO%SECURITY%'
OR upper([Budget Code]) LIKE '%INTERFACE%'
OR upper([Budget Code]) LIKE 'IT %'
OR upper([Budget Code]) LIKE '% IT'
OR upper([Budget Code]) LIKE '%IVR %'
OR upper([Budget Code]) LIKE '%KEYWORD%'
OR upper([Budget Code]) LIKE '%KITS%'
OR upper([Budget Code]) LIKE '%LADDER%'
OR upper([Budget Code]) LIKE '% LAN %'
OR upper([Budget Code]) LIKE '%LUNG%UNIT%'
OR upper([Budget Code]) LIKE '%MACHINE%'
OR upper([Budget Code]) LIKE '%MAMOGRAM%'
OR upper([Budget Code]) LIKE '%MANHOLE%COVER%'
OR upper([Budget Code]) LIKE '%MANHOLE%RING%'
OR upper([Budget Code]) LIKE '%MANOMETRY%'
OR upper([Budget Code]) LIKE '%MED%EQMT%'
OR upper([Budget Code]) LIKE '%MEDIA%UPGRADE%'
OR upper([Budget Code]) LIKE '%MICROFILM%'
OR upper([Budget Code]) LIKE '%MICROSCOPE%'
OR upper([Budget Code]) LIKE '%MICROSPECT%'
OR upper([Budget Code]) LIKE '%MINI%BUS%'
OR upper([Budget Code]) LIKE '%MINI%VAN%'
OR upper([Budget Code]) LIKE '%MOBILIZING%'
OR upper([Budget Code]) LIKE '%MONITOR%'
OR upper([Budget Code]) LIKE '%MOORING%UPGRADE%'
OR upper([Budget Code]) LIKE '%MOSAICS%'
OR upper([Budget Code]) LIKE '%MOVEABLE%'
OR upper([Budget Code]) LIKE '%MRI%'
OR upper([Budget Code]) LIKE '%NETWORK%'
OR upper([Budget Code]) LIKE '%NOVAS%'
OR upper([Budget Code]) LIKE '%NYCAPP%'
OR upper([Budget Code]) LIKE '%NYCAPS%'
OR upper([Budget Code]) LIKE '%NYCSERV%'
OR upper([Budget Code]) LIKE '%OAISIS%'
OR upper([Budget Code]) LIKE '%PAPERLESS%'
OR upper([Budget Code]) LIKE '%PASSENGER%VAN%'
OR upper([Budget Code]) LIKE '%PIANO%'
OR upper([Budget Code]) LIKE '%PORTAL%'
OR upper([Budget Code]) LIKE '%POWER%WASHER%'
OR upper([Budget Code]) LIKE '%PRINTING%'
OR upper([Budget Code]) LIKE '%PROJECTORS%'
OR upper([Budget Code]) LIKE '%PROJECTION%SYS%'
OR upper([Budget Code]) LIKE '%PROJECTION%SOUND%SYSTEM%'
OR upper([Budget Code]) LIKE '%PUMPERS%'
OR upper([Budget Code]) LIKE '%PURCHASE%'
OR upper([Budget Code]) LIKE '%REAL%TIME%INFO%'
OR upper([Budget Code]) LIKE '%REAL%TIME%SIGN%'
OR upper([Budget Code]) LIKE '%RECORDER%'
OR upper([Budget Code]) LIKE '%REFRIGERATOR%'
OR upper([Budget Code]) LIKE '%REPORTING%'
OR upper([Budget Code]) LIKE '%RESPONSE%BOAT%'
OR upper([Budget Code]) LIKE '% RIG %'
OR upper([Budget Code]) LIKE '%ROBOT%'
OR upper([Budget Code]) LIKE '%ROUTER%'
OR upper([Budget Code]) LIKE '%SATELLITE%'
OR upper([Budget Code]) LIKE '%SCANNER%'
OR upper([Budget Code]) LIKE '%SCANNING%'
OR upper([Budget Code]) LIKE '%SCHOOL%BUS%'
OR upper([Budget Code]) LIKE '%SELF%CHECK%'
OR upper([Budget Code]) LIKE '%SEQUENCER%'
OR upper([Budget Code]) LIKE '%SHUTTLE%'
OR upper([Budget Code]) LIKE '%SIMULATOR%'
OR upper([Budget Code]) LIKE '%SKID%STEER%'
OR upper([Budget Code]) LIKE '%SPECTROMETER%'
OR upper([Budget Code]) LIKE '%SPECTROPHMETER%'
OR upper([Budget Code]) LIKE '%SUBURBANS%'
OR upper([Budget Code]) LIKE '%SURVEILLANCE%'
OR upper([Budget Code]) LIKE '%SONOGRAM%'
OR upper([Budget Code]) LIKE '%SOUND%SYST%'
OR upper([Budget Code]) LIKE '%STREAMING%'
OR upper([Budget Code]) LIKE '%SWEEPER%'
OR upper([Budget Code]) LIKE '%SYSTEM%INTEGRATOR%'
OR upper([Budget Code]) LIKE '%SYSTEM%UPGRADE%'
OR upper([Budget Code]) LIKE '% TABLES%'
OR upper([Budget Code]) LIKE '% TECH%'
OR upper([Budget Code]) LIKE 'TECH%'
OR upper([Budget Code]) LIKE '%TELECOM%'
OR upper([Budget Code]) LIKE '%TELEMETRY%'
OR upper([Budget Code]) LIKE '%TOOLCAT%'
OR upper([Budget Code]) LIKE '%TRACTOR%'
OR upper([Budget Code]) LIKE '%TRAM%'
OR upper([Budget Code]) LIKE '%TRANSPORTATION%BUS%'
OR upper([Budget Code]) LIKE '%TROLLEY%'
OR upper([Budget Code]) LIKE '%ULTRASOUND%'
OR upper([Budget Code]) LIKE '%VAN'
OR upper([Budget Code]) LIKE '%VASCULAR%'
OR upper([Budget Code]) LIKE '%VENDING%'
OR upper([Budget Code]) LIKE '%VESSEL%'
OR upper([Budget Code]) LIKE '%VIRTUAL%'
OR upper([Budget Code]) LIKE '%VOICE%'
OR upper([Budget Code]) LIKE '%WAGON%'
OR upper([Budget Code]) LIKE '%WEB%PROXY%'
OR upper([Budget Code]) LIKE '%WEBSITE%'
OR upper([Budget Code]) LIKE '%WI-FI%'
OR upper([Budget Code]) LIKE '%WIRELESS%'
OR upper([Budget Code]) LIKE '%WIRETAP%'
OR upper([Budget Code]) LIKE '%WORK%STATION%'
OR upper([Budget Code]) LIKE '% X%RAY%'
OR upper([Budget Code]) LIKE '%ZAMBONI%'

-- Systems
OR upper([Budget Code]) LIKE '%ADDRESS%SYST%'
OR upper([Budget Code]) LIKE '%ADMIN%SYS%'
OR upper([Budget Code]) LIKE '%ASSET%SYS%'
OR upper([Budget Code]) LIKE '%ASSIGNMENT%SYST%'
OR upper([Budget Code]) LIKE '%ALARM%SYST%'
OR upper([Budget Code]) LIKE '%AUTOMATED%SYST%'
OR upper([Budget Code]) LIKE '%BILLING%SYST%'
OR upper([Budget Code]) LIKE '%BIOVAULT%SYST%'
OR upper([Budget Code]) LIKE '%COMMUNICATION%SYST%'
OR upper([Budget Code]) LIKE '%CRIME%SYST%'
OR upper([Budget Code]) LIKE '%ENROLLMENT%SYS%'
OR upper([Budget Code]) LIKE '%EQ %SYS%'
OR upper([Budget Code]) LIKE '%FAST%PASS%SYS%'
OR upper([Budget Code]) LIKE '%FILING%SYS%'
OR upper([Budget Code]) LIKE '%FIN% SYS%'
OR upper([Budget Code]) LIKE '%IMAGING%SYST%'
OR upper([Budget Code]) LIKE '%INFO%SYST%'
OR upper([Budget Code]) LIKE '%INGEST%SYST%'
OR upper([Budget Code]) LIKE '%INTELL%SYST%'
OR upper([Budget Code]) LIKE '%LAUNDRY%SYS%'
OR upper([Budget Code]) LIKE '%LICENSING%SYS%'
OR upper([Budget Code]) LIKE '%LOAD%SYS%'
OR upper([Budget Code]) LIKE '%MANAGEMENT%SYST%'
OR upper([Budget Code]) LIKE '%MEASUREMENT%SYST%'
OR upper([Budget Code]) LIKE '%MED%SYST%'
OR upper([Budget Code]) LIKE '%MEDIA%SYST%'
OR upper([Budget Code]) LIKE '%PRESERVATION%SYST%'
OR upper([Budget Code]) LIKE '%PREVENTION%SYST%'
OR upper([Budget Code]) LIKE '%PROCESSING%SYST%'
OR upper([Budget Code]) LIKE '%SECURITY%SYST%'
OR upper([Budget Code]) LIKE '%SYSTEMWIDE%SYST%'
OR upper([Budget Code]) LIKE '%TAX%SYST%'
OR upper([Budget Code]) LIKE '%TICKET%SYST%'
OR upper([Budget Code]) LIKE '%TRACKING%SYST%'
OR upper([Budget Code]) LIKE '%WORKFORCE%SYST%'
)
AND( upper([Budget Code]) NOT LIKE '%GARAGE%' );
"""

In [40]:
# Lump sum categorization
#
# SQL UPDATE that sets typecategory to 'Lump Sum' on capital_projects rows
# whose upper-cased [Budget Code] matches at least one of the LIKE patterns
# below. Rows are skipped when the value contains SPACE or RESTOR, and the
# `typecategory IS NULL` condition restricts the update to rows that have no
# category yet, so existing categories are never overwritten by this statement.
# Patterns without a leading or trailing % (e.g. 'FUND%', '%FUND', 'SCA%') are
# anchored to the start or end of the value.
# Lines starting with `--` inside the string are SQL comments.
query_lump_sum = """
UPDATE capital_projects SET typecategory = 'Lump Sum'
WHERE (
upper([Budget Code]) LIKE '%LUMP SUM%'
OR upper([Budget Code]) LIKE '% FUND%'
OR upper([Budget Code]) LIKE 'FUND%'
OR upper([Budget Code]) LIKE '%SURVEY%'
OR upper([Budget Code]) LIKE '%SUPERVISION%'
OR upper([Budget Code]) LIKE '%PROGRAM%'

--new
OR upper([Budget Code]) LIKE '%10%YEAR%PLAN%'
OR upper([Budget Code]) LIKE '%ACQUISITION%CITYWIDE%'
OR upper([Budget Code]) LIKE '%AGENCY%PROPOSED%PROJECT%'
OR upper([Budget Code]) LIKE '%AGREEMENT%'
OR upper([Budget Code]) LIKE '%APPLICATION%'
OR upper([Budget Code]) LIKE '%ASSESSMENT%'
OR upper([Budget Code]) LIKE '%ASSOC%'
OR upper([Budget Code]) LIKE '%AUDITOR%'
OR upper([Budget Code]) LIKE '%AVIATION%'
OR upper([Budget Code]) LIKE '%BIOSWALES%STORMWATER%'
OR upper([Budget Code]) LIKE '%BPL%INFRASTRUCTURE%RECONSTRUCTION%'
OR upper([Budget Code]) LIKE '%BUNDLE%PROJECTS%'
OR upper([Budget Code]) LIKE '%CAPITAL%COMP%'
OR upper([Budget Code]) LIKE '%CAMPAIGN%'
OR upper([Budget Code]) LIKE '%CALTHOLIC%MANAGE%'
OR upper([Budget Code]) LIKE '%CITY%WIDE%ACQUISITION%'
OR upper([Budget Code]) LIKE '%CITY%WIDE%MANAGEMENT%'
OR upper([Budget Code]) LIKE '%CITY%WIDE%MEASURES%'
OR upper([Budget Code]) LIKE '%CITY%WIDE%SECURITY%'
OR upper([Budget Code]) LIKE '%COACH%'
OR upper([Budget Code]) LIKE '%COALITION%'
OR upper([Budget Code]) LIKE '%COMB%'
OR upper([Budget Code]) LIKE '%COMMITTEE%'
OR upper([Budget Code]) LIKE '%COMPLIANCE%'
OR upper([Budget Code]) LIKE '%COMPUTER%PROG%'
OR upper([Budget Code]) LIKE '%CONTRACT%'
OR upper([Budget Code]) LIKE '%CONSERVATION%'
OR upper([Budget Code]) LIKE '%CONSULTANT%'
OR upper([Budget Code]) LIKE '%CONTRACTS%'
OR upper([Budget Code]) LIKE '%COOKING%PROJECT%'
OR upper([Budget Code]) LIKE '%COOPERATIVE%'
OR upper([Budget Code]) LIKE '%CORP%'
OR upper([Budget Code]) LIKE '%COUNCIL%'
OR upper([Budget Code]) LIKE '%DISTRIBUTION%FY%'
OR upper([Budget Code]) LIKE '%ELLA%FY%'
OR upper([Budget Code]) LIKE '%EMERGENCY%CONT%'
OR upper([Budget Code]) LIKE '%EMERGENCY%SAFETY%SYSTEMS%'
OR upper([Budget Code]) LIKE '%EXAMINATION%'
OR upper([Budget Code]) LIKE '%FERRY%PROJECTS%'
OR upper([Budget Code]) LIKE '%FUND'
OR upper([Budget Code]) LIKE '%GENERAL%'
OR upper([Budget Code]) LIKE '%HUD %'
OR upper([Budget Code]) LIKE '%HOLD %'
OR upper([Budget Code]) LIKE '% HOLD%'
OR upper([Budget Code]) LIKE '%IFA %'
OR upper([Budget Code]) LIKE '%IMPACT%STATEMENT%'
OR upper([Budget Code]) LIKE '%IMPROVEMENT%CITYWIDE%'
OR upper([Budget Code]) LIKE '% INC%'
OR upper([Budget Code]) LIKE '%INITIATIVE%'
OR upper([Budget Code]) LIKE '%INTITIATIVE%'
OR upper([Budget Code]) LIKE '%INSPCTN%'
OR upper([Budget Code]) LIKE '%INSPECTN%'
OR upper([Budget Code]) LIKE '%INSPECTION%'
OR upper([Budget Code]) LIKE '%INVESTIG%'
OR upper([Budget Code]) LIKE '%JOB%ORDER%CONTRACT%'
OR upper([Budget Code]) LIKE '% LLC%'
OR upper([Budget Code]) LIKE '%LUMP%'
OR upper([Budget Code]) LIKE '%MANAGEMENT%'
OR upper([Budget Code]) LIKE '%MAINTENANCE%'
OR upper([Budget Code]) LIKE '%MASTER%PLAN%'
OR upper([Budget Code]) LIKE '%MGNT%SVCS%'
OR upper([Budget Code]) LIKE '%MGNT%SVCES%'
OR upper([Budget Code]) LIKE '%MISC %'
OR upper([Budget Code]) LIKE '%MISC. %'
OR upper([Budget Code]) LIKE '%MISCELANNOUES%'
OR upper([Budget Code]) LIKE '%MITIGATION%PGM%'
OR upper([Budget Code]) LIKE '%MITIGATION%PROGRAM%'
OR upper([Budget Code]) LIKE '%MONITORING%'
OR upper([Budget Code]) LIKE '%NATIONAL%ASSOC%'
OR upper([Budget Code]) LIKE '%NEW%NEED%'
OR upper([Budget Code]) LIKE '%OPPORTUNIT%'
OR upper([Budget Code]) LIKE '%ORGANIZATION%'
OR upper([Budget Code]) LIKE '%OVERSIGHT%'
OR upper([Budget Code]) LIKE '%PARTNERSHIP%'
OR upper([Budget Code]) LIKE '%PED%SAFETY%'
OR upper([Budget Code]) LIKE '%PEDESTRIAN%SAFETY%'
OR upper([Budget Code]) LIKE '%PEDESTRIAN%SYS%'
OR upper([Budget Code]) LIKE '%PILOT%'
OR upper([Budget Code]) LIKE '%PLANNED%PARENTHOOD%OF%NYC'
OR upper([Budget Code]) LIKE '%PRIORITY%GRID%'
OR upper([Budget Code]) LIKE '%PROCUREMENT%'
OR upper([Budget Code]) LIKE '%PROFESSIONAL%SERVICE%'
OR upper([Budget Code]) LIKE '%PROJECT%RENEWAL'
OR upper([Budget Code]) LIKE '%PROG%'
OR upper([Budget Code]) LIKE '%PROTECTION%CITYWIDE%'
OR upper([Budget Code]) LIKE '%REIMBURSEMENT%'
OR upper([Budget Code]) LIKE '%RESILIENCY%MEASURES%'
OR upper([Budget Code]) LIKE '%RESOURCES%'
OR upper([Budget Code]) LIKE '%REVITAL%PROJECT%'
OR upper([Budget Code]) LIKE '%SAFE%COMMUNIT%'
OR upper([Budget Code]) LIKE '%SAFE%ROUTE%SCHOOL%'
OR upper([Budget Code]) LIKE '%SAFE%ROUTE%TO %'
OR upper([Budget Code]) LIKE '%SAFETY%IMPROVEMENT%'
OR upper([Budget Code]) LIKE '%SAFETY%PROJECT%'
OR upper([Budget Code]) LIKE '%SAMPLING%'
OR upper([Budget Code]) LIKE 'SCA%'
OR upper([Budget Code]) LIKE '%SE%&%WM%'
OR upper([Budget Code]) LIKE '%SEEDING%'
OR upper([Budget Code]) LIKE '%SERVICE%'
OR upper([Budget Code]) LIKE '%SERVICE%AGREEMENT%'
OR upper([Budget Code]) LIKE '%SEWER%WATER%'
OR upper([Budget Code]) LIKE '%SEWER%WM%'
OR upper([Budget Code]) LIKE '%SOCIETY%'
OR upper([Budget Code]) LIKE '%STAFF%'
OR upper([Budget Code]) LIKE '%STORM%WM%'
OR upper([Budget Code]) LIKE '%STORM%WATER%MANAGEMENT%'
OR upper([Budget Code]) LIKE '%STORM%WATER%MGMT%'
OR upper([Budget Code]) LIKE '%STRATEGIC%PLAN%'
OR upper([Budget Code]) LIKE '%STUDIES%'
OR upper([Budget Code]) LIKE '%SUPPORT%'
OR upper([Budget Code]) LIKE '%SURPLUS%'
OR upper([Budget Code]) LIKE '%SWER%WM%'
OR upper([Budget Code]) LIKE '%SWR%WM%'
OR upper([Budget Code]) LIKE '%SWR%W/M%'
OR upper([Budget Code]) LIKE '%SYSTEMWIDE%RENO%NEW%EXPAN%'
OR upper([Budget Code]) LIKE '%TASK%ORDER%'
OR upper([Budget Code]) LIKE '%TEN%YEAR%PLAN%'
OR upper([Budget Code]) LIKE '%TESTING%'
OR upper([Budget Code]) LIKE '%TORTS%'
OR upper([Budget Code]) LIKE '%TREES%'	
OR upper([Budget Code]) LIKE '%ULURP%'
OR upper([Budget Code]) LIKE '%VARIOUS%LOCATIONS%'
OR upper([Budget Code]) LIKE '%VARIOUS%WORK%'
OR upper([Budget Code]) LIKE '%VISION%ZERO%'
OR upper([Budget Code]) LIKE '%VOLUNTEERS%'
OR upper([Budget Code]) LIKE '%W/M%SEWER%'
OR upper([Budget Code]) LIKE '%W/M%SWR%'
OR upper([Budget Code]) LIKE '%WATER%MAIN%SEWER%'
OR upper([Budget Code]) LIKE '%WM%SEWER%'
OR upper([Budget Code]) LIKE '%WM%SWR%'

)
AND( upper([Budget Code]) NOT LIKE '%SPACE%')
AND( upper([Budget Code]) NOT LIKE '%RESTOR%' )
AND typecategory IS NULL;
"""

In [41]:
# Fixed asset categorization
#
# SQL UPDATE that sets typecategory to 'Fixed Asset' on capital_projects rows
# whose upper-cased [Budget Code] matches at least one of the LIKE patterns
# below. The `typecategory IS NULL` condition restricts the update to rows
# that have no category yet, so existing categories are never overwritten by
# this statement.
# Patterns padded with spaces (e.g. '% CTS %') only match space-delimited
# tokens; patterns without a leading or trailing % (e.g. 'CTS %', '% CTS',
# 'NDF%', '%ENTRY') are anchored to the start or end of the value.
# Lines starting with `--` inside the string are SQL comments.

query_fixed_asset = """
UPDATE capital_projects SET typecategory = 'Fixed Asset'
WHERE (
upper([Budget Code]) LIKE '%AUDITORIUM%'
OR upper([Budget Code]) LIKE '%BASIN%'
OR upper([Budget Code]) LIKE '%BATHROOM%'
OR upper([Budget Code]) LIKE '%BOILER%'
OR upper([Budget Code]) LIKE '%BORING%'
OR upper([Budget Code]) LIKE '%BLVD%'
OR upper([Budget Code]) LIKE '%BRIDGE%'
OR upper([Budget Code]) LIKE '%BUILDING%'
OR upper([Budget Code]) LIKE '%BULKHEAD%'
OR upper([Budget Code]) LIKE '%CAFETERIA%'
OR upper([Budget Code]) LIKE '%CANAL%'
OR upper([Budget Code]) LIKE '%CEILING%'
OR upper([Budget Code]) LIKE '%CENTER%'
OR upper([Budget Code]) LIKE '%CLIMATE%'
OR upper([Budget Code]) LIKE '%CONSTRUCTION%'
OR upper([Budget Code]) LIKE '%COOLING%'
OR upper([Budget Code]) LIKE '%CULVERT%'
OR upper([Budget Code]) LIKE '%DAY CARE%'
OR upper([Budget Code]) LIKE '%DEPOT%'
OR upper([Budget Code]) LIKE '%ELECTRICAL%'
OR upper([Budget Code]) LIKE '%ELEVATOR%'
OR upper([Budget Code]) LIKE '%ESCALATOR%'
OR upper([Budget Code]) LIKE '%EXTERIOR%'
OR upper([Budget Code]) LIKE '%FACILITY%'
OR upper([Budget Code]) LIKE '%FENC%'
OR upper([Budget Code]) LIKE '%FIELD%'
OR upper([Budget Code]) LIKE '%FIXTURE%'
OR upper([Budget Code]) LIKE '%FLOOR%'
OR upper([Budget Code]) LIKE '%GARAGE%'
OR upper([Budget Code]) LIKE '%GARDEN%'
OR upper([Budget Code]) LIKE '%GREENWAY%'
OR upper([Budget Code]) LIKE '%GYM%'
OR upper([Budget Code]) LIKE '%HALL%'
OR upper([Budget Code]) LIKE '%HEATING%'
OR upper([Budget Code]) LIKE '%HOSPITAL%'
OR upper([Budget Code]) LIKE '%HOUSE%'
OR upper([Budget Code]) LIKE '%HVAC%'
OR upper([Budget Code]) LIKE '%INTERIOR%'
OR upper([Budget Code]) LIKE '%KITCHEN%'
OR upper([Budget Code]) LIKE '%LAB%'
OR upper([Budget Code]) LIKE '%LANDING%'
OR upper([Budget Code]) LIKE '%LIBRARY%'
OR upper([Budget Code]) LIKE '%LIGHTING%'
OR upper([Budget Code]) LIKE '%MASONRY%'
OR upper([Budget Code]) LIKE '%MILLING%'
OR upper([Budget Code]) LIKE '%MTS%'
OR upper([Budget Code]) LIKE '%MUSEUM%'
OR upper([Budget Code]) LIKE '%PARAPET%'
OR upper([Budget Code]) LIKE '%PARK%'
OR upper([Budget Code]) LIKE '%PIER%'
OR upper([Budget Code]) LIKE '%PIPE%'
OR upper([Budget Code]) LIKE '%PIPING%'
OR upper([Budget Code]) LIKE '%PLANT%'
OR upper([Budget Code]) LIKE '%PLAYGROUND%'
OR upper([Budget Code]) LIKE '%PLAZA%'
OR upper([Budget Code]) LIKE '%POOL%'
OR upper([Budget Code]) LIKE '%RAMP%'
OR upper([Budget Code]) LIKE '%RECON%'
OR upper([Budget Code]) LIKE '%REHAB%'
OR upper([Budget Code]) LIKE '%RENOVAT%'
OR upper([Budget Code]) LIKE '%REPLACE%'
OR upper([Budget Code]) LIKE '%RESTORATION%'
OR upper([Budget Code]) LIKE '%ROOF%'
OR upper([Budget Code]) LIKE '%ROOM%'
OR upper([Budget Code]) LIKE '%SEWER%'
OR upper([Budget Code]) LIKE '%SHELTER%'
OR upper([Budget Code]) LIKE '%SIDEWALK%'
OR upper([Budget Code]) LIKE '%SITE ACQ%'
OR upper([Budget Code]) LIKE '%SPRAY%BOOTH%'
OR upper([Budget Code]) LIKE '%STADIUM%'
OR upper([Budget Code]) LIKE '%STATION%'
OR upper([Budget Code]) LIKE '%STREET%'
OR upper([Budget Code]) LIKE '%TERMINAL%'
OR upper([Budget Code]) LIKE '%THEATER%'
OR upper([Budget Code]) LIKE '%TOILET%'
OR upper([Budget Code]) LIKE '%TUNNEL%'
OR upper([Budget Code]) LIKE '%VENTILATION%'
OR upper([Budget Code]) LIKE '%WASTE%'
OR upper([Budget Code]) LIKE '%WINDOW%'
OR upper([Budget Code]) LIKE '%WPC%'
OR upper([Budget Code]) LIKE '%WWTP%'

--new
OR upper([Budget Code]) LIKE '%A/C%'
OR upper([Budget Code]) LIKE '%ACQ%'
OR upper([Budget Code]) LIKE '%ACQUISITION%'
OR upper([Budget Code]) LIKE '%AERATION%'
OR upper([Budget Code]) LIKE '%ADA%'
OR upper([Budget Code]) LIKE '%ADDITION%'
OR upper([Budget Code]) LIKE '%AIR%BOX%'
OR upper([Budget Code]) LIKE '%AIR%CON%'
OR upper([Budget Code]) LIKE '%ALARM%BOX%'
OR upper([Budget Code]) LIKE '%ANNEX%'
OR upper([Budget Code]) LIKE '%ARENA%'
OR upper([Budget Code]) LIKE '%ART%'
OR upper([Budget Code]) LIKE '%ASBESTOS%'
OR upper([Budget Code]) LIKE '%ASPHALT%'
OR upper([Budget Code]) LIKE '%ATRIUM%'
OR upper([Budget Code]) LIKE '%ATC%'
OR upper([Budget Code]) LIKE '%AQUARIUM%'
OR upper([Budget Code]) LIKE '%BACKFLOW%SYS%'
OR upper([Budget Code]) LIKE '%BARRIER%'
OR upper([Budget Code]) LIKE '%BOAT%LAUNCH%'
OR upper([Budget Code]) LIKE '%BIKEWAY%'
OR upper([Budget Code]) LIKE '%BIN%'
OR upper([Budget Code]) LIKE '%BIOSWALE%'
OR upper([Budget Code]) LIKE '%BLDG%'
OR upper([Budget Code]) LIKE '%BLDG%ACQ%'
OR upper([Budget Code]) LIKE '%BLUEBELT%'
OR upper([Budget Code]) LIKE '%BOARDWALK%'
OR upper([Budget Code]) LIKE '%BURNER%'
OR upper([Budget Code]) LIKE '%CABLING%'
OR upper([Budget Code]) LIKE '%CALL%BOX%'
OR upper([Budget Code]) LIKE '%CAMPUS%'
OR upper([Budget Code]) LIKE '%CAR%PORT%'
OR upper([Budget Code]) LIKE '%CATWALK%'
OR upper([Budget Code]) LIKE '%CEMETERY%'
OR upper([Budget Code]) LIKE '%CHAMBER%'
OR upper([Budget Code]) LIKE '%CHILLER%'
OR upper([Budget Code]) LIKE '%CLINIC%'
OR upper([Budget Code]) LIKE '%COMFORT%ST%'
OR upper([Budget Code]) LIKE '%COMPACTOR%'
OR upper([Budget Code]) LIKE '%COMPUTER%LAB%'
OR upper([Budget Code]) LIKE '%COMPUTER%ROOM%'
OR upper([Budget Code]) LIKE '%CONDUIT%'
OR upper([Budget Code]) LIKE '%CONSOLIDATION%'
OR upper([Budget Code]) LIKE '%CONSTRUCT%'
OR upper([Budget Code]) LIKE '%CONVERSION%'
OR upper([Budget Code]) LIKE '%CORRIDOR%'
OR upper([Budget Code]) LIKE '%COURT%'
OR upper([Budget Code]) LIKE '% CTS %'
OR upper([Budget Code]) LIKE 'CTS %'
OR upper([Budget Code]) LIKE '% CTS'
OR upper([Budget Code]) LIKE '%CSO%'
OR upper([Budget Code]) LIKE '%DAM%'
OR upper([Budget Code]) LIKE '%DEMOLITION%'
OR upper([Budget Code]) LIKE '%DEVELOPMENT%'
OR upper([Budget Code]) LIKE '%DIGESTER%'
OR upper([Budget Code]) LIKE '%DIMMER%SYST%'
OR upper([Budget Code]) LIKE '%DOCK%'
OR upper([Budget Code]) LIKE '%DOG%RUN%'
OR upper([Budget Code]) LIKE '%DOOR%'
OR upper([Budget Code]) LIKE '%DORM%'
OR upper([Budget Code]) LIKE '%DUCT%'
OR upper([Budget Code]) LIKE '%EE%UPGRADE%'
OR upper([Budget Code]) LIKE '%EJECTOR%'
OR upper([Budget Code]) LIKE '%ELECTRIC%SYS%'
OR upper([Budget Code]) LIKE '%ELEC%UPGRADE%'
OR upper([Budget Code]) LIKE '%ENCLOS%'
OR upper([Budget Code]) LIKE '%ENERGY%UPGRADE%'
OR upper([Budget Code]) LIKE '%ENTRANCE%'
OR upper([Budget Code]) LIKE '%ENTRY'
OR upper([Budget Code]) LIKE '%ESPLANADE%'
OR upper([Budget Code]) LIKE '%EXHAUST%SYSTEM%'
OR upper([Budget Code]) LIKE '%EXHIBIT%'
OR upper([Budget Code]) LIKE '%EXPANSION%'
OR upper([Budget Code]) LIKE '%EXTENSION%'
OR upper([Budget Code]) LIKE '%FACADE%'
OR upper([Budget Code]) LIKE '%FACILIT%'
OR upper([Budget Code]) LIKE '%FIRE%ALAR%'
OR upper([Budget Code]) LIKE '%FIRE%SAFETY%'
OR upper([Budget Code]) LIKE '%FIRE%SYSTEM%'
OR upper([Budget Code]) LIKE '%FIRE%UPGRADE%'
OR upper([Budget Code]) LIKE '%FLAG%'
OR upper([Budget Code]) LIKE '%FOUNTAIN%'
OR upper([Budget Code]) LIKE '%GALLERY%'
OR upper([Budget Code]) LIKE '%GAS%'
OR upper([Budget Code]) LIKE '%GATE%'
OR upper([Budget Code]) LIKE '%GOLF%COURSE%'
OR upper([Budget Code]) LIKE '%GOVERNORS%ISLAND%'
OR upper([Budget Code]) LIKE '% GRAT%'
OR upper([Budget Code]) LIKE '%GUARDS%'
OR upper([Budget Code]) LIKE '%HEAT%'
OR upper([Budget Code]) LIKE '%HIGH%LINE%'
OR upper([Budget Code]) LIKE '%HOT%WATER%'
OR upper([Budget Code]) LIKE '%INFRASTRUCTURE%'
OR upper([Budget Code]) LIKE '%INSTALLATION%'
OR upper([Budget Code]) LIKE '%INTERCONNECTION%'
OR upper([Budget Code]) LIKE '%IRRIGATION%'
OR upper([Budget Code]) LIKE '%LAMP%'
OR upper([Budget Code]) LIKE '%LAWNS%'
OR upper([Budget Code]) LIKE '%LEASE%RENEW%'
OR upper([Budget Code]) LIKE '%LIGHTS%'
OR upper([Budget Code]) LIKE '%LIGHTPOLE%'
OR upper([Budget Code]) LIKE '%LOCAL%LAW%11%'
OR upper([Budget Code]) LIKE '%LOUNGE%'
OR upper([Budget Code]) LIKE '%MARINA%'
OR upper([Budget Code]) LIKE '%MEDIAN%'
OR upper([Budget Code]) LIKE '%MICROFILTRATION%'
OR upper([Budget Code]) LIKE '%MODERNIZATION%'
OR upper([Budget Code]) LIKE '% MOVE %'
OR upper([Budget Code]) LIKE '%NEW%BRANCH%'
OR upper([Budget Code]) LIKE 'NDF%'
OR upper([Budget Code]) LIKE '%OBSERVATORY%'
OR upper([Budget Code]) LIKE '%OFFICES%'
OR upper([Budget Code]) LIKE '%OUTDOOR%SPACE%'
OR upper([Budget Code]) LIKE '%OUTFITTIN%'
OR upper([Budget Code]) LIKE '%OUT-FITTIN%'
OR upper([Budget Code]) LIKE '%OVERPASS%'
OR upper([Budget Code]) LIKE '%PASSIVE%LANDSCAPE%'
OR upper([Budget Code]) LIKE '%PATH%'
OR upper([Budget Code]) LIKE '%PAVEMENT%'
OR upper([Budget Code]) LIKE '% PEN %'
OR upper([Budget Code]) LIKE '%PEDESTALS%'
OR upper([Budget Code]) LIKE '%PLDG%'
OR upper([Budget Code]) LIKE '%PLGD%'
OR upper([Budget Code]) LIKE '%PLGRD%'
OR upper([Budget Code]) LIKE '%PLYG%'
OR upper([Budget Code]) LIKE '%PRINTSHOP%'
OR upper([Budget Code]) LIKE '%PROMENADE%'
OR upper([Budget Code]) LIKE '%PUMP%'
OR upper([Budget Code]) LIKE '%PV PANELS%'
OR upper([Budget Code]) LIKE '%REFURB%'
OR upper([Budget Code]) LIKE '%RELOCAT%'
OR upper([Budget Code]) LIKE '%REMOVAL%'
OR upper([Budget Code]) LIKE '%RENO%'
OR upper([Budget Code]) LIKE '%REPAIR%SHOP%'
OR upper([Budget Code]) LIKE '%REP% WM %'
OR upper([Budget Code]) LIKE '%RESTITUTION%'
OR upper([Budget Code]) LIKE '%RESURFACING%'
OR upper([Budget Code]) LIKE '%RETAINING%WALL%'
OR upper([Budget Code]) LIKE '%RETRO%FIT%'
OR upper([Budget Code]) LIKE '%RIDING%RING%'
OR upper([Budget Code]) LIKE '%RINK%'
OR upper([Budget Code]) LIKE '%RD%IMPROVEMENT%'
OR upper([Budget Code]) LIKE '%ROAD%IMPROVEMENT%'
OR upper([Budget Code]) LIKE '%ROAD%WIDE%'
OR upper([Budget Code]) LIKE '%ROPE%'
OR upper([Budget Code]) LIKE '%SCHOOLYARDS%'
OR upper([Budget Code]) LIKE '%SBS %'
OR upper([Budget Code]) LIKE '% SBS%'
OR upper([Budget Code]) LIKE '%SEAT %'
OR upper([Budget Code]) LIKE '%SEATING%'
OR upper([Budget Code]) LIKE '%SECURITY%INSTALL%'
OR upper([Budget Code]) LIKE '%SECURITY%SYS%'
OR upper([Budget Code]) LIKE '%SECURITY%UPG%'
OR upper([Budget Code]) LIKE '%SHAFT%'
OR upper([Budget Code]) LIKE '%SHED%'
OR upper([Budget Code]) LIKE '%SHORELINE%'
OR upper([Budget Code]) LIKE '%SHOWER%'
OR upper([Budget Code]) LIKE '%SIGN%'
OR upper([Budget Code]) LIKE '%SKATE P%'
OR upper([Budget Code]) LIKE '%SKYLIGHT%'
OR upper([Budget Code]) LIKE '%SMOKE%SYSTEM%'
OR upper([Budget Code]) LIKE '%SOLAR%PHOTO%'
OR upper([Budget Code]) LIKE '%SOLAR%PV%'
OR upper([Budget Code]) LIKE '%SPACE%'
OR upper([Budget Code]) LIKE '%SOCCER%'
OR upper([Budget Code]) LIKE '%SPORT%AREA%'
OR upper([Budget Code]) LIKE '%SPORT%COMPLEX%'
OR upper([Budget Code]) LIKE '%SPACE%IMPRO%'
OR upper([Budget Code]) LIKE '%SPRINKLER%'
OR upper([Budget Code]) LIKE '%STABILIZATION%'
OR upper([Budget Code]) LIKE '%STAGE%'
OR upper([Budget Code]) LIKE '%STAIR%'
OR upper([Budget Code]) LIKE '%STAIRCASE%'
OR upper([Budget Code]) LIKE '%STEAM%'
OR upper([Budget Code]) LIKE '%STM%SWR%'
OR upper([Budget Code]) LIKE '%STORM%SWR%'
OR upper([Budget Code]) LIKE '%STORAGE%'
OR upper([Budget Code]) LIKE '%STRM%SWR%'
OR upper([Budget Code]) LIKE '%STRUCTURE%'
OR upper([Budget Code]) LIKE '%STUDIO%'
OR upper([Budget Code]) LIKE '%SWING%'
OR upper([Budget Code]) LIKE '%SWR%'
OR upper([Budget Code]) LIKE '%TANK%'
OR upper([Budget Code]) LIKE '%TOW%POUND%'
OR upper([Budget Code]) LIKE '%TOWER%'
OR upper([Budget Code]) LIKE '%TRAIL%'
OR upper([Budget Code]) LIKE '%TRNG%RM%'
OR upper([Budget Code]) LIKE '%TRUNK%'
OR upper([Budget Code]) LIKE '%TURNSTILE%'
OR upper([Budget Code]) LIKE '%UNDERGROUND%'
OR upper([Budget Code]) LIKE '%UNDERPASS%'
OR upper([Budget Code]) LIKE '%VALVE%'
OR upper([Budget Code]) LIKE '% WALL%'
OR upper([Budget Code]) LIKE '%WAREHOUSE%'
OR upper([Budget Code]) LIKE '%WATER%MAIN%'
OR upper([Budget Code]) LIKE '%WATER%TOWER%'
OR upper([Budget Code]) LIKE '%WAYFINDING%'
OR upper([Budget Code]) LIKE '%WEIR%'
OR upper([Budget Code]) LIKE '%WETLAND%'
OR upper([Budget Code]) LIKE '%WIDENING%'
OR upper([Budget Code]) LIKE '%WIER%'
OR upper([Budget Code]) LIKE '%WING%'
OR upper([Budget Code]) LIKE '%WORKSPACE%'
OR upper([Budget Code]) LIKE '% WMS %'
OR upper([Budget Code]) LIKE '%WM%REPL%'
OR upper([Budget Code]) LIKE '%YACHT%CLUB%'
OR upper([Budget Code]) LIKE '%YARD%'
OR upper([Budget Code]) LIKE '%YMCA%'
OR upper([Budget Code]) LIKE '%YWHA%'
OR upper([Budget Code]) LIKE '%ZOO%'
)
AND typecategory IS NULL;
"""

# DPR-specific rule: for 'Department of Parks and Recreation' rows that are
# still uncategorized, mark the row as a Fixed Asset when [Budget Code]
# contains one of the letters B, M, Q, R or X followed by three digits.
#
# The database is SQLite, which has no `~` regex-match operator (that is
# PostgreSQL syntax) and ships no built-in REGEXP implementation, so the
# pattern is written with GLOB, which supports the same character classes.
# The leading and trailing `*` keep the match unanchored, like the regex it
# replaces; GLOB is case-sensitive, as `~` is.
query_dpr_fixed_asset = """
UPDATE capital_projects SET typecategory = 'Fixed Asset'
WHERE [Budget Code] GLOB '*[BMQRX][0-9][0-9][0-9]*' AND [Agency] = 'Department of Parks and Recreation'
AND typecategory IS NULL;
"""

In [43]:
# SQLAlchemy 2.x rejects plain strings passed to Session.execute (1.4 only
# warns); raw SQL has to be wrapped in text(). Imported here so the cell is
# self-contained.
from sqlalchemy import text

# Order matters: the ITT statement has no `typecategory IS NULL` guard and
# runs first; the Lump Sum and Fixed Asset statements only touch rows that are
# still NULL, so earlier categories take precedence over later ones.
# query_dpr_fixed_asset is not executed in this run.
categorization_queries = (
    query_itt_vehicles_equipment,
    query_lump_sum,
    query_fixed_asset,
)

try:
    for categorization_query in categorization_queries:
        session.execute(text(categorization_query))
    # Commit the changes to the database
    session.commit()
except Exception:
    # Leave the table untouched if any statement fails, then surface the error.
    session.rollback()
    raise

In [55]:
# Count how many rows received a category from the UPDATE statements.
# SQLAlchemy 2.x rejects plain strings passed to Session.execute (1.4 only
# warns), so the raw SQL is wrapped in text().
from sqlalchemy import text

query_test = """
SELECT COUNT(*) 
FROM capital_projects
WHERE typecategory IS NOT NULL;
"""
result = session.execute(text(query_test))
# scalar() returns the first column of the first row, i.e. the count itself.
print("Count:", result.scalar())

Count: 65863


In [57]:
# Read the categorized table back into a DataFrame, then dispose of the
# engine's connection pool.
checkbook_cats = pd.read_sql_table(table_name='capital_projects', con=engine)
engine.dispose()

In [82]:
# Summarize 'Check Amount' per category: total, row count, and share of the
# grand total in percent, plus a 'Total' margin row.

# Give uncategorized rows the literal label 'NULL' so they form their own
# group instead of being treated as missing index keys.
checkbook_cats['typecategory'] = checkbook_cats['typecategory'].fillna('NULL')

# The grand total does not change between groups, so compute it once here
# rather than on every call of the aggregation function. Series.sum() is
# vectorized and skips NaN, which keeps it consistent with the 'sum' aggfunc.
total_check_amount = checkbook_cats['Check Amount'].sum()

cats = checkbook_cats.pivot_table(
    index='typecategory',
    values='Check Amount',
    # Kept as a lambda so the resulting column is still labelled '<lambda>'.
    aggfunc=['sum', 'count', lambda x: x.sum() / total_check_amount * 100],
    fill_value=0,
    margins=True,
    margins_name='Total',
)

cats

Unnamed: 0_level_0,sum,count,<lambda>
Unnamed: 0_level_1,Check Amount,Check Amount,Check Amount
typecategory,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Fixed Asset,43300400000.0,48186,35.798401
"ITT, Vehicles, and Equipment",11858360000.0,8859,9.803839
Lump Sum,11334030000.0,8818,9.370354
,54463460000.0,15069,45.027406
Total,120956200000.0,80932,100.0
