# Capital Spending Project Feasibility Assessment: The Universe of Checkbook NYC

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import re
import datetime
import matplotlib.font_manager

In [2]:
file_names = ['checkbooknyc/Citywide-Agencies/spending_29351_05312023_040004_part_0.csv', 
'checkbooknyc/Citywide-Agencies/spending_29351_05312023_040004_part_1.csv',
'checkbooknyc/Citywide-Agencies/spending_29351_05312023_040004_part_2.csv',
'checkbooknyc/Citywide-Agencies/spending_29351_05312023_040004_part_3.csv']
df0 = pd.read_csv(file_names[0])
df1 = pd.read_csv(file_names[1])
df2 = pd.read_csv(file_names[2])
df3 = pd.read_csv(file_names[3])
frames = [df0, df1, df2, df3]
df = pd.concat(frames)

facdb = pd.read_csv('facilities.csv')

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1990934 entries, 0 to 490933
Data columns (total 20 columns):
 #   Column                     Dtype  
---  ------                     -----  
 0   Agency                     object 
 1   Associated Prime Vendor    float64
 2   Budget Code                object 
 3   Capital Project            object 
 4   Check Amount               float64
 5   Contract ID                object 
 6   Contract Purpose           object 
 7   Department                 object 
 8   Document ID                object 
 9   Emerging Business          object 
 10  Expense Category           object 
 11  Fiscal year                int64  
 12  Industry                   object 
 13  Issue Date                 object 
 14  M/WBE Category             object 
 15  Payee Name                 object 
 16  Spending Category          object 
 17  Sub Contract Reference ID  float64
 18  Sub Vendor                 object 
 19  Woman Owned Business       object 
dtypes: 

In [4]:
facdb.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33429 entries, 0 to 33428
Data columns (total 38 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   facname     33421 non-null  object 
 1   addressnum  28093 non-null  object 
 2   streetname  28441 non-null  object 
 3   address     30375 non-null  object 
 4   city        33224 non-null  object 
 5   zipcode     33060 non-null  float64
 6   boro        33265 non-null  object 
 7   borocode    33265 non-null  float64
 8   bin         25749 non-null  float64
 9   bbl         31364 non-null  float64
 10  cd          32476 non-null  float64
 11  nta2010     32476 non-null  object 
 12  nta2020     32476 non-null  object 
 13  council     32434 non-null  float64
 14  schooldist  32475 non-null  float64
 15  policeprct  32476 non-null  float64
 16  ct2010      32476 non-null  float64
 17  ct2020      32476 non-null  float64
 18  factype     33403 non-null  object 
 19  facsubgrp   33429 non-nul

In [7]:
print("There are {} unique projects captured in Checkbook NYC datasaet.".format(len(df["Contract ID"].unique())))
print("There are {} city agencies captured in the Checkbook NYC dataset.".format(len(df["Agency"].unique())))
print("There are {} facility types in the FacDB dataset, {} facility groups, and {} facility subgroups.".format(len(facdb["factype"].unique()),
len(facdb["facgroup"].unique()), len(facdb["facsubgrp"].unique())))

There are 926562 unique projects captured in Checkbook NYC datasaet.
There are 30 city agencies captured in the Checkbook NYC dataset.
There are 603 facility types in the FacDB dataset, 25 facility groups, and 71 facility subgroups.


## Data Cleaning

In [13]:
df['Issue Date'] = pd.to_datetime(df['Issue Date'])
df['Fiscal year'] = pd.to_datetime(df['Fiscal year'], format='%Y')
df['Agency'] = df['Agency'].str.upper()
df['Budget Code'] = df['Budget Code'].str.upper()
df['Contract Purpose'] = df['Contract Purpose'].str.upper()
df['Spending Category'] = df['Spending Category'].str.upper()

weird_nums = df[df['Check Amount'] < 0]
print(weird_nums.shape[0])

# filter out na's for check amount

nine_check = df[df['Check Amount']==99999999]
print(nine_check.shape[0])
weird_nums.head()

14702
43


Unnamed: 0,Agency,Associated Prime Vendor,Budget Code,Capital Project,Check Amount,Contract ID,Contract Purpose,Department,Document ID,Emerging Business,Expense Category,Fiscal year,Industry,Issue Date,M/WBE Category,Payee Name,Spending Category,Sub Contract Reference ID,Sub Vendor,Woman Owned Business
476232,SCHOOL CONSTRUCTION AUTHORITY,,E705 (6TH PLAN - BL 2364),040A00353100021,-0.01,PON104420151543214,,402-044-705,20230102261-2-DSB-AD,No,CONSTRUCTION-BUILDINGS,2023-01-01,,2022-09-06,Non-M/WBE,IANNELLI CONSTRUCTION CO INC,CAPITAL CONTRACTS,,No,No
476233,SCHOOL CONSTRUCTION AUTHORITY,,E704 (402044E704),040A00280380020,-0.01,PON104420121277065,,IMPLEMENTATATION OF THE FIFTH FIVE-YEAR,20140066150-2-DSB-AD,No,CONSTRUCTION-BUILDINGS,2014-01-01,,2013-10-03,Non-M/WBE,ABAX INC.,CAPITAL CONTRACTS,,No,No
476234,DEPARTMENT OF PARKS AND RECREATION,,"FER6 (FERRY POINT PARK, BX: CONST ANCILLARY FA)",850P-1FERY6A400,-0.02,PRC2850QB16C24SC52,,400-846-210,20220194241-2-DSB-EFT,No,CONSTRUCTION-BUILDINGS,2022-01-01,,2021-11-30,Non-M/WBE,TRITON STRUCTURAL CONCRETE INC,CAPITAL CONTRACTS,,No,No
476235,SCHOOL CONSTRUCTION AUTHORITY,,D001 (402044D001),040A00183140020,-0.1,PON104420101065933,,402-044-D01,20160219584-2-DSB-AD,No,CONSTRUCTION-BUILDINGS,2016-01-01,,2016-05-31,Individuals and Others,DIVISION OF SCHOOL FACILITIES C/O MARK DAVID,CAPITAL CONTRACTS,,No,No
476236,SCHOOL CONSTRUCTION AUTHORITY,,E706 (IMPLEMENTATION OF THE SEVENTH FIVE-YEAR),040EMERLIT20020,-0.13,PON104420212151408,,402-044-706,20220022392-2-DSB-AD,No,CONSTRUCTION-BUILDINGS,2022-01-01,,2021-08-16,Non-M/WBE,LOUIS BERGER & ASSOC PC,CAPITAL CONTRACTS,,No,No


In [14]:
weird_nums_agencies = weird_nums['Agency'].unique()
print(weird_nums_agencies)

nine_check.head()

print(nine_check)

['SCHOOL CONSTRUCTION AUTHORITY' 'DEPARTMENT OF PARKS AND RECREATION'
 'CITY UNIVERSITY OF NEW YORK' 'QUEENS BOROUGH PUBLIC LIBRARY'
 'DEPARTMENT OF ENVIRONMENTAL PROTECTION' 'DEPARTMENT OF TRANSPORTATION'
 'DEPARTMENT OF CULTURAL AFFAIRS'
 'DEPARTMENT OF CITYWIDE ADMINISTRATIVE SERVICES'
 'DEPARTMENT OF SANITATION' 'DEPARTMENT OF SMALL BUSINESS SERVICES'
 'DEPARTMENT OF HEALTH AND MENTAL HYGIENE' 'POLICE DEPARTMENT'
 'NEW YORK PUBLIC LIBRARY' 'HOUSING PRESERVATION AND DEVELOPMENT'
 'FIRE DEPARTMENT'
 'DEPARTMENT OF INFORMATION TECHNOLOGY AND TELECOMMUNICATIONS'
 'DEPARTMENT OF HOMELESS SERVICES' 'BROOKLYN PUBLIC LIBRARY'
 'WATER SUPPLY' "ADMINISTRATION FOR CHILDREN'S SERVICES"
 'DEPARTMENT OF CORRECTION' 'DEPARTMENT OF SOCIAL SERVICES'
 'HEALTH AND HOSPITALS CORPORATION' 'DEPARTMENT OF EDUCATION']


Unnamed: 0,Agency,Associated Prime Vendor,Budget Code,Capital Project,Check Amount,Contract ID,Contract Purpose,Department,Document ID,Emerging Business,Expense Category,Fiscal year,Industry,Issue Date,M/WBE Category,Payee Name,Spending Category,Sub Contract Reference ID,Sub Vendor,Woman Owned Business
2,DEPARTMENT OF EDUCATION,,E706 (IMPLEMENTATION OF THE SEVENTH FIVE-YEAR),040SCA22 030,99999999.0,PON1040SCA20-24MC,,IMPLEMENTATION OF THE SEVENTH FIVE-YEAR,20220448629-1-DSB-EFT,No,ADMINISTRATIVE EXPENSES,2022-01-01,,2022-05-27,Individuals and Others,NYC SCHOOL CONSTRUCTION AUTHORITY,CAPITAL CONTRACTS,,No,No
3,DEPARTMENT OF EDUCATION,,E706 (IMPLEMENTATION OF THE SEVENTH FIVE-YEAR),040SCA21 030,99999999.0,PON1040SCA20-24MC,,IMPLEMENTATION OF THE SEVENTH FIVE-YEAR,20220448626-1-DSB-EFT,No,CONSTRUCTION-BUILDINGS,2022-01-01,,2022-05-27,Individuals and Others,NYC SCHOOL CONSTRUCTION AUTHORITY,CAPITAL CONTRACTS,,No,No
4,DEPARTMENT OF EDUCATION,,E706 (IMPLEMENTATION OF THE SEVENTH FIVE-YEAR),040SCA21 030,99999999.0,PON1040SCA20-24MC,,IMPLEMENTATION OF THE SEVENTH FIVE-YEAR,20220354644-1-DSB-EFT,No,CONSTRUCTION-BUILDINGS,2022-01-01,,2022-03-25,Individuals and Others,NYC SCHOOL CONSTRUCTION AUTHORITY,CAPITAL CONTRACTS,,No,No
5,DEPARTMENT OF EDUCATION,,E705 (IMPLEMENTATION OF THE SIXTH FIVE-YEAR ED),040SCA19 020,99999999.0,PON1040SCA15-19MC,,IMPLEMENTATION OF THE SIXTH FIVE-YEAR ED,20220354641-1-DSB-EFT,No,CONSTRUCTION-BUILDINGS,2022-01-01,,2022-03-25,Individuals and Others,NYC SCHOOL CONSTRUCTION AUTHORITY,CAPITAL CONTRACTS,,No,No
6,DEPARTMENT OF EDUCATION,,E705 (IMPLEMENTATION OF THE SIXTH FIVE-YEAR ED),040SCA19 020,99999999.0,PON1040SCA15-19MC,,IMPLEMENTATION OF THE SIXTH FIVE-YEAR ED,20220354642-1-DSB-EFT,No,CONSTRUCTION-BUILDINGS,2022-01-01,,2022-03-25,Individuals and Others,NYC SCHOOL CONSTRUCTION AUTHORITY,CAPITAL CONTRACTS,,No,No
