In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the processed NYC business-licence table (path is relative to the notebook).
businesses = pd.read_csv(
    "../data/additional_data/businesses/businesses_NYC_2023_processed.csv"
)

In [3]:
def to_epoch_ms(dates):
    """Convert a column of dates to whole milliseconds since the Unix epoch.

    The values are parsed with ``pd.to_datetime`` and the elapsed time since
    1970-01-01 is floor-divided by one millisecond. Unlike casting the
    datetimes to int64 and dividing by ``10**6`` (which is only correct for
    nanosecond-resolution data), this does not depend on the internal
    resolution pandas chooses for the parsed values.

    Parameters
    ----------
    dates : pandas.Series
        Date strings or datetime values. Assumed to be timezone-naive --
        TODO confirm against the CSV.

    Returns
    -------
    pandas.Series
        Milliseconds since 1970-01-01 00:00:00.
        NOTE(review): missing dates (NaT) are expected to come out as NaN
        instead of a sentinel integer; verify if the columns contain gaps.
    """
    parsed = pd.to_datetime(dates)
    return (parsed - pd.Timestamp("1970-01-01")) // pd.Timedelta(milliseconds=1)


# Both licence date columns are stored as epoch milliseconds so they can be
# compared directly with the integer timestamps used elsewhere in the notebook.
businesses["License Expiration Date"] = to_epoch_ms(businesses["License Expiration Date"])
businesses["License Creation Date"] = to_epoch_ms(businesses["License Creation Date"])

In [4]:
# Pair of pandas Timestamps (2013-07-01, 2024-06-30); presumably the period
# covered by the data -- confirm with the author.
# NOTE(review): `unit="ms"` is passed together with explicit year/month/day
# components; whether it has any effect in that form should be checked against
# the pandas.Timestamp documentation. The other timestamps in this notebook are
# integer epoch milliseconds, not Timestamp objects.
# NOTE(review): this name is not referenced by any other cell visible here.
data_time_range = (
    pd.Timestamp(year=2013, month=7, day=1, unit="ms"),
    pd.Timestamp(year=2024, month=6, day=30, unit="ms"),
)

In [5]:
# Display the business table to check the licence date columns after conversion.
businesses

Unnamed: 0,License Expiration Date,License Creation Date,Industry,Business Name,Longitude,Latitude
0,1112227200000,860457600000,Garage,THE GARDENS 75TH STREET OWNERS CORP.,-73.891136,40.750745
1,1104451200000,986169600000,Tobacco Retail Dealer,"JALIL, MOHAMMED ABDUL",-73.960737,40.650123
2,1640908800000,1511568000000,Laundries,SING WAH LAUNDROMAT INC.,-74.023621,40.634397
3,1590192000000,1402272000000,Sidewalk Cafe,DCB DELANCEY CORPORATION,-73.993820,40.720242
4,1627689600000,1465430400000,Secondhand Dealer - General,MD BROADWAY ELECTRONICS LLC,-73.906078,40.855206
...,...,...,...,...,...,...
172327,1609372800000,1375142400000,Tobacco Retail Dealer,JUNCTION MART INC,-73.869095,40.748608
172328,1753920000000,1193011200000,Secondhand Dealer - General,DASAM SONS INC.,-73.979629,40.776247
172329,1735603200000,1689120000000,Electronics Store,GALAXY LATINO GIFT SHOP CORP,-74.005189,40.650050
172330,1753920000000,1055289600000,Secondhand Dealer - General,1672 62ND STREET R & J COLLISION INC.,-73.991506,40.622725


In [6]:
# Keep only licences that expire strictly after they were created; rows with
# an empty or inverted validity window are discarded.
businesses = businesses.loc[
    lambda licences: licences["License Expiration Date"] > licences["License Creation Date"]
]

In [7]:
# Load the processed NYC middle-schools table (path is relative to the notebook).
middle_schools = pd.read_csv(
    "../data/additional_data/schools/middle_schools_NYC_2021_processed.csv"
)

In [8]:
from haversine import haversine
from rtree.index import Index

In [9]:
def create_rtree_index(df, name):
    """Build a spatial index holding one point entry per row of ``df``.

    Every row is inserted as a degenerate bounding box located at its
    (Latitude, Longitude) pair and carries a dict payload with the keys
    ``name``, ``industry``, ``lat``, ``long``, ``active_from`` and
    ``active_to``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns "Latitude", "Longitude", "Industry",
        "License Creation Date", "License Expiration Date" and ``name``.
    name : str
        Column whose value is stored under the payload key ``"name"``.

    Returns
    -------
    Index
        The populated index. Entry ids are the row positions (0, 1, 2, ...),
        not the labels of ``df.index``.
    """
    spatial_index = Index()
    for position, (_, record) in enumerate(df.iterrows()):
        latitude, longitude = record["Latitude"], record["Longitude"]
        payload = {
            "name": record[name],
            "industry": record["Industry"],
            "lat": latitude,
            "long": longitude,
            "active_from": record["License Creation Date"],
            "active_to": record["License Expiration Date"],
        }
        # min and max corners coincide: the entry is a point, stored as (lat, long).
        spatial_index.insert(
            position,
            (latitude, longitude, latitude, longitude),
            obj=payload,
        )
    return spatial_index


businesses_idx = create_rtree_index(businesses, "Business Name")

In [10]:
# Reference timestamp in epoch milliseconds (2013-07-02 00:00:00 UTC).
issue_time = 1_372_723_200_000

In [11]:
def get_nearest_locations(idx, lat, lang, n):
    """Return the payloads of the ``n`` index entries nearest to a point.

    Parameters
    ----------
    idx : spatial index exposing ``nearest(coordinates, num_results, objects=True)``.
    lat, lang : coordinates of the query point, in the same (lat, long) order
        that was used when the index was filled.
    n : number of neighbours to request.

    Returns
    -------
    list
        The ``.object`` payload of every hit, in the order the index yields them.
    """
    query_point = (lat, lang, lat, lang)
    return [hit.object for hit in idx.nearest(query_point, n, objects=True)]


def computational_wrapper(row, lat_i, long_i, idx, time_i, n=16):
    """Find the nearest indexed location that is active at the row's time.

    The ``n`` nearest entries are fetched and filtered to those whose
    ``active_from``/``active_to`` window contains ``row[time_i]``. When none
    qualifies, the search is repeated with twice as many neighbours. The
    search stops as soon as the index returns fewer entries than requested,
    because every entry has then been examined.

    Parameters
    ----------
    row : indexable record (e.g. a tuple from ``DataFrame.itertuples()``).
    lat_i, long_i, time_i : positions of latitude, longitude and timestamp in ``row``.
    idx : spatial index whose payloads are dicts with the keys ``name``,
        ``industry``, ``lat``, ``long``, ``active_from`` and ``active_to``.
    n : initial number of neighbours to inspect.

    Returns
    -------
    tuple
        ``(name, industry, distance)`` of the first active neighbour, where
        ``distance`` is the value returned by ``haversine`` for the two
        points. ``(None, None, nan)`` when no entry in the index is active at
        the requested time (previously this case recursed without bound).
    """
    lat, lon, moment = row[lat_i], row[long_i], row[time_i]
    while True:
        candidates = get_nearest_locations(idx, lat, lon, n)
        # Plain-Python filtering avoids building a DataFrame for every row.
        active = [
            place
            for place in candidates
            if place["active_from"] <= moment <= place["active_to"]
        ]
        if active:
            # NOTE(review): candidates are ordered by the index's own metric on
            # raw (lat, long) values, which may differ slightly from haversine order.
            best = active[0]
            distance = haversine((lat, lon), (best["lat"], best["long"]))
            return (best["name"], best["industry"], distance)
        if len(candidates) < n:
            # Fewer hits than requested: the whole index has been searched.
            return (None, None, float("nan"))
        n *= 2

In [12]:
# Give every school the same reference timestamp in a "time" column, so each
# row carries the moment used for the licence-validity check.
middle_schools = middle_schools.assign(time=issue_time)

In [13]:
# Display the schools table, now including the "time" column.
middle_schools

Unnamed: 0,name,Latitude,Longitude,time
0,P.S. 034 Franklin D. Roosevelt,40.726473,-73.975181,1372723200000
1,P.S. 140 Nathan Straus,40.719250,-73.983056,1372723200000
2,P.S. 184m Shuang Wen,40.711549,-73.986542,1372723200000
3,P.S. 188 The Island School,40.719598,-73.977904,1372723200000
4,University Neighborhood Middle School,40.713684,-73.986336,1372723200000
...,...,...,...,...
467,I.S. 347 School of Humanities,40.700690,-73.927481,1372723200000
468,"I.S. 349 Math, Science & Tech.",40.700690,-73.927481,1372723200000
469,J.H.S. 383 Philippa Schuyler,40.697781,-73.919777,1372723200000
470,P.S. /I.S. 384 Frances E. Carter,40.690353,-73.904803,1372723200000


In [14]:
# Positions of the needed fields inside the tuples yielded by
# middle_schools.itertuples(): slot 0 holds the index label, so every column
# position is shifted by one.
lat_i, long_i, time_i = (
    middle_schools.columns.tolist().index(column) + 1
    for column in ("Latitude", "Longitude", "time")
)

In [15]:
# One (name, industry, distance) tuple per school, in table order.
res = [
    computational_wrapper(school, lat_i, long_i, businesses_idx, time_i)
    for school in middle_schools.itertuples()
]

In [16]:
# Turn the list of result tuples into a table that shares the schools' index,
# so the two frames can be aligned row by row.
res = pd.DataFrame(
    data=res,
    index=middle_schools.index,
    columns=["Closest Business", "Industry of Business", "Distance to Business"],
)

In [17]:
# Display the nearest-business lookup results.
res

Unnamed: 0,Closest Business,Industry of Business,Distance to Business
0,D' MARGARITA DELI GROCERY CORP.CORP.,Tobacco Retail Dealer,0.191874
1,MASTER MINIMARKET & DELI CORP.,Tobacco Retail Dealer,0.017933
2,ASIAN AMERICAN CONSTRUCTION INC.,Home Improvement Contractor,0.171376
3,23 AVE. D CORP.,Tobacco Retail Dealer,0.130344
4,LAUNDRY QUEEN III INC.,Laundry,0.063496
...,...,...,...
467,BROTHER'S DRY CLEANER CRP,Laundry Jobber,0.093860
468,BROTHER'S DRY CLEANER CRP,Laundry Jobber,0.093860
469,"RAYDAN DELI & GROCERY 1, INC.",Tobacco Retail Dealer,0.085890
470,DORESANTIAGO HOME IMPROVEMENT INC.,Home Improvement Contractor,0.027451


In [18]:
# Join the lookup results onto the schools by index; the merged frame is only
# displayed here, not assigned to a variable.
middle_schools.merge(res, left_index=True, right_index=True)

Unnamed: 0,name,Latitude,Longitude,time,Closest Business,Industry of Business,Distance to Business
0,P.S. 034 Franklin D. Roosevelt,40.726473,-73.975181,1372723200000,D' MARGARITA DELI GROCERY CORP.CORP.,Tobacco Retail Dealer,0.191874
1,P.S. 140 Nathan Straus,40.719250,-73.983056,1372723200000,MASTER MINIMARKET & DELI CORP.,Tobacco Retail Dealer,0.017933
2,P.S. 184m Shuang Wen,40.711549,-73.986542,1372723200000,ASIAN AMERICAN CONSTRUCTION INC.,Home Improvement Contractor,0.171376
3,P.S. 188 The Island School,40.719598,-73.977904,1372723200000,23 AVE. D CORP.,Tobacco Retail Dealer,0.130344
4,University Neighborhood Middle School,40.713684,-73.986336,1372723200000,LAUNDRY QUEEN III INC.,Laundry,0.063496
...,...,...,...,...,...,...,...
467,I.S. 347 School of Humanities,40.700690,-73.927481,1372723200000,BROTHER'S DRY CLEANER CRP,Laundry Jobber,0.093860
468,"I.S. 349 Math, Science & Tech.",40.700690,-73.927481,1372723200000,BROTHER'S DRY CLEANER CRP,Laundry Jobber,0.093860
469,J.H.S. 383 Philippa Schuyler,40.697781,-73.919777,1372723200000,"RAYDAN DELI & GROCERY 1, INC.",Tobacco Retail Dealer,0.085890
470,P.S. /I.S. 384 Frances E. Carter,40.690353,-73.904803,1372723200000,DORESANTIAGO HOME IMPROVEMENT INC.,Home Improvement Contractor,0.027451


In [19]:
import dask.dataframe as dd

# Repeat the nearest-business lookup on a small sample of parking tickets.
tickets = dd.read_parquet('../data/parking_tickets/parquet/full_data_cleaned.parquet')

# head() yields a small in-memory sample to try the lookup on.
tmp = tickets.head()

# Tuple positions for tmp.itertuples(): slot 0 holds the index label, hence the +1.
lat_i, long_i, time_i = (
    tmp.columns.tolist().index(column) + 1
    for column in ("Latitude", "Longitude", "Issue Date")
)

# NOTE(review): "bussiness" is misspelled; the label is kept unchanged because
# later code may refer to it.
res = pd.DataFrame(
    data=[
        computational_wrapper(ticket, lat_i, long_i, businesses_idx, time_i)
        for ticket in tmp.itertuples()
    ],
    index=tmp.index,
    columns=["bussiness", "industry", "distance"],
)

tmp.merge(res, left_index=True, right_index=True)

Unnamed: 0,Summons Number,Plate ID,Registration State,Plate Type,Issue Date,Violation Code,Vehicle Body Type,Vehicle Make,Issuing Agency,Street Code1,...,Vehicle Color,Vehicle Year,Feet From Curb,Violation Post Code,Violation Description,Latitude,Longitude,bussiness,industry,distance
7285,1351801788,41666JM,NY,COM,1372723200000,14,VAN,,P,45440,...,,0,0,,,40.707333,-74.007828,"AHRC NEW YORK CITY FOUNDATION, INC.",Games of Chance,0.022238
7443,1351801790,99208MC,NY,COM,1372723200000,14,VAN,,P,45440,...,,0,0,,,40.707333,-74.007828,"AHRC NEW YORK CITY FOUNDATION, INC.",Games of Chance,0.022238
7733,1351801806,49965JG,NY,COM,1372723200000,14,VAN,,P,13590,...,,0,0,,,40.70362,-74.010706,PEARLSTONE BURGER CORPORATION,Sidewalk Cafe,0.050663
8322,1291521665,YANKYJAM,NY,PAS,1372723200000,46,SDN,BMW,F,5430,...,BLK,0,0,,,40.645503,-74.013282,S.N.A. FINEST DELI & GROCERY CORP.,Tobacco Retail Dealer,0.0274
8325,1335816963,XX865Y,NJ,PAS,1372723200000,78,DELV,CHEVR,P,24790,...,WHITE,0,0,,,40.71965,-74.010185,TRIBECA DELICATESSEN INC.,Tobacco Retail Dealer,0.007857
