In [None]:
import geopandas as gpd
import numpy as np
import pandas as pd

# Load shapefile
gdf = gpd.read_file("taxi_zones/taxi_zones.shp")

# Convert to WGS84 (lat/lon)
gdf = gdf.to_crs(epsg=4326)

# Get lat/lon bounds of NYC
minx, miny, maxx, maxy = gdf.total_bounds

# Grid config: 15 rows × 5 columns
rows, cols = 15, 5
lon_bins = np.linspace(minx, maxx, cols + 1)
lat_bins = np.linspace(miny, maxy, rows + 1)

# Compute centroids for each region
rows = 15  # vertical divisions (latitude)
cols = 5   # horizontal divisions (longitude)

region_data = []
region_id = 0

# loop over rows (latitude) *outside*, cols (longitude) *inside*
for i in range(rows):
    for j in range(cols):
        lat_center = (lat_bins[i] + lat_bins[i+1]) / 2
        lon_center = (lon_bins[j] + lon_bins[j+1]) / 2
        region_data.append({
            "region_id": region_id,
            "latitude": lat_center,
            "longitude": lon_center
        })
        region_id += 1

region_df = pd.DataFrame(region_data)
region_df.to_csv("libcity_nyctaxi_region_coords.csv", index=False)
print(region_df.head())


   region_id   latitude  longitude
0          0  40.510096 -74.200033
1          1  40.510096 -74.088917
2          2  40.510096 -73.977800
3          3  40.510096 -73.866684
4          4  40.510096 -73.755567


In [9]:
print(region_df)

    region_id   latitude  longitude
0           0  40.510096 -74.200033
1           1  40.510096 -74.088917
2           2  40.510096 -73.977800
3           3  40.510096 -73.866684
4           4  40.510096 -73.755567
..        ...        ...        ...
70         70  40.901552 -74.200033
71         71  40.901552 -74.088917
72         72  40.901552 -73.977800
73         73  40.901552 -73.866684
74         74  40.901552 -73.755567

[75 rows x 3 columns]


In [11]:
region_df['centers'] = region_df.apply(lambda row: [row['latitude'], row['longitude']], axis=1)


In [12]:
region_df['centers']

0     [40.51009597456496, -74.20003313323042]
1     [40.51009597456496, -74.08891667338708]
2     [40.51009597456496, -73.97780021354374]
3     [40.51009597456496, -73.86668375370041]
4     [40.51009597456496, -73.75556729385707]
                       ...                   
70    [40.90155219760837, -74.20003313323042]
71    [40.90155219760837, -74.08891667338708]
72    [40.90155219760837, -73.97780021354374]
73    [40.90155219760837, -73.86668375370041]
74    [40.90155219760837, -73.75556729385707]
Name: centers, Length: 75, dtype: object

In [13]:
#vec_field = [[0]*270]*270
vec_field = [[0]*75 for _ in range(75)]
for index, row in region_df.iterrows():
    for index1, row1 in region_df.iterrows():
        delta_x = row1['centers'][0] - row['centers'][0]
        delta_y = row1['centers'][1] - row['centers'][1]
        vec_field[index][index1]= np.array([delta_x,delta_y])
        
vec_field

[[array([0., 0.]),
  array([0.        , 0.11111646]),
  array([0.        , 0.22223292]),
  array([0.        , 0.33334938]),
  array([0.        , 0.44446584]),
  array([0.02796116, 0.        ]),
  array([0.02796116, 0.11111646]),
  array([0.02796116, 0.22223292]),
  array([0.02796116, 0.33334938]),
  array([0.02796116, 0.44446584]),
  array([0.05592232, 0.        ]),
  array([0.05592232, 0.11111646]),
  array([0.05592232, 0.22223292]),
  array([0.05592232, 0.33334938]),
  array([0.05592232, 0.44446584]),
  array([0.08388348, 0.        ]),
  array([0.08388348, 0.11111646]),
  array([0.08388348, 0.22223292]),
  array([0.08388348, 0.33334938]),
  array([0.08388348, 0.44446584]),
  array([0.11184464, 0.        ]),
  array([0.11184464, 0.11111646]),
  array([0.11184464, 0.22223292]),
  array([0.11184464, 0.33334938]),
  array([0.11184464, 0.44446584]),
  array([0.13980579, 0.        ]),
  array([0.13980579, 0.11111646]),
  array([0.13980579, 0.22223292]),
  array([0.13980579, 0.33334938]),
 

In [18]:
v_np = np.array(vec_field)
np.save('VF_NYCTaxi.npy', v_np)