In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point


In [2]:
df_311 = pd.read_excel('311_prepared.xlsx').drop(columns = 'Unnamed: 0')
df_311.head()

Unnamed: 0,open_date,longitude,latitude,zip_code
0,10/20/2023,-98.516167,29.444219,
1,11/2/2023,-98.534868,29.351602,78224.0
2,11/6/2023,-98.541934,29.535004,
3,11/28/2023,-98.569024,29.461083,78228.0
4,12/4/2023,-98.556656,29.528313,


## Get Zipcodes Using Geopandas

In [3]:
# Load your DataFrame
df = pd.read_excel('311_prepared.xlsx').drop(columns='Unnamed: 0')  # Must contain 'latitude' and 'longitude'

# Create geometry column with Point(longitude, latitude)
geometry = [Point(xy) for xy in zip(df['longitude'], df['latitude'])]

# Convert to GeoDataFrame
gdf_points = gpd.GeoDataFrame(df, geometry=geometry, crs='EPSG:4326')

# Load 2024 ZCTA shapefile (make sure .shp, .shx, .dbf, etc. are in the same folder)
gdf_zips = gpd.read_file('tl_2024_us_zcta520.shp')

# Ensure both GeoDataFrames use the same CRS
gdf_zips = gdf_zips.to_crs(gdf_points.crs)

# Spatial join (match points within ZIP polygons)
gdf_joined = gpd.sjoin(gdf_points, gdf_zips, how='left', predicate='within')

# Step 7: Add ZIP code column from shapefile 
gdf_joined['new_zips'] = gdf_joined['ZCTA5CE20']  # adjust if the column name is different

# Step 8: Optional - Drop spatial join metadata
gdf_joined = gdf_joined.drop(columns=['index_right'])

# Step 9: (Optional) Convert back to regular DataFrame
df_with_zip = pd.DataFrame(gdf_joined)

# Step 10: Done - Preview result
df = df_with_zip[['open_date',
                  'latitude',
                  'longitude',
                  'zip_code',
                  'new_zips']]

In [4]:
df_zip = df.dropna()

df_zip['zip_code'] = df.zip_code.astype(object)[0:-2]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [5]:
df_zip

Unnamed: 0,open_date,latitude,longitude,zip_code,new_zips
1,11/2/2023,29.351602,-98.534868,78224.0,78224
3,11/28/2023,29.461083,-98.569024,78228.0,78228
5,12/6/2023,29.512414,-98.525528,78213.0,78213
6,12/16/2023,29.507367,-98.519163,78213.0,78213
8,12/19/2023,29.343525,-98.484157,78221.0,78214
...,...,...,...,...,...
2957,4/10/2025,29.430163,-98.651325,78227.0,78245
2958,4/10/2025,29.498957,-98.419498,78218.0,78218
2960,4/10/2025,29.493098,-98.381185,78218.0,78218
2965,4/12/2025,29.519262,-98.597456,,78240


In [6]:
df_zip

Unnamed: 0,open_date,latitude,longitude,zip_code,new_zips
1,11/2/2023,29.351602,-98.534868,78224.0,78224
3,11/28/2023,29.461083,-98.569024,78228.0,78228
5,12/6/2023,29.512414,-98.525528,78213.0,78213
6,12/16/2023,29.507367,-98.519163,78213.0,78213
8,12/19/2023,29.343525,-98.484157,78221.0,78214
...,...,...,...,...,...
2957,4/10/2025,29.430163,-98.651325,78227.0,78245
2958,4/10/2025,29.498957,-98.419498,78218.0,78218
2960,4/10/2025,29.493098,-98.381185,78218.0,78218
2965,4/12/2025,29.519262,-98.597456,,78240


In [8]:
df_check = df_zip[['zip_code','new_zips']]

In [9]:
df_check


Unnamed: 0,zip_code,new_zips
1,78224.0,78224
3,78228.0,78228
5,78213.0,78213
6,78213.0,78213
8,78221.0,78214
...,...,...
2957,78227.0,78245
2958,78218.0,78218
2960,78218.0,78218
2965,,78240


In [None]:
df_check.info()