In [1]:
!pip install geopandas requests gspread oauth2client



In [2]:
import pandas as pd
import geopandas as gpd
import requests
import json
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from gspread_dataframe import set_with_dataframe
from shapely.geometry import Point
import random

In [3]:
# Authenticate user with Google account
from google.colab import auth
auth.authenticate_user()

from google.auth import default

# Get creds from your logged-in account
creds, _ = default()
gc = gspread.authorize(creds)

# Open Google Sheet by title (the visible sheet name in Drive, not path)
spreadsheet = gc.open("UTsTateLegIslaTurE_02122025")
worksheet = spreadsheet.worksheet("Sheet1")

# Load data
data = worksheet.get_all_records()

df = pd.DataFrame(data)

df.head()
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104 entries, 0 to 103
Data columns (total 10 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   District                104 non-null    int64 
 1   Office                  104 non-null    object
 2   Representative          104 non-null    object
 3   Webpage                 104 non-null    object
 4   Img_ID                  104 non-null    object
 5   Img_URL                 104 non-null    object
 6   Legislation_By_Senator  104 non-null    object
 7   Party                   104 non-null    object
 8   Email                   104 non-null    object
 9   County(ies)             104 non-null    object
dtypes: int64(1), object(9)
memory usage: 8.3+ KB


In [4]:
# Function to reformat "Last, First Middle" → "First Middle Last"
def reformat_name(name):
    if pd.isna(name):
        return name
    parts = name.split(",")  # Split into ['Last', ' First Middle']
    if len(parts) != 2:
        return name.strip()  # If not in expected format, leave as-is
    last = parts[0].strip()
    first_middle = parts[1].strip()
    return f"{first_middle} {last}"

# Apply to the column
df['Representative'] = df['Representative'].apply(reformat_name)

# Check results
df[['Representative']].head(10)


Unnamed: 0,Representative
0,Thomas W. Peterson
1,Michael J. Petersen
2,Jason E. Thompson
3,Tiara Auxier
4,Casey Snider
5,Matthew H. Gwynn
6,Ryan D. Wilcox
7,Jason B. Kyle
8,Jake Sawyer
9,Jill Koford


In [5]:
# Create DistrictKey column
df["DistrictKey"] = df.apply(
    lambda row: ("H" if row["Office"] == "State House" else "S") + str(row["District"]),
    axis=1
)

df.head()


Unnamed: 0,District,Office,Representative,Webpage,Img_ID,Img_URL,Legislation_By_Senator,Party,Email,County(ies),DistrictKey
0,1,State House,Thomas W. Peterson,https://house.utleg.gov/rep/PETERT,PETERT,https://le.utah.gov/images/legislator/house/PE...,https://le.utah.gov/asp/billsintro/SenResults....,R,tpeterson@le.utah.gov,"Box Elder, Cache",H1
1,2,State House,Michael J. Petersen,https://house.utleg.gov/rep/PETERM,PETERM,https://le.utah.gov/images/legislator/house/PE...,https://le.utah.gov/asp/billsintro/SenResults....,R,mpetersen@le.utah.gov,Cache,H2
2,3,State House,Jason E. Thompson,https://house.utleg.gov/rep/THOMJA,THOMJA,https://le.utah.gov/images/legislator/house/TH...,https://le.utah.gov/asp/billsintro/SenResults....,R,jthompson@le.utah.gov,Cache,H3
3,4,State House,Tiara Auxier,https://house.utleg.gov/rep/AUXIET,AUXIET,https://le.utah.gov/images/legislator/house/AU...,https://le.utah.gov/asp/billsintro/SenResults....,R,tauxier@le.utah.gov,"Daggett, Duchesne, Morgan, Rich, Summit",H4
4,5,State House,Casey Snider,https://house.utleg.gov/rep/SNIDEC,SNIDEC,https://le.utah.gov/images/legislator/house/SN...,https://le.utah.gov/asp/billsintro/SenResults....,R,csnider@le.utah.gov,Cache,H5


In [6]:
# Senate districts URL
senate_url = "https://services1.arcgis.com/99lidPhWCzftIe9K/arcgis/rest/services/UtahSenateDistricts2022to2032/FeatureServer/0/query"
params = {
    "where": "1=1",
    "outFields": "*",
    "outSR": "4326",
    "f": "geojson"   # <- request GeoJSON instead of JSON
}

# Request GeoJSON
response = requests.get(senate_url, params=params)
with open("senate.geojson", "wb") as f:
    f.write(response.content)

# Load into GeoDataFrame
senate_gdf = gpd.read_file("senate.geojson")

print(senate_gdf.columns)
senate_gdf.head()


Index(['OBJECTID', 'DIST', 'COLOR4', 'Shape__Area', 'Shape__Length',
       'geometry'],
      dtype='object')


Unnamed: 0,OBJECTID,DIST,COLOR4,Shape__Area,Shape__Length,geometry
0,1,1,4,41329370000.0,1131859.0,"POLYGON ((-112.46356 40.56335, -112.4637 40.56..."
1,2,2,2,8406297000.0,569357.2,"POLYGON ((-111.42591 41.67675, -111.42596 41.6..."
2,3,3,1,5972148000.0,557460.1,"POLYGON ((-111.62349 40.73216, -111.62346 40.7..."
3,4,4,2,1197231000.0,190500.9,"POLYGON ((-112.49342 41.077, -112.48325 41.087..."
4,5,5,4,606843400.0,156897.0,"POLYGON ((-111.71156 41.06643, -111.71158 41.0..."


In [7]:
senate_gdf['District'] = senate_gdf['DIST'].astype(int)
senate_gdf['Chamber'] = "Senate"
senate_gdf['DistrictKey'] = "S" + senate_gdf['District'].astype(str)

print(senate_gdf.columns)
senate_gdf.head()

Index(['OBJECTID', 'DIST', 'COLOR4', 'Shape__Area', 'Shape__Length',
       'geometry', 'District', 'Chamber', 'DistrictKey'],
      dtype='object')


Unnamed: 0,OBJECTID,DIST,COLOR4,Shape__Area,Shape__Length,geometry,District,Chamber,DistrictKey
0,1,1,4,41329370000.0,1131859.0,"POLYGON ((-112.46356 40.56335, -112.4637 40.56...",1,Senate,S1
1,2,2,2,8406297000.0,569357.2,"POLYGON ((-111.42591 41.67675, -111.42596 41.6...",2,Senate,S2
2,3,3,1,5972148000.0,557460.1,"POLYGON ((-111.62349 40.73216, -111.62346 40.7...",3,Senate,S3
3,4,4,2,1197231000.0,190500.9,"POLYGON ((-112.49342 41.077, -112.48325 41.087...",4,Senate,S4
4,5,5,4,606843400.0,156897.0,"POLYGON ((-111.71156 41.06643, -111.71158 41.0...",5,Senate,S5


In [8]:
senate_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 29 entries, 0 to 28
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   OBJECTID       29 non-null     int32   
 1   DIST           29 non-null     int32   
 2   COLOR4         29 non-null     int32   
 3   Shape__Area    29 non-null     float64 
 4   Shape__Length  29 non-null     float64 
 5   geometry       29 non-null     geometry
 6   District       29 non-null     int64   
 7   Chamber        29 non-null     object  
 8   DistrictKey    29 non-null     object  
dtypes: float64(2), geometry(1), int32(3), int64(1), object(2)
memory usage: 1.8+ KB


In [9]:
house_url = "https://services1.arcgis.com/99lidPhWCzftIe9K/arcgis/rest/services/UtahHouseDistricts2022to2032/FeatureServer/0/query"

params["f"] = "geojson"  # reuse params
response = requests.get(house_url, params=params)
with open("house.geojson", "wb") as f:
    f.write(response.content)

house_gdf = gpd.read_file("house.geojson")

house_gdf['District'] = house_gdf['DIST'].astype(int)
house_gdf['Chamber'] = "House"
house_gdf['DistrictKey'] = "H" + house_gdf['District'].astype(str)

print(house_gdf.columns)
house_gdf.head()

Index(['OBJECTID', 'DIST', 'COLOR4', 'Shape__Area', 'Shape__Length',
       'geometry', 'District', 'Chamber', 'DistrictKey'],
      dtype='object')


Unnamed: 0,OBJECTID,DIST,COLOR4,Shape__Area,Shape__Length,geometry,District,Chamber,DistrictKey
0,1,1,1,31204400000.0,831408.408382,"POLYGON ((-114.04203 41.00157, -114.04196 41.0...",1,House,H1
1,2,2,3,757380200.0,163335.108633,"POLYGON ((-111.90157 41.7392, -111.90178 41.73...",2,House,H2
2,3,3,2,1938910000.0,265721.24189,"POLYGON ((-111.53173 41.64133, -111.53184 41.6...",3,House,H3
3,4,4,3,13123170000.0,739056.385087,"POLYGON ((-111.15137 40.54836, -111.15183 40.5...",4,House,H4
4,5,5,4,2358751000.0,304456.08194,"POLYGON ((-111.91697 41.46409, -111.91695 41.4...",5,House,H5


In [11]:
house_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 75 entries, 0 to 74
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   OBJECTID       75 non-null     int32   
 1   DIST           75 non-null     int32   
 2   COLOR4         75 non-null     int32   
 3   Shape__Area    75 non-null     float64 
 4   Shape__Length  75 non-null     float64 
 5   geometry       75 non-null     geometry
 6   District       75 non-null     int64   
 7   Chamber        75 non-null     object  
 8   DistrictKey    75 non-null     object  
dtypes: float64(2), geometry(1), int32(3), int64(1), object(2)
memory usage: 4.5+ KB


In [10]:
all_districts = pd.concat([senate_gdf, house_gdf], ignore_index=True)
all_districts = all_districts.reset_index(drop=True)

print(all_districts.columns)
print(all_districts.info())
all_districts.head()


Index(['OBJECTID', 'DIST', 'COLOR4', 'Shape__Area', 'Shape__Length',
       'geometry', 'District', 'Chamber', 'DistrictKey'],
      dtype='object')
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 104 entries, 0 to 103
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   OBJECTID       104 non-null    int32   
 1   DIST           104 non-null    int32   
 2   COLOR4         104 non-null    int32   
 3   Shape__Area    104 non-null    float64 
 4   Shape__Length  104 non-null    float64 
 5   geometry       104 non-null    geometry
 6   District       104 non-null    int64   
 7   Chamber        104 non-null    object  
 8   DistrictKey    104 non-null    object  
dtypes: float64(2), geometry(1), int32(3), int64(1), object(2)
memory usage: 6.2+ KB
None


Unnamed: 0,OBJECTID,DIST,COLOR4,Shape__Area,Shape__Length,geometry,District,Chamber,DistrictKey
0,1,1,4,41329370000.0,1131859.0,"POLYGON ((-112.46356 40.56335, -112.4637 40.56...",1,Senate,S1
1,2,2,2,8406297000.0,569357.2,"POLYGON ((-111.42591 41.67675, -111.42596 41.6...",2,Senate,S2
2,3,3,1,5972148000.0,557460.1,"POLYGON ((-111.62349 40.73216, -111.62346 40.7...",3,Senate,S3
3,4,4,2,1197231000.0,190500.9,"POLYGON ((-112.49342 41.077, -112.48325 41.087...",4,Senate,S4
4,5,5,4,606843400.0,156897.0,"POLYGON ((-111.71156 41.06643, -111.71158 41.0...",5,Senate,S5


In [12]:


# Ensure it's a GeoDataFrame
all_districts = gpd.GeoDataFrame(all_districts, geometry='geometry')

# Project to a projected CRS (UTM Zone 12N covers Utah)
all_districts = all_districts.to_crs(epsg=32612)

# Compute centroid in projected CRS
all_districts['centroid'] = all_districts.geometry.centroid

# Convert back to WGS84 for lat/lon
all_districts = all_districts.to_crs(epsg=4326)
all_districts['lat'] = all_districts['centroid'].y
all_districts['lon'] = all_districts['centroid'].x

# Convert geometry to WKT for polygon mapping
# all_districts['geometry_wkt'] = all_districts['geometry'].apply(lambda x: x.wkt if x else None)

all_districts.head()


Unnamed: 0,OBJECTID,DIST,COLOR4,Shape__Area,Shape__Length,geometry,District,Chamber,DistrictKey,centroid,lat,lon
0,1,1,4,41329370000.0,1131859.0,"POLYGON ((-112.46356 40.56335, -112.4637 40.56...",1,Senate,S1,POINT (331027.325 4584031.59),4584032.0,331027.324791
1,2,2,2,8406297000.0,569357.2,"POLYGON ((-111.42591 41.67675, -111.42596 41.6...",2,Senate,S2,POINT (462503.002 4617893.457),4617893.0,462503.002459
2,3,3,1,5972148000.0,557460.1,"POLYGON ((-111.62349 40.73216, -111.62346 40.7...",3,Senate,S3,POINT (456048.333 4550265.13),4550265.0,456048.333121
3,4,4,2,1197231000.0,190500.9,"POLYGON ((-112.49342 41.077, -112.48325 41.087...",4,Senate,S4,POINT (400124.164 4563641.553),4563642.0,400124.164423
4,5,5,4,606843400.0,156897.0,"POLYGON ((-111.71156 41.06643, -111.71158 41.0...",5,Senate,S5,POINT (427584.151 4558179.7),4558180.0,427584.150846


In [13]:
# Simplify polygons (tolerance in degrees; smaller = more detail)
all_districts['geometry_simplified'] = all_districts['geometry'].simplify(0.01)

# Convert simplified geometry to WKT
all_districts['geometry_wkt'] = all_districts['geometry_simplified'].apply(lambda x: x.wkt if x else None)

all_districts.head()

Unnamed: 0,OBJECTID,DIST,COLOR4,Shape__Area,Shape__Length,geometry,District,Chamber,DistrictKey,centroid,lat,lon,geometry_simplified,geometry_wkt
0,1,1,4,41329370000.0,1131859.0,"POLYGON ((-112.46356 40.56335, -112.4637 40.56...",1,Senate,S1,POINT (331027.325 4584031.59),4584032.0,331027.324791,"POLYGON ((-112.36811 40.56467, -112.50726 40.5...","POLYGON ((-112.368108727064 40.56467101737871,..."
1,2,2,2,8406297000.0,569357.2,"POLYGON ((-111.42591 41.67675, -111.42596 41.6...",2,Senate,S2,POINT (462503.002 4617893.457),4617893.0,462503.002459,"POLYGON ((-111.42591 41.67675, -111.43761 41.6...",POLYGON ((-111.42590968669302 41.6767542158292...
2,3,3,1,5972148000.0,557460.1,"POLYGON ((-111.62349 40.73216, -111.62346 40.7...",3,Senate,S3,POINT (456048.333 4550265.13),4550265.0,456048.333121,"POLYGON ((-111.62349 40.73216, -111.62105 40.7...","POLYGON ((-111.623491596593 40.7321600938072, ..."
3,4,4,2,1197231000.0,190500.9,"POLYGON ((-112.49342 41.077, -112.48325 41.087...",4,Senate,S4,POINT (400124.164 4563641.553),4563642.0,400124.164423,"POLYGON ((-112.49342 41.077, -112.23808 41.336...",POLYGON ((-112.49341782164102 41.0770030643290...
4,5,5,4,606843400.0,156897.0,"POLYGON ((-111.71156 41.06643, -111.71158 41.0...",5,Senate,S5,POINT (427584.151 4558179.7),4558180.0,427584.150846,"POLYGON ((-111.71156 41.06643, -111.7877 41.13...",POLYGON ((-111.711559660237 41.066425123801785...


In [14]:
# Merge the two dataframes on DistrictKey
all_data = pd.merge(df, all_districts, on="DistrictKey", how="left")

# Quick check
print(all_data.shape)
all_data.head()


(104, 24)


Unnamed: 0,District_x,Office,Representative,Webpage,Img_ID,Img_URL,Legislation_By_Senator,Party,Email,County(ies),...,Shape__Area,Shape__Length,geometry,District_y,Chamber,centroid,lat,lon,geometry_simplified,geometry_wkt
0,1,State House,Thomas W. Peterson,https://house.utleg.gov/rep/PETERT,PETERT,https://le.utah.gov/images/legislator/house/PE...,https://le.utah.gov/asp/billsintro/SenResults....,R,tpeterson@le.utah.gov,"Box Elder, Cache",...,31204400000.0,831408.408382,"POLYGON ((-114.04203 41.00157, -114.04196 41.0...",1,House,POINT (326446.165 4599416.294),4599416.0,326446.165062,"POLYGON ((-114.04203 41.00157, -114.04149 41.9...",POLYGON ((-114.04202809259601 41.0015679590473...
1,2,State House,Michael J. Petersen,https://house.utleg.gov/rep/PETERM,PETERM,https://le.utah.gov/images/legislator/house/PE...,https://le.utah.gov/asp/billsintro/SenResults....,R,mpetersen@le.utah.gov,Cache,...,757380200.0,163335.108633,"POLYGON ((-111.90157 41.7392, -111.90178 41.73...",2,House,POINT (426318.587 4634051.661),4634052.0,426318.586543,"POLYGON ((-111.90157 41.7392, -112.00336 41.75...",POLYGON ((-111.901565752156 41.739197220326496...
2,3,State House,Jason E. Thompson,https://house.utleg.gov/rep/THOMJA,THOMJA,https://le.utah.gov/images/legislator/house/TH...,https://le.utah.gov/asp/billsintro/SenResults....,R,jthompson@le.utah.gov,Cache,...,1938910000.0,265721.24189,"POLYGON ((-111.53173 41.64133, -111.53184 41.6...",3,House,POINT (447359.668 4629970.259),4629970.0,447359.668397,"POLYGON ((-111.53173 41.64133, -111.56938 41.6...","POLYGON ((-111.531729647026 41.64133416831509,..."
3,4,State House,Tiara Auxier,https://house.utleg.gov/rep/AUXIET,AUXIET,https://le.utah.gov/images/legislator/house/AU...,https://le.utah.gov/asp/billsintro/SenResults....,R,tauxier@le.utah.gov,"Daggett, Duchesne, Morgan, Rich, Summit",...,13123170000.0,739056.385087,"POLYGON ((-111.15137 40.54836, -111.15183 40.5...",4,House,POINT (475126.977 4561290.842),4561291.0,475126.977407,"POLYGON ((-111.15137 40.54836, -111.36985 40.6...","POLYGON ((-111.151371520176 40.548358077792, -..."
4,5,State House,Casey Snider,https://house.utleg.gov/rep/SNIDEC,SNIDEC,https://le.utah.gov/images/legislator/house/SN...,https://le.utah.gov/asp/billsintro/SenResults....,R,csnider@le.utah.gov,Cache,...,2358751000.0,304456.08194,"POLYGON ((-111.91697 41.46409, -111.91695 41.4...",5,House,POINT (438419.006 4601708.67),4601709.0,438419.005801,"POLYGON ((-111.91697 41.46409, -111.87318 41.4...","POLYGON ((-111.916969762611 41.4640931275038, ..."


In [24]:
# # trying to recreate the shape by generating lots of smaller points within the shape for looker studio. Not working for what i need

# # all_districts = your GeoDataFrame with geometry, Representative, DistrictKey, etc.

# points_list = []

# for idx, row in all_data.iterrows():
#     poly = row['geometry']
#     district_key = row['DistrictKey']
#     rep_name = row['Representative']

#     # Skip empty geometries
#     if poly is None or poly.is_empty:
#         continue

#     # --- 1. Add the centroid point ---
#     centroid = poly.centroid
#     points_list.append({
#         'DistrictKey': district_key,
#         'Representative': rep_name,
#         'lat': centroid.y,
#         'lon': centroid.x,
#         'Type': 'Centroid'
#     })

#     # --- 2. Add N random boundary points ---
#     N = 50  # adjust for more or fewer points
#     minx, miny, maxx, maxy = poly.bounds
#     count = 0
#     while count < N:
#         random_point = Point(random.uniform(minx, maxx), random.uniform(miny, maxy))
#         if poly.contains(random_point):
#             points_list.append({
#                 'DistrictKey': district_key,
#                 'Representative': rep_name,
#                 'lat': random_point.y,
#                 'lon': random_point.x,
#                 'Type': 'Boundary'
#             })
#             count += 1

# # Create a DataFrame of all points
# district_points_df = pd.DataFrame(points_list)

# # Optional: combined Location column for Looker Studio
# district_points_df['Location'] = district_points_df.apply(lambda r: f"{r['lat']},{r['lon']}", axis=1)

# district_points_df.head()


Unnamed: 0,DistrictKey,Representative,lat,lon,Type,Location
0,H1,Thomas W. Peterson,41.527401,-113.079199,Centroid,"41.52740140114119,-113.07919919697007"
1,H1,Thomas W. Peterson,41.339478,-113.372872,Boundary,"41.33947775060458,-113.37287177367354"
2,H1,Thomas W. Peterson,41.390159,-112.613671,Boundary,"41.39015885829911,-112.61367145537686"
3,H1,Thomas W. Peterson,41.500639,-113.615818,Boundary,"41.50063875186964,-113.61581793289491"
4,H1,Thomas W. Peterson,41.361196,-113.584484,Boundary,"41.361196131219394,-113.58448448021527"


In [15]:
# Drop unnecessary columns
all_data = all_data.drop(columns=['District_x', 'Office', 'OBJECTID', 'DIST', 'District_y', 'centroid', 'geometry_simplified'], errors="ignore")

# Check result
all_data.head()


Unnamed: 0,Representative,Webpage,Img_ID,Img_URL,Legislation_By_Senator,Party,Email,County(ies),DistrictKey,COLOR4,Shape__Area,Shape__Length,geometry,Chamber,lat,lon,geometry_wkt
0,Thomas W. Peterson,https://house.utleg.gov/rep/PETERT,PETERT,https://le.utah.gov/images/legislator/house/PE...,https://le.utah.gov/asp/billsintro/SenResults....,R,tpeterson@le.utah.gov,"Box Elder, Cache",H1,1,31204400000.0,831408.408382,"POLYGON ((-114.04203 41.00157, -114.04196 41.0...",House,4599416.0,326446.165062,POLYGON ((-114.04202809259601 41.0015679590473...
1,Michael J. Petersen,https://house.utleg.gov/rep/PETERM,PETERM,https://le.utah.gov/images/legislator/house/PE...,https://le.utah.gov/asp/billsintro/SenResults....,R,mpetersen@le.utah.gov,Cache,H2,3,757380200.0,163335.108633,"POLYGON ((-111.90157 41.7392, -111.90178 41.73...",House,4634052.0,426318.586543,POLYGON ((-111.901565752156 41.739197220326496...
2,Jason E. Thompson,https://house.utleg.gov/rep/THOMJA,THOMJA,https://le.utah.gov/images/legislator/house/TH...,https://le.utah.gov/asp/billsintro/SenResults....,R,jthompson@le.utah.gov,Cache,H3,2,1938910000.0,265721.24189,"POLYGON ((-111.53173 41.64133, -111.53184 41.6...",House,4629970.0,447359.668397,"POLYGON ((-111.531729647026 41.64133416831509,..."
3,Tiara Auxier,https://house.utleg.gov/rep/AUXIET,AUXIET,https://le.utah.gov/images/legislator/house/AU...,https://le.utah.gov/asp/billsintro/SenResults....,R,tauxier@le.utah.gov,"Daggett, Duchesne, Morgan, Rich, Summit",H4,3,13123170000.0,739056.385087,"POLYGON ((-111.15137 40.54836, -111.15183 40.5...",House,4561291.0,475126.977407,"POLYGON ((-111.151371520176 40.548358077792, -..."
4,Casey Snider,https://house.utleg.gov/rep/SNIDEC,SNIDEC,https://le.utah.gov/images/legislator/house/SN...,https://le.utah.gov/asp/billsintro/SenResults....,R,csnider@le.utah.gov,Cache,H5,4,2358751000.0,304456.08194,"POLYGON ((-111.91697 41.46409, -111.91695 41.4...",House,4601709.0,438419.005801,"POLYGON ((-111.916969762611 41.4640931275038, ..."


In [16]:
# Make sure lat and lon columns exist
# all_data['lat'] and all_data['lon'] from the centroid step

# Combine into a single location column
all_data['lat_lon'] = all_data.apply(lambda row: f"{row['lat']},{row['lon']}" if pd.notnull(row['lat']) and pd.notnull(row['lon']) else None, axis=1)

# Check
all_data[['lat','lon','lat_lon']].head()


Unnamed: 0,lat,lon,lat_lon
0,4599416.0,326446.165062,"4599416.294328271,326446.16506212123"
1,4634052.0,426318.586543,"4634051.660539259,426318.5865427646"
2,4629970.0,447359.668397,"4629970.259272028,447359.6683968656"
3,4561291.0,475126.977407,"4561290.841854824,475126.97740697"
4,4601709.0,438419.005801,"4601708.669570984,438419.00580123666"


In [17]:
all_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104 entries, 0 to 103
Data columns (total 18 columns):
 #   Column                  Non-Null Count  Dtype   
---  ------                  --------------  -----   
 0   Representative          104 non-null    object  
 1   Webpage                 104 non-null    object  
 2   Img_ID                  104 non-null    object  
 3   Img_URL                 104 non-null    object  
 4   Legislation_By_Senator  104 non-null    object  
 5   Party                   104 non-null    object  
 6   Email                   104 non-null    object  
 7   County(ies)             104 non-null    object  
 8   DistrictKey             104 non-null    object  
 9   COLOR4                  104 non-null    int32   
 10  Shape__Area             104 non-null    float64 
 11  Shape__Length           104 non-null    float64 
 12  geometry                104 non-null    geometry
 13  Chamber                 104 non-null    object  
 14  lat                     10

In [18]:
import os
import geopandas as gpd

# Ensure target folder exists
save_dir = "/content/drive/My Drive/ElectionTime/data/"
os.makedirs(save_dir, exist_ok=True)

# File paths
geojson_path = os.path.join(save_dir, "reps_with_geo_data.geojson")
json_path    = os.path.join(save_dir, "reps_with_geo_data.json")

# Make sure it's a GeoDataFrame
all_data = gpd.GeoDataFrame(all_data, geometry="geometry")

# Save full polygons as GeoJSON
all_data.to_file(geojson_path, driver="GeoJSON")

# Save attributes only as JSON
all_data.drop(columns="geometry").to_json(json_path, orient="records")

print(f"✅ Saved GeoJSON to {geojson_path}")
print(f"✅ Saved JSON to {json_path}")


✅ Saved GeoJSON to /content/drive/My Drive/ElectionTime/data/reps_with_geo_data.geojson
✅ Saved JSON to /content/drive/My Drive/ElectionTime/data/reps_with_geo_data.json


In [19]:
# The geometry value is too big, the polygons have a lot of data.
# It was easier to download the geojson files locally for the next step of my process
# I plan to solve this problem in a scalable way


# Create or open the sheet
# try:
#     spreadsheet = gc.open("reps_with_geo_data")
# except gspread.SpreadsheetNotFound:
#     spreadsheet = gc.create("reps_with_geo_data")

# worksheet = spreadsheet.sheet1

# # Write the dataframe
# set_with_dataframe(worksheet, all_data)

# print("✅")
