In [None]:
POOL_STAGE_DB_URL = ''
MASTER_DB_URL = ''
import psycopg2

In [7]:
connection = psycopg2.connect(POOL_STAGE_DB_URL)
cursor = connection.cursor()

## Stage DB Collection

First we will need to pull all of the data from the stage DB. 

In [8]:
get_assignments_sql = """
SELECT * FROM assignment WHERE uploaded = FALSE OR uploaded IS NULL;"""

#Query the stage db
assignments = []
try:
    cursor.execute(get_assignments_sql)
    assignments = cursor.fetchall()
except Exception as e:
    print(f"Error querying stage database: {e}")
finally:   
    cursor.close()
    connection.close()

In [9]:
assignments

[(1, 33682, 5, None),
 (2, 33732, 6, None),
 (3, 18611, 7, None),
 (4, 18598, 8, None),
 (5, 18608, 9, None),
 (6, 17834, 10, None),
 (7, 18075, 11, None),
 (8, 17826, 12, None),
 (9, 17987, 14, None),
 (10, 17816, 15, None),
 (11, 17918, 16, None),
 (12, 17884, 17, None),
 (13, 17658, 18, None),
 (14, 17751, 19, None),
 (15, 17708, 20, None),
 (16, 17292, 21, None),
 (17, 8080, 22, None),
 (18, 7996, 23, None),
 (19, 7951, 24, None),
 (20, 17906, 27, None),
 (21, 17973, 28, None),
 (22, 17904, 29, None),
 (23, 17923, 30, None),
 (24, 17250, 32, None),
 (25, 10134, 35, None),
 (26, 17357, 38, None),
 (27, 17420, 39, None),
 (28, 17341, 40, None),
 (29, 17382, 41, None),
 (30, 17333, 42, None),
 (31, 17233, 43, None),
 (32, 17228, 44, None),
 (33, 10053, 46, None),
 (34, 17255, 49, None),
 (35, 10164, 50, None),
 (36, 10171, 51, None),
 (37, 10149, 53, None),
 (38, 8273, 54, None),
 (39, 8292, 55, None),
 (40, 8918, 57, None),
 (41, 33728, 58, None),
 (42, 33727, 59, None),
 (43, 33722,

In [10]:
#that goes id, address, pool, uploaded
import pandas as pd
columns = ["id", "address", "pool", "uploaded"]
assignments_df = pd.DataFrame(assignments, columns=columns)
assignments_df

Unnamed: 0,id,address,pool,uploaded
0,1,33682,5,
1,2,33732,6,
2,3,18611,7,
3,4,18598,8,
4,5,18608,9,
...,...,...,...,...
526,527,16958,692,
527,528,35221,693,
528,529,16998,695,
529,530,16991,696,


In [14]:
# Fetch all pool and address details in two transactions
get_pools_sql = """
SELECT * FROM pool WHERE id IN %s;"""
get_addresses_sql = """
SELECT * FROM address WHERE id IN %s;"""
# Convert numpy.int64 to Python int
unique_pool_ids = tuple(map(int, assignments_df['pool'].unique()))
unique_address_ids = tuple(map(int, assignments_df['address'].unique()))

pool_details_dict = {}
address_details_dict = {}

try:
    # Query all pool details
    connection = psycopg2.connect(POOL_STAGE_DB_URL)
    cursor = connection.cursor()
    cursor.execute(get_pools_sql, (unique_pool_ids,))
    pool_details = cursor.fetchall()
    pool_details_dict = {pool[0]: pool for pool in pool_details}  # Map pool_id to pool details

    # Query all address details
    cursor.execute(get_addresses_sql, (unique_address_ids,))
    address_details = cursor.fetchall()
    address_details_dict = {address[0]: address for address in address_details}  # Map address_id to address details
except Exception as e:
    print(f"Error querying pool or address details: {e}")
finally:
    cursor.close()
    connection.close()

pool_columns = ['id','pool_id','lat','lon','pool_type']
address_columns = ['id','address_id','address_number','lat','lon','postal_code','street_name','province_state','country','geom','municipality']


pool_details_df = pd.DataFrame(pool_details_dict.values(), columns=pool_columns)
address_details_df = pd.DataFrame(address_details_dict.values(), columns=address_columns)


In [15]:
pool_details_df

Unnamed: 0,id,pool_id,lat,lon,pool_type
0,5,83061,43.549425,-80.188298,0
1,6,83062,43.549110,-80.188693,0
2,7,83063,43.551423,-80.274596,0
3,8,83064,43.551648,-80.275519,0
4,9,83065,43.551919,-80.273888,0
...,...,...,...,...,...
526,692,83899,43.576708,-80.264922,0
527,693,83901,43.576488,-80.265575,1
528,695,83905,43.577874,-80.263373,0
529,696,83906,43.576631,-80.263653,0


In [18]:
assignments_df.head()

Unnamed: 0,id,address,pool,uploaded
0,1,33682,5,
1,2,33732,6,
2,3,18611,7,
3,4,18598,8,
4,5,18608,9,


In [26]:
#now we will join the pool_details and address_details back to the assignments_df to get all the info in one place 

# this should have an identical number of rows output as the assignments_df, but with all the pool and address details included
merged_df = assignments_df.merge(pool_details_df, left_on='pool', right_on='id', how='left', suffixes=('', '_pool')).merge(address_details_df, left_on='address', right_on='id', how='left', suffixes=('', '_addr'))
merged_df.head()

Unnamed: 0,id,address,pool,uploaded,id_pool,pool_id,lat,lon,pool_type,id_addr,address_id,address_number,lat_addr,lon_addr,postal_code,street_name,province_state,country,geom,municipality
0,1,33682,5,,5,83061,43.549425,-80.188298,0,33682,30423,46,43.549308,-80.188093,N1L 1C2,Glenholm Drive,Ontario,Canada,0101000020E6100000D1E638B7090C54C031B77BB94FC6...,Guelph
1,2,33732,6,,6,83062,43.54911,-80.188693,0,33732,30473,58,43.549022,-80.188491,N1L 1C2,Glenholm Drive,Ontario,Canada,0101000020E6100000C8258E3C100C54C06B64575A46C6...,Guelph
2,3,18611,7,,7,83063,43.551423,-80.274596,0,18611,15352,24,43.551517,-80.274639,N1H 1L2,Kimberley Drive,Ontario,Canada,0101000020E610000031CD74AF931154C01118EB1B98C6...,Guelph
3,4,18598,8,,8,83064,43.551648,-80.275519,0,18598,15339,29,43.551574,-80.275429,N1H 1L3,Kimberley Drive,Ontario,Canada,0101000020E6100000AED7F4A0A01154C076A911FA99C6...,Guelph
4,5,18608,9,,9,83065,43.551919,-80.273888,0,18608,15349,18,43.551918,-80.274002,N1H 1L2,Kimberley Drive,Ontario,Canada,0101000020E61000007842AF3F891154C07009C03FA5C6...,Guelph


In [31]:
def standardize_text_fields(df):
    df['postal_code'] = df['postal_code'].str.upper().str.replace(' ', '', regex=False)
    df['street_name'] = df['street_name'].str.strip().str.title()
    df['municipality'] = df['municipality'].str.strip().str.title()
    return df

merged_df = standardize_text_fields(merged_df)
merged_df.head()

Unnamed: 0,id,address,pool,uploaded,id_pool,pool_id,lat,lon,pool_type,id_addr,address_id,address_number,lat_addr,lon_addr,postal_code,street_name,province_state,country,geom,municipality
0,1,33682,5,,5,83061,43.549425,-80.188298,0,33682,30423,46,43.549308,-80.188093,N1L1C2,Glenholm Drive,Ontario,Canada,0101000020E6100000D1E638B7090C54C031B77BB94FC6...,Guelph
1,2,33732,6,,6,83062,43.54911,-80.188693,0,33732,30473,58,43.549022,-80.188491,N1L1C2,Glenholm Drive,Ontario,Canada,0101000020E6100000C8258E3C100C54C06B64575A46C6...,Guelph
2,3,18611,7,,7,83063,43.551423,-80.274596,0,18611,15352,24,43.551517,-80.274639,N1H1L2,Kimberley Drive,Ontario,Canada,0101000020E610000031CD74AF931154C01118EB1B98C6...,Guelph
3,4,18598,8,,8,83064,43.551648,-80.275519,0,18598,15339,29,43.551574,-80.275429,N1H1L3,Kimberley Drive,Ontario,Canada,0101000020E6100000AED7F4A0A01154C076A911FA99C6...,Guelph
4,5,18608,9,,9,83065,43.551919,-80.273888,0,18608,15349,18,43.551918,-80.274002,N1H1L2,Kimberley Drive,Ontario,Canada,0101000020E61000007842AF3F891154C07009C03FA5C6...,Guelph


In [32]:
def remove_duplicates(df):
    # Drop duplicates where street_name AND address_number match, along with EITHER municipality OR postal code
    mask = df.duplicated(subset=['street_name', 'address_number', 'municipality'], keep='first') | \
        df.duplicated(subset=['street_name', 'address_number', 'postal_code'], keep='first')
    df_no_duplicates = df[~mask]
    return df_no_duplicates
cleaned_df = remove_duplicates(merged_df)
cleaned_df.head()

Unnamed: 0,id,address,pool,uploaded,id_pool,pool_id,lat,lon,pool_type,id_addr,address_id,address_number,lat_addr,lon_addr,postal_code,street_name,province_state,country,geom,municipality
0,1,33682,5,,5,83061,43.549425,-80.188298,0,33682,30423,46,43.549308,-80.188093,N1L1C2,Glenholm Drive,Ontario,Canada,0101000020E6100000D1E638B7090C54C031B77BB94FC6...,Guelph
1,2,33732,6,,6,83062,43.54911,-80.188693,0,33732,30473,58,43.549022,-80.188491,N1L1C2,Glenholm Drive,Ontario,Canada,0101000020E6100000C8258E3C100C54C06B64575A46C6...,Guelph
2,3,18611,7,,7,83063,43.551423,-80.274596,0,18611,15352,24,43.551517,-80.274639,N1H1L2,Kimberley Drive,Ontario,Canada,0101000020E610000031CD74AF931154C01118EB1B98C6...,Guelph
3,4,18598,8,,8,83064,43.551648,-80.275519,0,18598,15339,29,43.551574,-80.275429,N1H1L3,Kimberley Drive,Ontario,Canada,0101000020E6100000AED7F4A0A01154C076A911FA99C6...,Guelph
4,5,18608,9,,9,83065,43.551919,-80.273888,0,18608,15349,18,43.551918,-80.274002,N1H1L2,Kimberley Drive,Ontario,Canada,0101000020E61000007842AF3F891154C07009C03FA5C6...,Guelph


In [33]:
import geopandas as gpd
from shapely.geometry import box
from geopy.distance import distance
from shapely import box

def build_bbox(df):

    min_lat = df['lat'].min()
    max_lat = df['lat'].max()
    min_lon = df['lon'].min()
    max_lon = df['lon'].max()
    bbox = (min_lon, min_lat, max_lon, max_lat)

    #add a 5 km buffer to the bbox
    buffer_km = 5

    # import geopy to calculate the buffer

    bottom_left = (min_lat, min_lon)
    top_right = (max_lat, max_lon)
    bottom_left_buffered = distance(kilometers=buffer_km).destination(bottom_left, 225)  # 225 degrees is southwest
    top_right_buffered = distance(kilometers=buffer_km).destination(top_right,45)    # 45 degrees is northeast
    bbox_buffered = (bottom_left_buffered.longitude, bottom_left_buffered.latitude,
                    top_right_buffered.longitude, top_right_buffered.latitude)



    return bbox_buffered
bbox_buffered = build_bbox(cleaned_df)

In [38]:
cleaned_df.head()

Unnamed: 0,id,address,pool,uploaded,id_pool,pool_id,lat,lon,pool_type,id_addr,address_id,address_number,lat_addr,lon_addr,postal_code,street_name,province_state,country,geom,municipality
0,1,33682,5,,5,83061,43.549425,-80.188298,0,33682,30423,46,43.549308,-80.188093,N1L1C2,Glenholm Drive,Ontario,Canada,0101000020E6100000D1E638B7090C54C031B77BB94FC6...,Guelph
1,2,33732,6,,6,83062,43.54911,-80.188693,0,33732,30473,58,43.549022,-80.188491,N1L1C2,Glenholm Drive,Ontario,Canada,0101000020E6100000C8258E3C100C54C06B64575A46C6...,Guelph
2,3,18611,7,,7,83063,43.551423,-80.274596,0,18611,15352,24,43.551517,-80.274639,N1H1L2,Kimberley Drive,Ontario,Canada,0101000020E610000031CD74AF931154C01118EB1B98C6...,Guelph
3,4,18598,8,,8,83064,43.551648,-80.275519,0,18598,15339,29,43.551574,-80.275429,N1H1L3,Kimberley Drive,Ontario,Canada,0101000020E6100000AED7F4A0A01154C076A911FA99C6...,Guelph
4,5,18608,9,,9,83065,43.551919,-80.273888,0,18608,15349,18,43.551918,-80.274002,N1H1L2,Kimberley Drive,Ontario,Canada,0101000020E61000007842AF3F891154C07009C03FA5C6...,Guelph


In [None]:
import sys
sys.path.append('../')
from include.db.connections import get_master_db_connection
MASTER_DB_URL=''


master_conn = get_master_db_connection(MASTER_DB_URL)


In [36]:
query = f"""
SELECT * FROM properties WHERE ST_Intersects(geom, ST_MakeEnvelope({bbox_buffered[0]}, {bbox_buffered[1]}, {bbox_buffered[2]}, {bbox_buffered[3]}, 4326));
"""
master_addresses = gpd.read_postgis(query, master_conn, geom_col='geom')
print(f"Retrieved {len(master_addresses)} addresses from master database within bounding box.")

  df = pd.read_sql(


Retrieved 70 addresses from master database within bounding box.


  return pd.read_sql(spatial_ref_sys_sql, con)


In [37]:
master_addresses = standardize_text_fields(master_addresses)


In [39]:
def remove_duplicates(input_df, master_df):
    # Drop duplicates where street_name AND address_number match, along with EITHER municipality OR postal code in master_df
    mask = input_df.apply(
        lambda row: (
            ((master_df['street_name'] == row['street_name']) &
             (master_df['address_number'] == row['address_number']) &
             (master_df['municipality'] == row['municipality'])).any() or
            ((master_df['street_name'] == row['street_name']) &
             (master_df['address_number'] == row['address_number']) &
             (master_df['postal_code'] == row['postal_code'])).any()
        ),
        axis=1
    )
    df_no_duplicates = input_df[~mask]
    return df_no_duplicates

df_final = remove_duplicates(cleaned_df, master_addresses)
df_final.head()

Unnamed: 0,id,address,pool,uploaded,id_pool,pool_id,lat,lon,pool_type,id_addr,address_id,address_number,lat_addr,lon_addr,postal_code,street_name,province_state,country,geom,municipality
0,1,33682,5,,5,83061,43.549425,-80.188298,0,33682,30423,46,43.549308,-80.188093,N1L1C2,Glenholm Drive,Ontario,Canada,0101000020E6100000D1E638B7090C54C031B77BB94FC6...,Guelph
1,2,33732,6,,6,83062,43.54911,-80.188693,0,33732,30473,58,43.549022,-80.188491,N1L1C2,Glenholm Drive,Ontario,Canada,0101000020E6100000C8258E3C100C54C06B64575A46C6...,Guelph
2,3,18611,7,,7,83063,43.551423,-80.274596,0,18611,15352,24,43.551517,-80.274639,N1H1L2,Kimberley Drive,Ontario,Canada,0101000020E610000031CD74AF931154C01118EB1B98C6...,Guelph
3,4,18598,8,,8,83064,43.551648,-80.275519,0,18598,15339,29,43.551574,-80.275429,N1H1L3,Kimberley Drive,Ontario,Canada,0101000020E6100000AED7F4A0A01154C076A911FA99C6...,Guelph
4,5,18608,9,,9,83065,43.551919,-80.273888,0,18608,15349,18,43.551918,-80.274002,N1H1L2,Kimberley Drive,Ontario,Canada,0101000020E61000007842AF3F891154C07009C03FA5C6...,Guelph


In [61]:
assigment_ids_to_be_upserted = df_final['id'].tolist()
# we will use these later to update the stage db.

# now let's make an address_id column with unique int ids (generated using time hash)
import time
import hashlib
import secrets

MAX_BIGINT = 2**63 - 1  # PostgreSQL signed BIGINT max

def generate_random_bigint() -> int:
    return secrets.randbelow(MAX_BIGINT)

def generate_time_based_id():
    return generate_random_bigint()
# Generate unique address_ids for each row
df_final['address_id'] = [generate_time_based_id() for _ in range(len(df_final))]
df_final[['address_id', 'street_name', 'address_number', 'municipality']].head()

Unnamed: 0,address_id,street_name,address_number,municipality
0,1481679809065298666,Glenholm Drive,46,Guelph
1,8189952701791716537,Glenholm Drive,58,Guelph
2,5555287100411562606,Kimberley Drive,24,Guelph
3,1859949693545033835,Kimberley Drive,29,Guelph
4,5871583080725510033,Kimberley Drive,18,Guelph


In [49]:
POOL_COLUMNS = ['address_id', 'pool_type']
pool_df = df_final[POOL_COLUMNS].copy()
address_df = df_final.drop(columns='pool_type')
address_df.head()

Unnamed: 0,id,address,pool,uploaded,id_pool,pool_id,lat,lon,id_addr,address_id,address_number,lat_addr,lon_addr,postal_code,street_name,province_state,country,geom,municipality
0,1,33682,5,,5,83061,43.549425,-80.188298,33682,8985157874523924763,46,43.549308,-80.188093,N1L1C2,Glenholm Drive,Ontario,Canada,0101000020E6100000D1E638B7090C54C031B77BB94FC6...,Guelph
1,2,33732,6,,6,83062,43.54911,-80.188693,33732,1949674999700719292,58,43.549022,-80.188491,N1L1C2,Glenholm Drive,Ontario,Canada,0101000020E6100000C8258E3C100C54C06B64575A46C6...,Guelph
2,3,18611,7,,7,83063,43.551423,-80.274596,18611,3638693555836031693,24,43.551517,-80.274639,N1H1L2,Kimberley Drive,Ontario,Canada,0101000020E610000031CD74AF931154C01118EB1B98C6...,Guelph
3,4,18598,8,,8,83064,43.551648,-80.275519,18598,2786242187361959719,29,43.551574,-80.275429,N1H1L3,Kimberley Drive,Ontario,Canada,0101000020E6100000AED7F4A0A01154C076A911FA99C6...,Guelph
4,5,18608,9,,9,83065,43.551919,-80.273888,18608,8136990299224034324,18,43.551918,-80.274002,N1H1L2,Kimberley Drive,Ontario,Canada,0101000020E61000007842AF3F891154C07009C03FA5C6...,Guelph


In [50]:
address_df.columns

Index(['id', 'address', 'pool', 'uploaded', 'id_pool', 'pool_id', 'lat', 'lon',
       'id_addr', 'address_id', 'address_number', 'lat_addr', 'lon_addr',
       'postal_code', 'street_name', 'province_state', 'country', 'geom',
       'municipality'],
      dtype='object')

In [51]:
cols_to_keep = [ 'address_id', 'address_number', 'lat_addr', 'lon_addr',
       'postal_code', 'street_name', 'province_state', 'country', 'geom',
       'municipality']

address_df = address_df[cols_to_keep]

#rename lat_addr to lat, and lon_addr to lon for the master db schema
address_df.rename(columns={'lat_addr': 'lat', 'lon_addr': 'lon'}, inplace=True)
address_df.head()

Unnamed: 0,address_id,address_number,lat,lon,postal_code,street_name,province_state,country,geom,municipality
0,8985157874523924763,46,43.549308,-80.188093,N1L1C2,Glenholm Drive,Ontario,Canada,0101000020E6100000D1E638B7090C54C031B77BB94FC6...,Guelph
1,1949674999700719292,58,43.549022,-80.188491,N1L1C2,Glenholm Drive,Ontario,Canada,0101000020E6100000C8258E3C100C54C06B64575A46C6...,Guelph
2,3638693555836031693,24,43.551517,-80.274639,N1H1L2,Kimberley Drive,Ontario,Canada,0101000020E610000031CD74AF931154C01118EB1B98C6...,Guelph
3,2786242187361959719,29,43.551574,-80.275429,N1H1L3,Kimberley Drive,Ontario,Canada,0101000020E6100000AED7F4A0A01154C076A911FA99C6...,Guelph
4,8136990299224034324,18,43.551918,-80.274002,N1H1L2,Kimberley Drive,Ontario,Canada,0101000020E61000007842AF3F891154C07009C03FA5C6...,Guelph


In [52]:

# Prepare the insert statement
insert_query = """
INSERT INTO properties (address_id, address_number, country, lat, lon, postal_code, 
                        street_name, municipality, province_state, geom)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, ST_GeogFromText(%s))
"""
master_conn = get_master_db_connection(MASTER_DB_URL)
try:
    cursor = master_conn.cursor()
    rows_inserted = 0
    
    for idx, row in address_df.iterrows():
        # Create WKT representation of the point
        wkt = f"POINT({row['lon']} {row['lat']})"
        
        values = (
            int(row['address_id']),
            row['address_number'],
            row['country'],
            float(row['lat']),
            float(row['lon']),
            row['postal_code'],
            row['street_name'],
            row['municipality'],
            row['province_state'],
            wkt
        )
        
        cursor.execute(insert_query, values)
        rows_inserted += 1
    
    master_conn.commit()
    cursor.close()
    print(f"Successfully uploaded {rows_inserted} addresses to properties table.")
    
except Exception as e:
    master_conn.rollback()
    print(f"Error uploading addresses: {e}")
    raise


Successfully uploaded 531 addresses to properties table.


In [54]:
change_uploaded_state = """
UPDATE assignment
SET uploaded = TRUE
WHERE id IN %s;
"""

stage_conn = psycopg2.connect(POOL_STAGE_DB_URL)
try:
    cursor = stage_conn.cursor()
    cursor.execute(change_uploaded_state, (tuple(assigment_ids_to_be_upserted),))
    stage_conn.commit()
    cursor.close()
    print(f"Successfully updated uploaded state for {len(assigment_ids_to_be_upserted)} assignments.")
except Exception as e:
    stage_conn.rollback()
    print(f"Error updating uploaded state: {e}")
    raise
finally:
    stage_conn.close()

Successfully updated uploaded state for 531 assignments.


In [66]:
# Step 2: Get the property UUIDs for the address_ids we just inserted
address_ids = pool_df['address_id'].tolist()

# Query to get id (UUID) and address_id from properties table
query = """
SELECT id, address_id 
FROM properties 
WHERE address_id = ANY(%s)
"""
conn = get_master_db_connection(MASTER_DB_URL)
cursor = conn.cursor()
cursor.execute(query, (address_ids,))
property_mappings = cursor.fetchall()
cursor.close()

# Create a mapping dict: address_id -> property_id (UUID)
address_to_property = {row[1]: row[0] for row in property_mappings}

# Add property_id column to pool_df
pool_df['property_id'] = pool_df['address_id'].map(address_to_property)

# Check if any addresses didn't get mapped
unmapped = pool_df[pool_df['property_id'].isnull()]
if len(unmapped) > 0:
    print(f"Warning: {len(unmapped)} pools couldn't be mapped to properties")
    print(unmapped)

print(f"Mapped {len(pool_df[pool_df['property_id'].notnull()])} pools to properties")
pool_df.head()


Mapped 531 pools to properties


Unnamed: 0,address_id,pool_type,property_id,pool_id
0,8985157874523924763,0,b557b67a-714f-4d06-8d3c-c836c029f486,6725440030533470637
1,1949674999700719292,0,ae7ba23b-9190-49a6-9648-2054e4785597,6725440030533470637
2,3638693555836031693,0,caf7ca46-c37c-4e3f-ad72-3e3feb576405,6725440030533470637
3,2786242187361959719,0,73e367fb-8a5c-4eeb-a593-36b4b2df9bdd,6725440030533470637
4,8136990299224034324,0,6fbe9026-4d93-4b35-adb4-b4fe3b8b3e71,6725440030533470637


In [69]:
# Step 3: Upload pools to database
# Filter out any unmapped pools
pool_df_upload = pool_df[pool_df['property_id'].notnull()].copy()

# Insert pools
insert_query = """
INSERT INTO pools ( property_id, pool_type)
VALUES ( %s, %s)
"""

try:
    cursor = conn.cursor()
    pools_inserted = 0
    
    for idx, row in pool_df_upload.iterrows():
        values = (
            row['property_id'],  # This is already a UUID string
            row['pool_type'] if pd.notna(row['pool_type']) else None
        )
        
        cursor.execute(insert_query, values)
        pools_inserted += 1
    
    conn.commit()
    cursor.close()
    print(f"Successfully uploaded {pools_inserted} pools to database.")
    
except Exception as e:
    conn.rollback()
    print(f"Error uploading pools: {e}")
    raise


Successfully uploaded 531 pools to database.
