## Imports

In [None]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import shape, MultiPolygon, Polygon
import os
from supabase import create_client, Client
from dotenv import load_dotenv
import psycopg

## Processing

### Reading shape files

In [None]:
# Build every path with os.path.join instead of a hard-coded "\\" separator:
# on macOS/Linux a backslash is an ordinary filename character, so the
# Windows-style literal would point at a file that does not exist.
SHAPEFILE_DIR = "Shapefiles"

admin0 = gpd.read_file(os.path.join(SHAPEFILE_DIR, "pak_admbnda_adm0_wfp_20220909.shp"))
admin1 = gpd.read_file(os.path.join(SHAPEFILE_DIR, "pak_admbnda_adm1_wfp_20220909.shp"))
admin2 = gpd.read_file(os.path.join(SHAPEFILE_DIR, "pak_admbnda_adm2_wfp_20220909.shp"))
admin3 = gpd.read_file(os.path.join(SHAPEFILE_DIR, "pak_admbnda_adm3_wfp_20220909.shp"))

# (rows, columns) of each layer as a quick sanity check on what was loaded
print("Admin Level 0 shape:", admin0.shape)
print("Admin Level 1 shape:", admin1.shape)
print("Admin Level 2 shape:", admin2.shape)
print("Admin Level 3 shape:", admin3.shape)

### Transformation to EPSG:4326

In [3]:
# Reproject every admin layer that is not already in EPSG:4326; a layer that
# already uses that CRS is kept as-is.
admin0, admin1, admin2, admin3 = (
    layer.to_crs("EPSG:4326") if layer.crs != "EPSG:4326" else layer
    for layer in (admin0, admin1, admin2, admin3)
)

### Multipolygon to Polygon

In [None]:
def convert_to_polygon(geom):
    """Collapse a MultiPolygon to its single largest member polygon.

    Any geometry that is not a MultiPolygon is returned unchanged.
    """
    if not isinstance(geom, MultiPolygon):
        return geom
    # Keep only the part with the greatest area; the smaller parts are dropped.
    return max(geom.geoms, key=lambda part: part.area)

# Swap the geometry column of every admin level for its polygon-only form
for layer in (admin0, admin1, admin2, admin3):
    layer['geometry'] = layer['geometry'].apply(convert_to_polygon)

print("Geometry conversion complete")

### Shapefile structure

In [None]:
# Column names, a divider, then the first two rows of the level-0 layer
print(
    admin0.columns.to_list(),
    "====================================",
    admin0.head(2),
    sep="\n",
)

### Transform to Dataframe

In [11]:
def transform_shape(gdf, level):
    """Flatten one admin-level shapefile frame into rows for the database.

    Args:
        gdf: Frame holding an ``ADM{level}_EN`` name column, a ``geometry``
            column and, when ``level > 0``, an ``ADM{level-1}_EN`` column.
        level: Admin hierarchy level of ``gdf``; copied to ``hierarchy_level``.

    Returns:
        A DataFrame with one record per input row and the columns ``name``,
        ``parent_name`` (``None`` at level 0), ``hierarchy_level`` and
        ``polygon`` (the geometry's WKT text).
    """
    name_col = f"ADM{level}_EN"
    # Level 0 sits at the top of the hierarchy, so it has no parent column
    parent_col = f"ADM{level-1}_EN" if level > 0 else None

    records = [
        {
            'name': record[name_col],
            'parent_name': record[parent_col] if parent_col is not None else None,
            'hierarchy_level': level,
            'polygon': record["geometry"].wkt,
        }
        for _, record in gdf.iterrows()
    ]
    return pd.DataFrame(records)

# Transform each admin layer; its position in the tuple is its hierarchy level
places_0, places_1, places_2, places_3 = (
    transform_shape(layer, level=depth)
    for depth, layer in enumerate((admin0, admin1, admin2, admin3))
)

# Stack all levels into one frame with a fresh 0..n-1 index
per_level_frames = [places_0, places_1, places_2, places_3]
all_places = pd.concat(per_level_frames, ignore_index=True)

In [None]:
# Combined row count first, then one line per hierarchy level
print(f"Total places: {len(all_places)}")
for depth, frame in enumerate((places_0, places_1, places_2, places_3)):
    print(f"Level {depth}: {len(frame)}")

# First 20 rows of the combined frame as a preview
print("\nSample of combined data:")
print(all_places.head(20))

## Uploading to Supabase

### Connection

In [None]:
# Load credentials from the local env file; override=True lets values from the
# file replace variables that are already set in the process environment.
load_dotenv('../local.env', override=True)

SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")
SUPABASE_DB_URL = os.getenv("SUPABASE_DB_URL")

# Fail fast with a clear message when a setting is absent or empty. Without
# this check a missing SUPABASE_DB_URL is passed to psycopg as None, which may
# make libpq fall back to its default connection parameters instead of
# targeting Supabase (NOTE(review): confirm against the psycopg version in use).
missing_settings = [
    setting
    for setting, value in (
        ("SUPABASE_URL", SUPABASE_URL),
        ("SUPABASE_KEY", SUPABASE_KEY),
        ("SUPABASE_DB_URL", SUPABASE_DB_URL),
    )
    if not value
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_settings)
    )

supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
conn = psycopg.connect(SUPABASE_DB_URL)
cur = conn.cursor()
print("Connected to Supabase")

### Insertion

In [None]:
from io import StringIO

print(f"Copying {len(all_places)} records...")

try:
    # Stream the rows through psycopg's COPY helper instead of hand-building
    # the tab-separated payload: write_row() escapes tabs, newlines and
    # backslashes inside values and sends None as NULL, so an unusual place
    # name cannot corrupt the stream. It also avoids holding the whole payload
    # in memory twice.
    with cur.copy(
        "COPY places (name, parent_name, hierarchy_level, polygon) FROM STDIN"
    ) as copy:
        for row in all_places.itertuples(index=False):
            copy.write_row((
                # Missing values (None/NaN) are sent as SQL NULL
                row.name if pd.notna(row.name) else None,
                row.parent_name if pd.notna(row.parent_name) else None,
                # Plain int: older psycopg releases cannot adapt numpy integers
                int(row.hierarchy_level),
                # EWKT: the SRID prefix tags the geometry as EPSG:4326
                f"SRID=4326;{row.polygon}",
            ))

    conn.commit()
    print("Copy successful")
finally:
    # Always release the cursor and connection. If the copy failed, closing
    # without a commit discards the partial transaction.
    cur.close()
    conn.close()