In [None]:
# Version: 1.0 to Github
import tkinter as tk
from tkinter import filedialog, messagebox
import geopandas as gpd
import pandas as pd
import os
from urllib.parse import unquote  # Fix for URL-encoded file paths
from arcgis.gis import GIS
from arcgis.geoenrichment import enrich
from arcgis.features import GeoAccessor
from arcgis.geoenrichment import Country

# **🔹 ArcGIS Authentication**
# The key is read from the ARCGIS_API_KEY environment variable when it is set,
# so a real key never has to be committed to the repository. The literal is
# only a placeholder: replace it (or export the variable) before running.
ARCGIS_API_KEY = os.environ.get("ARCGIS_API_KEY", "your_arcgis_api_key")
gis = GIS(api_key=ARCGIS_API_KEY)

# Report which identity the connection resolved to.
if gis.users.me is None:
    print("You are authenticated as an anonymous user (likely via API key).")
else:
    print(f"Logged in as: {gis.users.me.username}")

# Initialize country object for enrichment
country = Country("usa", gis=gis)

# **🔹 Function to upload Shapefile**
def upload_shapefile():
    """Ask the user for a ``.shp`` file and remember it once it is validated.

    The selection is stored in the module-level ``shapefile_path`` and shown
    in ``lbl_shapefile`` only after the ``.shp``, ``.dbf`` and ``.shx`` files
    are all found next to each other. If the dialog is cancelled, or a
    required file is missing, the previous selection is left untouched so the
    stored path and the label never disagree.
    """
    global shapefile_path
    selected_path = filedialog.askopenfilename(filetypes=[("Shapefile", "*.shp")])

    # The dialog returns an empty value when the user cancels.
    if not selected_path:
        return

    selected_path = unquote(os.path.abspath(selected_path))  # Fix URL encoding & ensure absolute path

    # **Ensure required files exist**
    shp_dir = os.path.dirname(selected_path)
    base_name = os.path.splitext(os.path.basename(selected_path))[0]
    required_extensions = [".shp", ".dbf", ".shx"]

    missing_files = [
        ext
        for ext in required_extensions
        if not os.path.exists(os.path.join(shp_dir, base_name + ext))
    ]

    if missing_files:
        messagebox.showerror("Error", f"Missing required files: {', '.join(missing_files)} in {shp_dir}")
        return

    # Commit the selection only now that it is known to be usable.
    shapefile_path = selected_path
    lbl_shapefile.config(text=f"Shapefile: {os.path.basename(shapefile_path)}")


# clean shp
def clean_shapefile(gdf):
    """Reproject a GeoDataFrame to EPSG:4326 and convert it for ArcGIS.

    Args:
        gdf: GeoDataFrame read from the user's shapefile.

    Returns:
        The ArcGIS Spatially Enabled DataFrame built from ``gdf`` with
        ``GeoAccessor.from_geodataframe``.

    Raises:
        ValueError: If ``gdf`` has no CRS. Without a source CRS the data
            cannot be reprojected, and guessing one could silently place the
            features in the wrong location.
    """
    if gdf.crs is None:
        raise ValueError(
            "The shapefile has no CRS defined (is the .prj file missing?). "
            "Define its coordinate reference system before running enrichment."
        )

    if gdf.crs.to_epsg() != 4326:
        gdf = gdf.to_crs(epsg=4326)
        print("✅ Reprojected to EPSG 4326 (WGS 84).")

    # Convert GeoDataFrame to ArcGIS Spatially Enabled DataFrame
    spatial_df = GeoAccessor.from_geodataframe(gdf)

    # Fallback: if the conversion did not produce a 'SHAPE' column, register
    # the original 'geometry' column as the spatial column instead.
    if "SHAPE" not in spatial_df.columns:
        spatial_df.spatial.set_geometry("geometry", inplace=True)
        print("✅ Spatial column set to 'geometry'.")

    # Validate Spatial Data
    if spatial_df.spatial.validate():
        print("✅ Spatial data is valid.")
    else:
        print("⚠ Warning: Spatial data may be invalid.")

    return spatial_df

# **🔹 Function to upload variable list**
def upload_variables():
    """Ask the user for the variables list (CSV or Excel) and remember it.

    The chosen file is stored in the module-level ``variables_path`` and
    shown in ``lbl_variables``. If the dialog is cancelled, the previous
    selection and label are left as they were.
    """
    global variables_path
    selected_path = filedialog.askopenfilename(
        filetypes=[("CSV files", "*.csv"), ("Excel files", "*.xlsx")]
    )

    # The dialog returns an empty value when the user cancels.
    if not selected_path:
        return

    variables_path = unquote(os.path.abspath(selected_path))  # Fix URL encoding & ensure absolute path
    lbl_variables.config(text=f"Variables List: {os.path.basename(variables_path)}")

# Function to preprocess variables list
def preprocess_variables(file_path):
    """Read the variables list and build the lookups used for enrichment.

    The file must contain a ``name`` column (variable identifiers) and an
    ``alias`` column (human-readable descriptions). Files whose extension is
    ``.csv`` (any letter case) are read as CSV; anything else is read as
    Excel.

    Args:
        file_path: Path to the CSV or Excel file.

    Returns:
        A tuple ``(variables, variable_to_description,
        variable_to_description_lower)``: the list of variable names, a dict
        mapping each name to its alias, and the same dict keyed by the
        lower-cased name.

    Raises:
        KeyError: If the ``name`` or ``alias`` column is missing.
    """
    if file_path.lower().endswith(".csv"):
        esri_ba = pd.read_csv(file_path)
    else:
        esri_ba = pd.read_excel(file_path)

    missing_columns = [col for col in ("name", "alias") if col not in esri_ba.columns]
    if missing_columns:
        raise KeyError(
            f"Variables list is missing required column(s): {', '.join(missing_columns)}"
        )

    # Extract variables and descriptions
    variable_to_description = {}
    for raw_name, alias in zip(esri_ba["name"], esri_ba["alias"]):
        # Blank cells are read as NaN; they are not variables and would break
        # the lower-casing below.
        if pd.isna(raw_name):
            continue
        name = str(raw_name).strip()
        if not name:
            continue
        # Fall back to the variable name when no alias was provided, so a
        # column is never renamed to NaN.
        variable_to_description[name] = name if pd.isna(alias) else alias

    variables = list(variable_to_description)
    variable_to_description_lower = {k.lower(): v for k, v in variable_to_description.items()}

    return variables, variable_to_description, variable_to_description_lower

# **🔹 Global Variables**
# Paths chosen through the upload dialogs; an empty string means "nothing
# selected yet" and is what run_enrichment checks for.
shapefile_path = variables_path = ""


# **🔹 Function to run enrichment**
def run_enrichment():
    """Enrich the selected shapefile with the selected variables and save the results.

    Reads the shapefile at ``shapefile_path``, cleans it with
    ``clean_shapefile``, loads the variable list from ``variables_path`` and
    calls ``country.enrich``. The user is then asked for an output folder,
    where the enriched layer is written as ``enriched_shapefile.shp`` and the
    attribute table (columns renamed to their aliases) as
    ``enriched_data.xlsx``.

    Any failure is reported through a message box and printed; nothing is
    raised to the caller.
    """
    if not shapefile_path or not variables_path:
        messagebox.showerror("Error", "Please upload both shapefile and variables list")
        return

    try:
        if not os.path.exists(shapefile_path):
            messagebox.showerror("Error", f"Shapefile not found: {shapefile_path}")
            return

        print("Reading shapefile...")
        gdf = gpd.read_file(shapefile_path)

        print("Cleaning shapefile...")
        spatial_df = clean_shapefile(gdf)

        print("Preprocessing variables...")
        selected_variables, _, variable_to_description_lower = preprocess_variables(variables_path)

        print("Selected Variables:", selected_variables)
        if not selected_variables:
            messagebox.showerror("Error", "Selected variables list is empty.")
            return

        print("Running enrichment...")
        try:
            # Country.enrich takes the variable list as `enrich_variables`;
            # a `variables=` keyword is not one of its named parameters.
            enriched_data = country.enrich(spatial_df, enrich_variables=selected_variables)
        except Exception as enrich_error:
            print(f"❌ Enrichment Exception: {enrich_error}")
            raise  # Re-raise with the original traceback for the outer handler

        print("Renaming columns...")
        enriched_data2 = enriched_data.rename(columns=variable_to_description_lower)

        output_folder = filedialog.askdirectory(title="Select Output Folder (Please create one for storing Shp and Excel files)")
        if not output_folder:
            messagebox.showerror("Error", "Please select a valid output directory.")
            return

        # The shapefile keeps the original field names; only the Excel export
        # uses the renamed (alias) columns.
        shp_file_path = os.path.join(output_folder, "enriched_shapefile.shp")
        enriched_data.spatial.to_featureclass(shp_file_path)  # Save as .shp

        excel_file_path = os.path.join(output_folder, "enriched_data.xlsx")
        # Geometry objects do not belong in the spreadsheet; tolerate a result
        # that has no SHAPE column.
        enriched_data2.drop(columns=['SHAPE'], errors="ignore").to_excel(excel_file_path, index=False)  # Save as .xlsx

        messagebox.showinfo("Success", f"Enrichment completed!\nShapefile saved at {shp_file_path}\nExcel file saved at {excel_file_path}")
        lbl_status.config(text="Enrichment completed!")

    except Exception as e:
        # Keep the status line in step with the outcome so an earlier
        # "completed" message does not linger after a failed run.
        lbl_status.config(text="Enrichment failed.")
        messagebox.showerror("Error", f"An error occurred during enrichment: {e}")
        print(f"❌ Enrichment Error: {e}")


# **🔹 Tkinter GUI Setup**
root = tk.Tk()
root.title("ArcGIS Enrichment Tool")
root.geometry("400x300")

# **UI Components**
# Every widget is created first (in display order) and laid out afterwards.
btn_shapefile = tk.Button(root, text="Upload Shapefile (.shp)", command=upload_shapefile)
lbl_shapefile = tk.Label(root, text="No file selected", fg="gray")
btn_variables = tk.Button(root, text="Upload Variables List (.csv)", command=upload_variables)
lbl_variables = tk.Label(root, text="No file selected", fg="gray")
btn_run = tk.Button(root, text="Run Enrichment", command=run_enrichment, bg="green", fg="white")
lbl_status = tk.Label(root, text="", fg="blue")

# (widget, vertical padding) pairs, packed top to bottom in this order.
for widget, vertical_pad in (
    (btn_shapefile, 5),
    (lbl_shapefile, 0),
    (btn_variables, 5),
    (lbl_variables, 0),
    (btn_run, 10),
    (lbl_status, 0),
):
    widget.pack(pady=vertical_pad)

# **Run the Tkinter loop**
root.mainloop()


You are authenticated as an anonymous user (likely via API key).
Reading shapefile...
Cleaning shapefile...
✅ Reprojected to EPSG 4326 (WGS 84).
✅ Spatial data is valid.
Preprocessing variables...
Selected Variables: ['TOTPOP_CY', 'ACSTOTHU', 'MEDAGE_CY', 'BACHDEG_CY']
Running enrichment...
❌ Enrichment Exception: list index out of range
❌ Enrichment Error: list index out of range
