In [None]:
'''
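This notebook combines multiple APN (Assessor Parcel Number) datasets into a single cohesive dataset for analysis and reporting purposes: it extracts tables from PDF files into CSV files, compiles the old-to-new APN pairs found in CSV files into one master list, and applies that list to ArcGIS feature layers.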
It is meant for data analysts and GIS professionals who need to work with parcel data from various sources.
It is a rough draft and may require further refinement and testing before use in a production environment.

Thank you for your understanding and collaboration.

-Andrew Sajor
'''

In [None]:
import os
import pdfplumber
import pandas as pd
# This script scans a folder for PDF files, extracts one table per page with
# pdfplumber, and saves each extracted table as a CSV file in an output folder.
# --- Configuration ---
# Folder containing the PDF files to process (edit this path for your machine).
pdf_input_folder = r"C:\GIS\APN_Data\PDF_Input"

# Folder where the new CSV files will be saved (edit this path for your machine).
csv_output_folder = r"C:\GIS\APN_Data\CSV_Output"

# --- Main Script ---
print(f"Scanning for PDFs in: {pdf_input_folder}")

if not os.path.isdir(csv_output_folder):
    # exist_ok avoids a crash if the folder appears between the check and the call.
    os.makedirs(csv_output_folder, exist_ok=True)
    print(f"Created output folder: {csv_output_folder}")

saved_count = 0
failed_count = 0

# Sorted so the processing order (and therefore the printed report) is the same on every run.
for filename in sorted(os.listdir(pdf_input_folder)):
    if not filename.lower().endswith('.pdf'):
        continue

    pdf_path = os.path.join(pdf_input_folder, filename)
    base_name = os.path.splitext(filename)[0]
    print(f"  > Processing: {filename}")

    try:
        with pdfplumber.open(pdf_path) as pdf:
            # Loop through all pages to find tables
            for page_number, page in enumerate(pdf.pages, start=1):
                # Failures are handled per page: a single bad page (for example a
                # target CSV that is locked because it is open in another program)
                # must not prevent the remaining pages of this PDF from being saved.
                try:
                    table = page.extract_table()
                    if not table:
                        continue

                    # First row is used as the header, the rest as data rows.
                    df = pd.DataFrame(table[1:], columns=table[0])

                    # One CSV per page: <pdf name>_page_<n>.csv
                    csv_filename = f"{base_name}_page_{page_number}.csv"
                    output_path = os.path.join(csv_output_folder, csv_filename)

                    df.to_csv(output_path, index=False)
                    saved_count += 1
                    print(f"    - Found table on page {page_number}, saved to {csv_filename}")
                except Exception as e:
                    failed_count += 1
                    print(f"    - FAILED on page {page_number} of {filename}. Error: {e}")
    except Exception as e:
        # Reached when the PDF itself cannot be opened or read.
        failed_count += 1
        print(f"    - FAILED to process {filename}. Error: {e}")

print(f"\nSummary: {saved_count} table(s) saved, {failed_count} failure(s).")
print("\nPDF to CSV conversion complete.")

Scanning for PDFs in: C:\GIS\APN_Data\PDF_Input
  > Processing: 147-076-15_Merger_9-6-24_SH.pdf
    - FAILED to process 147-076-15_Merger_9-6-24_SH.pdf. Error: [Errno 13] Permission denied: 'C:\\GIS\\APN_Data\\CSV_Output\\147-076-15_Merger_9-6-24_SH_page_1.csv'
  > Processing: 151-201-10_Combine_10-10-24_RMN.pdf
    - Found table on page 1, saved to 151-201-10_Combine_10-10-24_RMN_page_1.csv
  > Processing: Existing APN.pdf
    - Found table on page 1, saved to Existing APN_page_1.csv

PDF to CSV conversion complete.


In [None]:
import os
import pandas as pd
import arcpy
import logging
import time
# This script updates APN fields in ArcGIS feature layers based on data compiled from CSV files.
# --- 1. SETUP LOGGING ---
# Create a unique log file name with a timestamp
log_filename = f"apn_update_log_{time.strftime('%Y%m%d-%H%M%S')}.log"

# logging.basicConfig() does nothing when the root logger already has handlers,
# which is the case whenever this cell is re-run in the same kernel. Detach and
# close any existing handlers first so every run really logs to its own new
# file and the previous run's log file handle is released.
root_logger = logging.getLogger()
for existing_handler in list(root_logger.handlers):
    root_logger.removeHandler(existing_handler)
    existing_handler.close()

# Configure logging to output to both the console and a file
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_filename, mode='w'), # 'w' overwrites the file each time
        logging.StreamHandler() # This will print to the console
    ]
)

logging.info("Script started.")

# --- Configuration ---
# Change these paths, layer names and field names to match your environment.
# Folder containing the CSV files with APN updates
csv_input_folder = r"C:\GIS\APN_Data\CSV_Input"
# Path to the ArcGIS Pro project file (must point to your actual .aprx file)
project_path = r"C:\GIS\APN_Data\YourProject.aprx"
# Name of the map within the ArcGIS Pro project (must match the map name in your project)
map_name = "Map"
# Layer name and field names for PARCELS (must match your ArcGIS Pro project)
parcels_layer_name = "Parcels"
parcels_apn_text_field = "ApnText"
parcels_apn_numeric_field = "apn"
# Layer name and field names for ADDRESSES (must match your ArcGIS Pro project)
addresses_layer_name = "Addresses"
addresses_apn_text_field = "ApnText"
addresses_apn_numeric_field = "apn"

# --- Path and Folder Validation ---
# SystemExit is raised directly instead of calling exit(): exit() is a helper
# intended for the interactive shell, and calling it without arguments reports
# success even though the run failed. A non-zero status marks the failure.
if not os.path.isdir(csv_input_folder):
    logging.critical(f"FATAL ERROR: Input folder not found at: {csv_input_folder}")
    raise SystemExit(1)

if not os.path.isfile(project_path):
    logging.critical(f"FATAL ERROR: ArcGIS Pro project not found at: {project_path}")
    raise SystemExit(1)

# --- Reusable function to update a layer's APN fields ---
def update_layer_apns(map_obj, layer_name, text_field, numeric_field, update_dictionary):
    """Rewrite the APN text and numeric fields of one layer from an old-to-new APN mapping.

    Every feature whose current ``text_field`` value is a key of
    ``update_dictionary`` gets the mapped value written to ``text_field`` and
    its integer form written to ``numeric_field``. Progress and problems are
    reported through the ``logging`` module.

    Args:
        map_obj: Map object exposing ``listLayers(name)``.
        layer_name: Name of the layer to update; the first match is used.
        text_field: Name of the field holding the APN as text.
        numeric_field: Name of the field holding the APN as a number.
        update_dictionary: Mapping of current APN text to new APN text.

    Returns:
        The number of features updated, or 0 when the layer is not found or an
        unexpected error aborts processing. Errors are logged, not raised.
    """
    logging.info("-" * 50)
    logging.info(f"Processing Layer: {layer_name}")
    try:
        # Check for the layer explicitly instead of relying on IndexError, so an
        # IndexError raised later in the body is not misreported as a missing layer.
        matching_layers = map_obj.listLayers(layer_name)
        if not matching_layers:
            logging.error(f"  > Layer named '{layer_name}' not found in the map.")
            return 0
        lyr = matching_layers[0]
        logging.info("  > Successfully accessed layer.")

        datasource_path = lyr.dataSource
        # NOTE(review): the parent folder of the data source is used as the edit
        # workspace. That presumably holds for a feature class stored directly in a
        # geodatabase; confirm for data inside a feature dataset or a service layer.
        workspace = os.path.dirname(datasource_path)
        update_count = 0
        with arcpy.da.Editor(workspace):
            # Request the OID field to help with logging errors
            with arcpy.da.UpdateCursor(lyr, [text_field, numeric_field, "OID@"]) as cursor:
                for row in cursor:
                    current_apn_text = row[0]
                    if not current_apn_text or current_apn_text not in update_dictionary:
                        continue

                    new_apn_str = update_dictionary[current_apn_text]
                    feature_oid = row[2]

                    # Convert before touching the row so a bad value never leaves
                    # the row half-modified, and so this warning is only emitted
                    # for genuine conversion failures.
                    try:
                        new_apn_number = int(new_apn_str)
                    except (ValueError, TypeError):
                        logging.warning(f"  > Could not update feature OID {feature_oid}. APN '{new_apn_str}' is not a valid number.")
                        continue

                    row[0] = new_apn_str
                    row[1] = new_apn_number

                    # A rejected write still skips only this feature, but it is
                    # reported as a write failure rather than as an invalid number.
                    try:
                        cursor.updateRow(row)
                    except (ValueError, TypeError) as write_error:
                        logging.warning(f"  > Could not update feature OID {feature_oid}. Write failed: {write_error}")
                        continue

                    update_count += 1

        logging.info(f"  > Update complete. {update_count} features updated in '{layer_name}'.")
        return update_count
    except Exception:
        logging.exception(f"  > An unexpected error occurred while processing layer '{layer_name}':")
        return 0

# --- 2. COMPILE DATA FROM CSV FILES ---
logging.info("--- Step 2: Scanning folder and compiling CSV data ---")

# Header spellings accepted for the "old" and "new" APN columns; the first one
# present in a file is used.
possible_old_cols = ['Existing APN (FROM)', 'Existing APN\n(FROM)', 'Existing']
possible_new_cols = ['New APN (TO)', 'New APN']


def _normalize_apn_text(apn_series):
    """Return the series as text with dashes removed and surrounding whitespace stripped."""
    return apn_series.astype(str).str.replace('-', '', regex=False).str.strip()


all_dataframes = []
# Sorted so that, when two files disagree about an APN, the mapping that wins
# does not depend on the arbitrary order returned by os.listdir().
for filename in sorted(os.listdir(csv_input_folder)):
    if not filename.lower().endswith('.csv'):
        continue

    full_path = os.path.join(csv_input_folder, filename)
    try:
        # Read everything as text. With type inference, an all-digit APN column
        # that contains blanks becomes float ("12345.0"), which then fails the
        # numeric check below and is silently dropped; leading zeros are lost too.
        temp_df = pd.read_csv(full_path, dtype=str)
        if temp_df.empty:
            logging.warning(f"  > Skipping empty file: {filename}")
            continue

        old_col_found = next((col for col in possible_old_cols if col in temp_df.columns), None)
        new_col_found = next((col for col in possible_new_cols if col in temp_df.columns), None)

        if not (old_col_found and new_col_found):
            logging.warning(f"  > Skipping file '{filename}' due to missing required APN columns.")
            continue

        cleaned_df = (
            temp_df
            .rename(columns={old_col_found: 'Old_APN', new_col_found: 'New_APN'})
            # A blank new APN inherits the value from the row above it.
            .assign(New_APN=lambda frame: frame['New_APN'].ffill())
            .dropna(subset=['Old_APN'])
            .assign(
                Old_APN=lambda frame: _normalize_apn_text(frame['Old_APN']),
                New_APN=lambda frame: _normalize_apn_text(frame['New_APN']),
            )
        )

        # Keep only rows where both APNs are purely numeric after cleaning.
        is_valid_pair = cleaned_df['Old_APN'].str.isnumeric() & cleaned_df['New_APN'].str.isnumeric()
        skipped_rows = int((~is_valid_pair).sum())
        if skipped_rows:
            logging.info(f"  > {filename}: ignored {skipped_rows} row(s) without a numeric old/new APN pair.")
        all_dataframes.append(cleaned_df.loc[is_valid_pair, ['Old_APN', 'New_APN']])

    except Exception:
        logging.exception(f"  > Failed to process {filename}:")

if all_dataframes:
    master_update_df = pd.concat(all_dataframes, ignore_index=True).drop_duplicates()
    logging.info(f"\nSuccessfully compiled a master list of {len(master_update_df)} total unique updates.")
else:
    master_update_df = None

# --- 3. UPDATE ARCGIS FEATURE LAYERS ---
if master_update_df is not None and not master_update_df.empty:
    logging.info("\n--- Step 3: Updating ArcGIS Feature Layers ---")
    try:
        # The lookup can hold only one new APN per old APN, so when an old APN
        # maps to several new ones the last row wins. Report those cases
        # instead of resolving them silently.
        conflicting_rows = master_update_df[master_update_df.duplicated(subset='Old_APN', keep=False)]
        if not conflicting_rows.empty:
            conflicting_apns = sorted(conflicting_rows['Old_APN'].unique())
            logging.warning(
                f"  > {len(conflicting_apns)} old APN(s) map to more than one new APN; "
                f"the last listed mapping will be used for each: {conflicting_apns}"
            )

        update_dict = dict(zip(master_update_df['Old_APN'], master_update_df['New_APN']))
        aprx = arcpy.mp.ArcGISProject(project_path)
        matching_maps = aprx.listMaps(map_name)
        if not matching_maps:
            logging.error(f"Map named '{map_name}' not found in project: {project_path}")
        else:
            m = matching_maps[0]
            update_layer_apns(m, parcels_layer_name, parcels_apn_text_field, parcels_apn_numeric_field, update_dict)
            update_layer_apns(m, addresses_layer_name, addresses_apn_text_field, addresses_apn_numeric_field, update_dict)
            logging.info("\nProcessing Complete.")
    except Exception:
        logging.exception("A critical error occurred during the main ArcGIS update process:")
else:
    logging.warning("\nSkipping GIS update because no valid data was compiled.")

logging.info("Script finished.")

2025-08-14 13:43:07,906 - INFO - Script started.
2025-08-14 13:43:07,908 - INFO - --- Step 2: Scanning folder and compiling CSV data ---
2025-08-14 13:43:07,932 - INFO - 
Successfully compiled a master list of 7 total unique updates.
2025-08-14 13:43:07,933 - INFO - 
--- Step 3: Updating ArcGIS Feature Layers ---
2025-08-14 13:43:07,953 - INFO - --------------------------------------------------
2025-08-14 13:43:07,954 - INFO - Processing Layer: BasicStocktonParcels
2025-08-14 13:43:07,968 - INFO -   > Successfully accessed layer.
2025-08-14 13:43:09,076 - INFO -   > Update complete. 7 features updated in 'BasicStocktonParcels'.
2025-08-14 13:43:09,077 - INFO - --------------------------------------------------
2025-08-14 13:43:09,077 - INFO - Processing Layer: Addresses
2025-08-14 13:43:09,078 - INFO -   > Successfully accessed layer.
2025-08-14 13:43:10,116 - INFO -   > Update complete. 9 features updated in 'Addresses'.
2025-08-14 13:43:10,117 - INFO - 
Processing Complete.
2025-08-