# Foundation Setup - Data Preparation Phase

**Purpose**: Create TMK foundation layer with wells distance data  
**Input**: Municipal and domestic wells shapefiles from Dr. Shuler's GIS portal  
**Output**: `TMK_Foundation_Master.shp` (shapefile) written to `Outputs/Foundation/`

## Academic Methodology
This implements the first stage of our three-part analytical framework:
1. **Characteristics Compilation** ← WE ARE HERE
2. Suitability Matrix Analysis 
3. Site-Specific Technology Matching

## HAR 11-62 Compliance Focus
- Municipal wells: 1000-foot setback requirement
- Domestic wells: 1000-foot setback requirement  
- Single-family residential scope only
- Maximum 5 bedrooms per IWS application

## Key Concepts from Previous Work
- **JOIN_LOG tracking** for processing reproducibility
- **Modular design** allowing independent execution
- **Quality assurance** built into each step
- **TMK foundation approach** for all subsequent joins

In [None]:
# Academic Framework Configuration
import arcpy
import os
from datetime import datetime
from pathlib import Path

# Run banner with a start timestamp so notebook output records when the
# phase was executed.
print("Hawaii Cesspool Matrix Analysis - Academic Framework")
print("Phase 1: Data Preparation - Foundation Setup")
print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# Project layout. Everything below is derived from project_root so only this
# one path needs to change when the project is moved.
project_root = Path(r"C:\Users\rober\OneDrive\Documents\GIS_Projects\ParcelAnalysis")
gdb_path = project_root / "ParcelAnalysis.gdb"
data_folder = project_root / "data" / "gis_downloads" / "wells" / "statewide"
outputs_folder = project_root / "Outputs"

# ArcGIS geoprocessing environment.
arcpy.env.workspace = str(gdb_path)
arcpy.env.overwriteOutput = True
# Use a real SpatialReference built from the WKID instead of a truncated WKT
# string: "PROJCS['NAD_1983_UTM_Zone_4N']" carries no datum/projection
# parameters and is not a complete coordinate system definition.
# WKID 26904 = NAD 1983 UTM Zone 4N.
arcpy.env.outputCoordinateSystem = arcpy.SpatialReference(26904)

print(f"✅ Working geodatabase: {gdb_path}")
print(f"✅ Data source: {data_folder}")
print("✅ Coordinate system: NAD 1983 UTM Zone 4N (Hawaii standard)")

In [None]:
# Step 1: Import and validate wells data
print("\n" + "="*50)
print("STEP 1: IMPORT AND VALIDATE WELLS DATA")
print("="*50)

# Source shapefiles expected inside data_folder.
municipal_wells_shp = data_folder / "CPs_Distance_to_Municipal_Wells.shp"
domestic_wells_shp = data_folder / "CPs_Distance_to_Domestic_Wells.shp"

# Names of the geodatabase feature classes the shapefiles are copied into
# (resolved against arcpy.env.workspace).
municipal_fc = "Municipal_Wells_Distance"
domestic_fc = "Domestic_Wells_Distance"

# (label, source shapefile, target feature class), municipal first.
wells_sources = (
    ("Municipal", municipal_wells_shp, municipal_fc),
    ("Domestic", domestic_wells_shp, domestic_fc),
)

# Fail fast if either input is missing, before anything is reported or copied.
for label, shp, _ in wells_sources:
    if not shp.exists():
        raise FileNotFoundError(f"{label} wells shapefile not found: {shp}")

for label, shp, _ in wells_sources:
    print(f"✅ {label} wells found: {shp.name}")

# Copy each shapefile into the working geodatabase.
for _, shp, target_fc in wells_sources:
    arcpy.management.CopyFeatures(str(shp), target_fc)

# Record counts of the imported feature classes confirm the copy worked.
muni_count, domestic_count = (
    int(arcpy.management.GetCount(target_fc)[0])
    for target_fc in (municipal_fc, domestic_fc)
)

print(f"✅ Imported municipal wells: {muni_count:,} records")
print(f"✅ Imported domestic wells: {domestic_count:,} records")

In [None]:
# Step 2: Create foundation structure
print("\n" + "="*50)
print("STEP 2: CREATE FOUNDATION STRUCTURE")
print("="*50)

# Make sure Outputs/Foundation exists. parents=True also creates the Outputs
# folder itself when it is missing (otherwise mkdir raises FileNotFoundError
# on a fresh project); exist_ok=True keeps re-runs of this cell harmless.
foundation_folder = outputs_folder / "Foundation"
foundation_folder.mkdir(parents=True, exist_ok=True)
print(f"✅ Created foundation folder: {foundation_folder}")

# The foundation layer is a shapefile copy of the municipal wells feature
# class; later steps add fields to it and join further data onto it.
foundation_name = "TMK_Foundation_Master"
foundation_path = foundation_folder / f"{foundation_name}.shp"

arcpy.management.CopyFeatures(municipal_fc, str(foundation_path))
print(f"✅ Created foundation shapefile: {foundation_path.name}")

# Snapshot of the field names as copied, before any processing fields exist.
foundation_fields = [f.name for f in arcpy.ListFields(str(foundation_path))]
print(f"Foundation fields ({len(foundation_fields)}): {foundation_fields[:10]}...")  # Show first 10

# Pick the first candidate TMK column that is present, in priority order.
# The match is exact (case-sensitive).
tmk_candidates = ['TMK', 'TMK9', 'TMK_txt']
tmk_field = next(
    (candidate for candidate in tmk_candidates if candidate in foundation_fields),
    None,
)

if tmk_field is None:
    print(f"❌ ERROR: No TMK field found in foundation data")
    print(f"Available fields: {foundation_fields}")
    raise ValueError("Cannot identify TMK field")

print(f"✅ TMK field identified: {tmk_field}")

In [None]:
# Step 3: Add tracking and processing fields
print("\n" + "="*50)
print("STEP 3: ADD TRACKING AND PROCESSING FIELDS")
print("="*50)

# Fields required by later Matrix processing, one tuple per field:
# (name, type, length or None, alias). Only the TEXT entries carry a length.
# NOTE(review): the foundation is a shapefile, and shapefile field names are
# limited to 10 characters -- confirm how REGULATORY_FLAGS and MATRIX_READY
# are actually stored.
processing_fields = [
    ("JOIN_LOG", "TEXT", 255, "Processing sequence tracking"),
    ("SOIL_STATUS", "TEXT", 20, "Soil data join status"),
    ("SLOPE_CLASS", "TEXT", 15, "HAR 11-62 slope classification"),
    ("PERC_RATE", "DOUBLE", None, "Soil percolation rate (min/inch)"),
    ("REGULATORY_FLAGS", "TEXT", 100, "SMA, flood zone, etc."),
    ("MATRIX_READY", "SHORT", None, "Ready for Matrix analysis (1/0)")
]

# Add each field to the foundation layer. A failure on one field is reported
# and the loop carries on with the next one.
for field_name, field_type, field_length, field_alias in processing_fields:
    # field_length is passed only when one is defined for the field.
    optional_args = {"field_alias": field_alias}
    if field_length:
        optional_args["field_length"] = field_length

    try:
        arcpy.management.AddField(
            str(foundation_path), field_name, field_type, **optional_args
        )
        print(f"  ✅ Added field: {field_name}")
    except Exception as err:
        print(f"  ⚠️ Could not add {field_name}: {err}")

print("✅ Foundation structure enhanced for Matrix processing")

In [None]:
# Step 4: Join domestic wells data
print("\n" + "="*50)
print("STEP 4: JOIN DOMESTIC WELLS DATA")
print("="*50)

# Locate the TMK key column in the domestic wells feature class, using the
# same priority-ordered candidate list as the foundation layer.
domestic_fields = [f.name for f in arcpy.ListFields(domestic_fc)]
domestic_tmk = next(
    (candidate for candidate in tmk_candidates if candidate in domestic_fields),
    None,
)

if domestic_tmk is None:
    print(f"❌ ERROR: No TMK field found in domestic wells data")
    raise ValueError("Cannot identify domestic wells TMK field")

print(f"✅ Domestic wells TMK field: {domestic_tmk}")
print(f"Foundation TMK field: {tmk_field}")

# Snapshot the field names right before the join so the "new fields" count
# reflects exactly what JoinField added. Subtracting the *requested*
# processing-field names instead would miscount whenever the shapefile stored
# a field under a different (e.g. truncated) name.
fields_before_join = {f.name for f in arcpy.ListFields(str(foundation_path))}

# Perform the join. Only the join itself sits in the try block, so a
# "Join failed" message always refers to JoinField.
try:
    arcpy.management.JoinField(
        in_data=str(foundation_path),
        in_field=tmk_field,
        join_table=domestic_fc,
        join_field=domestic_tmk
    )
except Exception as e:
    print(f"❌ Join failed: {e}")
    raise

print(f"✅ Domestic wells data joined successfully")

# Verify results: anything present now that was absent before the join.
final_fields = [f.name for f in arcpy.ListFields(str(foundation_path))]
new_fields = [name for name in final_fields if name not in fields_before_join]
print(f"✅ New fields added from domestic wells: {len(new_fields)}")

In [None]:
# Step 5: Initialize JOIN_LOG tracking
print("\n" + "="*50)
print("STEP 5: INITIALIZE JOIN_LOG TRACKING")
print("="*50)

# One shared timestamp stamps both the foundation and wells entries of the
# initial log string.
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
initial_log = f"Foundation: {timestamp}; Wells: {timestamp}"

# Write the same initial log value into JOIN_LOG on every record, counting
# rows as they are updated. update_count stays 0 when the layer is empty.
update_count = 0
with arcpy.da.UpdateCursor(str(foundation_path), ["JOIN_LOG"]) as log_cursor:
    for update_count, _ in enumerate(log_cursor, start=1):
        log_cursor.updateRow([initial_log])

print(f"✅ Initialized JOIN_LOG for {update_count:,} records")
print(f"✅ Log entry: {initial_log}")

In [None]:
# Step 6: Final validation and summary
print("\n" + "="*50)
print("STEP 6: FINAL VALIDATION AND SUMMARY")
print("="*50)

# Final record count and field list of the foundation layer.
final_count = int(arcpy.management.GetCount(str(foundation_path))[0])
final_fields = [f.name for f in arcpy.ListFields(str(foundation_path))]

# Distance fields are those whose name contains both "dist" and "well",
# compared case-insensitively.
distance_fields = [
    name for name in final_fields
    if all(token in name.lower() for token in ('dist', 'well'))
]

# Summary report, printed one line at a time.
summary_lines = (
    "\n📊 FOUNDATION SETUP COMPLETE",
    "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
    f"Foundation file: {foundation_path}",
    f"Total records: {final_count:,}",
    f"Total fields: {len(final_fields)}",
    f"Distance fields: {distance_fields}",
    f"TMK field: {tmk_field}",
    f"Processing timestamp: {timestamp}",
)
for summary_line in summary_lines:
    print(summary_line)

# Quality indicator: share of non-null values in (at most) the first two
# distance fields.
# NOTE(review): the foundation is a shapefile; shapefiles may store missing
# numeric values as 0 rather than null, in which case this check would always
# report 100% -- confirm.
if distance_fields:
    print("\n🔍 Data Quality Check:")
    for field in distance_fields[:2]:
        total_count = 0
        null_count = 0
        with arcpy.da.SearchCursor(str(foundation_path), [field]) as value_cursor:
            for (value,) in value_cursor:
                total_count += 1
                if value is None:
                    null_count += 1

        populated = total_count - null_count
        # An empty layer reports 0% rather than dividing by zero.
        if total_count > 0:
            completeness = (populated / total_count) * 100
        else:
            completeness = 0
        print(f"  {field}: {completeness:.1f}% complete ({populated:,}/{total_count:,})")

print("\n✅ Ready for Phase 2: Geospatial Analysis")
print("✅ Foundation layer ready for slope, soil, and regulatory overlays")

# The geodatabase copies made in Step 1 were only intermediates; remove
# whichever of them still exist.
for fc in (municipal_fc, domestic_fc):
    if not arcpy.Exists(fc):
        continue
    arcpy.management.Delete(fc)
    print(f"🗑️ Cleaned up temporary: {fc}")

print("\n🎯 Next Step: Run Phase 2 - Geospatial Analysis notebooks")