# Chicago HERE Technologies Hackathon
## Authors : Aryaman, Prabhat and Kush

In [53]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
from collections import Counter
import os
import ast  # for parsing functionalClass safely

# Set base folder
base_path = "Chicago_Hackathon_base_datasets"

# 1. Scan validation files
validation_files = []
for folder_name in os.listdir(base_path):
    folder_path = os.path.join(base_path, folder_name)
    val_file = os.path.join(folder_path, f"{folder_name}_validations.geojson")
    if os.path.exists(val_file):
        validation_files.append((folder_name, val_file))

# 2. Scan combined files
combined_files = {}
for folder_name in os.listdir(base_path):
    folder_path = os.path.join(base_path, folder_name)
    combined_file = os.path.join(folder_path, f"{folder_name}_combined.geojson")
    if os.path.exists(combined_file):
        combined_files[folder_name] = combined_file

# 3. Load all validation data
all_validations = []
for tile_id, path in validation_files:
    try:
        gdf = gpd.read_file(path)
        gdf["tile_id"] = tile_id
        all_validations.append(gdf)
    except Exception as e:
        print(f"Error loading {tile_id}: {e}")

combined_validations = pd.concat(all_validations, ignore_index=True)
print(f"✅ Loaded {len(combined_validations)} validation points.")

# 4. Define projected CRS for distance calc
projected_crs = "EPSG:32632"
case_counts = []

# 5. Process each validation feature
for idx, row in combined_validations.iterrows():
    tile_id = row["tile_id"]
    feature_id = row["Feature ID"]

    if tile_id not in combined_files:
        continue

    try:
        combined_gdf = gpd.read_file(combined_files[tile_id])
        if combined_gdf.crs.is_geographic:
            combined_gdf = combined_gdf.to_crs(projected_crs)

        matched_sign = combined_gdf[combined_gdf['Feature ID'] == feature_id]
        if matched_sign.empty:
            continue

        # Confidence and Observation Count
        confidence = matched_sign.get("confidence", pd.Series([1])).values[0]
        obs_raw = matched_sign.get("observationCounts", pd.Series(["10"])).values[0]
        try:
            obs_count = float(obs_raw)
        except:
            obs_count = 10

        # Get nearest roads
        roads = combined_gdf[
            (combined_gdf.geometry.type == 'LineString') &
            (combined_gdf['accessCharacteristics.pedestrian'] == 1)
        ].copy()

        if roads.empty:
            continue

        roads = roads[roads.is_valid & roads.geometry.notnull()].to_crs(projected_crs)
        point = row.geometry
        if point.is_empty or point is None:
            continue

        point_proj = gpd.GeoSeries([point], crs=combined_gdf.crs).to_crs(projected_crs).iloc[0]
        distances = roads.geometry.distance(point_proj)
        nearest = distances.nsmallest(2)
        nearest_idxs = nearest.index.tolist()

        # --- 🧠 Parse functionalClass safely ---
        fc_value = None
        if len(nearest_idxs) > 0:
            fc_raw = roads.loc[nearest_idxs[0]]['functionalClass']
            try:
                fc_parsed = ast.literal_eval(fc_raw) if isinstance(fc_raw, str) else fc_raw
                if isinstance(fc_parsed, list) and len(fc_parsed) > 0:
                    fc_value = str(fc_parsed[0].get("value"))
            except Exception as e:
                print(f"⚠️ Could not parse functionalClass for {feature_id}: {e}")
                fc_value = None

        # --- 🧠 Classification logic ---
        if confidence == 0 or obs_count < 3:
            case = 1
        elif len(nearest_idxs) > 1 and roads.loc[nearest_idxs[1]]['accessCharacteristics.pedestrian'] == 0:
            case = 2
        elif roads.loc[nearest_idxs[0]]['accessCharacteristics.pedestrian'] == 1 and fc_value == '1':
            case = 3
        else:
            case = 4

        case_counts.append(case)

        # Debug first few
        if idx < 5:
            print(f"\n🔍 Feature ID: {feature_id}")
            print(f"Confidence: {confidence}, Observation Count: {obs_count}")
            print(f"Nearest FC: {fc_value}")
            print(f"→ Assigned Case {case}")

    except Exception as e:
        print(f"❌ Error in tile {tile_id}, feature {feature_id}: {e}")

# 6. Summary of Results
summary = Counter(case_counts)
print("\n📊 Case Summary:")
for c in sorted(summary):
    print(f"Case {c}: {summary[c]} issues")

✅ Loaded 60 validation points.

🔍 Feature ID: urn:here::here:signs:1623128752571438692
Confidence: None, Observation Count: 10
Nearest FC: 3
→ Assigned Case 4

🔍 Feature ID: urn:here::here:signs:1622234405194397091
Confidence: None, Observation Count: 10
Nearest FC: 5
→ Assigned Case 4

🔍 Feature ID: urn:here::here:signs:1622234404173774672
Confidence: None, Observation Count: 10
Nearest FC: 5
→ Assigned Case 4

🔍 Feature ID: urn:here::here:signs:1621800204704796055
Confidence: None, Observation Count: 10
Nearest FC: 5
→ Assigned Case 4

🔍 Feature ID: urn:here::here:signs:1623044201860442531
Confidence: None, Observation Count: 10
Nearest FC: 5
→ Assigned Case 4


  return lib.distance(a, b, **kwargs)
  return lib.distance(a, b, **kwargs)
  return lib.distance(a, b, **kwargs)
  return lib.distance(a, b, **kwargs)
  return lib.distance(a, b, **kwargs)


KeyboardInterrupt: 