In [1]:
import firebase_admin
from firebase_admin import credentials, firestore
import geopy.distance
from sklearn.ensemble import RandomForestClassifier
import pickle
import pandas as pd
from datetime import datetime
from geopy.geocoders import Nominatim
from sklearn.preprocessing import LabelEncoder

# Initialize Firebase Admin SDK
def initialize_firebase():
    try:
        if not firebase_admin._apps:
            cred = credentials.Certificate('serviceAccountKey.json')
            firebase_admin.initialize_app(cred)
        else:
            print("Firebase already initialized.")
    except Exception as e:
        print(f"Error initializing Firebase: {e}")

initialize_firebase()

# Initialize Firestore client
db = firestore.client()

In [2]:
# Fetch Employees & Job Sites
employees = [doc.to_dict() for doc in db.collection('employees').stream()]
job_sites = [doc.to_dict() for doc in db.collection('job_sites').stream()]

print(f"📋 Employees Loaded: {len(employees)}")
print(f"🏗️ Job Sites Loaded: {len(job_sites)}")

📋 Employees Loaded: 50
🏗️ Job Sites Loaded: 4


In [3]:
import time
import googlemaps
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
import re

# ✅ Replace with your actual Google API key (Secure this key in production!)
GOOGLE_API_KEY = "AIzaSyADgR5Y3ARu69ClnxiAJ2XN5XZQ7OaY_0E"
gmaps = googlemaps.Client(key=GOOGLE_API_KEY)

# ✅ Initialize OpenStreetMap (OSM) as a backup geocoder
geolocator_osm = Nominatim(user_agent="optishipp_geocoder")

def clean_geocode_address(address):
    """
    Cleans an address by removing unit numbers, shopping centers, and common non-address elements.
    """
    address = re.sub(r'\b(Unit|Suite|Apt|Floor|Rm|#)\s*\d+\b', '', address, flags=re.IGNORECASE)  # Remove unit numbers
    address = re.sub(r'\b(Shopping Center|Mall|Plaza|Building|Complex)\b', '', address, flags=re.IGNORECASE)  # Remove landmarks
    return address.strip()

def google_geocode(address, max_retries=3):
    """
    Attempts to geocode an address using Google Maps API with retry logic.
    """
    address = clean_geocode_address(address)
    for attempt in range(max_retries):
        try:
            geocode_result = gmaps.geocode(address)
            if geocode_result:
                # Extract latitude and longitude
                lat = geocode_result[0]['geometry']['location']['lat']
                lon = geocode_result[0]['geometry']['location']['lng']
                
                # Check if it's a partial match (avoid vague results)
                if geocode_result[0].get('partial_match', False):
                    print(f"⚠️ Google returned a partial match for {address}. Retrying...")
                    continue

                print(f"✅ Google Maps Geocode Success: {address} -> {lat}, {lon}")
                return lat, lon  # Return valid coordinates

            print(f"⚠️ Google Geocode failed for: {address}. Attempt {attempt+1}/{max_retries}")
        except Exception as e:
            print(f"❌ Google Maps Geocode error for {address}: {e}")

        time.sleep(2)  # Prevent rate limiting

    return None, None  # If all attempts fail

def osm_geocode(address, max_retries=3):
    """
    Attempts to geocode an address using OpenStreetMap (OSM) with retry logic.
    """
    address = clean_geocode_address(address)
    for attempt in range(max_retries):
        try:
            location = geolocator_osm.geocode(address, timeout=10)
            if location:
                print(f"🌍 OSM Geocode Success: {address} -> {location.latitude}, {location.longitude}")
                return location.latitude, location.longitude
        except GeocoderTimedOut:
            print(f"⏳ OSM Timeout error for {address}. Attempt {attempt+1}/{max_retries}")
        except Exception as e:
            print(f"❌ OSM Geocode error for {address}: {e}")

        time.sleep(2)  # Prevent rapid retries

    return None, None  # If all attempts fail

def geocode_address(address):
    """
    Attempts to geocode an address using Google Maps first, then OpenStreetMap as a backup.
    """
    lat, lon = google_geocode(address)
    if lat is None or lon is None:
        print(f"🔄 Trying OSM for: {address}")
        lat, lon = osm_geocode(address)

    if lat is None or lon is None:
        print(f"❌ Failed to geocode address: {address}")

    return lat, lon





In [5]:
# Feature Engineering
def calculate_distance(employee_location, site_location):
    return geopy.distance.distance(employee_location, site_location).km

In [7]:
# ✅ Initialize assigned employees set
assigned_employees = set()

for site in job_sites:
    required_roles = site.get('required_roles', {})  
    assigned_counts = {role: 0 for role in required_roles}

    for role, role_data in required_roles.items():  # Extract role name and details
        required_count = role_data.get('num_workers', 0)
        if required_count == 0:
            continue

        print(f"\n🔎 Processing site {site['site_id']} - Role: {role}, Needs: {required_count}")

        employee_scores = []
        for employee in employees:
            score = 0  

            # ✅ Ensure we check the role correctly (since employee roles are lists)
            if role in employee.get('role', []):
                score += 5  

            # ✅ Availability Check
            if any(shift in employee.get('availability', []) for shift in role_data.get('work_schedule', [])):
                score += 4  

            # ✅ Has Car Check
            if employee.get('have_car', 'No') == 'Yes':
                score += 3  

            # ✅ Distance Calculation
            distance = calculate_distance((employee['latitude'], employee['longitude']), (site['latitude'], site['longitude']))
            if distance <= 40:
                score += 2  

            employee_scores.append({
                'employee': employee,
                'score': score,
                'distance': distance
            })

        # ✅ Sort employees by best match (highest score first, then highest rating)
        sorted_employees = sorted(employee_scores, key=lambda x: (-x['score'], x['distance'], -x['employee'].get('rating', 0)))

        print(f"🎯 {len(sorted_employees)} Employees Available for {role} at Site {site['site_id']}")

        # ✅ First, assign employees within 40km
        assigned = 0
        for emp_data in sorted_employees:
            if assigned >= required_count:
                break

            employee = emp_data['employee']

            if employee['worker_id'] in assigned_employees:
                continue

            # ✅ Enforce 40km limit unless no other choice
            if emp_data['distance'] > 40 and assigned < required_count:
                continue  

            print(f"⬆ Attempting Assignment: {employee['worker_id']} → {site['site_id']} (Score: {emp_data['score']}, Distance: {emp_data['distance']} km)")

            try:
                db.collection('assignments').add({
                    'employee_id': employee['worker_id'],
                    'job_site_id': site['site_id'],
                    'role': role,
                    'distance': emp_data['distance'],  # ✅ Add Distance
                    'assigned_date': datetime.now()
                })
                print(f"✅ Assigned {employee['worker_id']} to {site['site_id']} for {role} with score {emp_data['score']} and distance {emp_data['distance']} km.")
                assigned_employees.add(employee['worker_id'])
                assigned_counts[role] += 1
                assigned += 1
            except Exception as e:
                print(f"❌ Firestore Write Failed: {e}")

        # ✅ If no one was assigned within 40km, assign best available
        if assigned < required_count:
            print(f"⚠️ No employees found within 40km for {role} at {site['site_id']}. Assigning best available...")

            for emp_data in sorted_employees:
                if assigned >= required_count:
                    break

                employee = emp_data['employee']
                if employee['worker_id'] in assigned_employees:
                    continue

                print(f"⬆ Assigning Fallback: {employee['worker_id']} → {site['site_id']} (Distance: {emp_data['distance']} km)")

                try:
                    db.collection('assignments').add({
                        'employee_id': employee['worker_id'],
                        'job_site_id': site['site_id'],
                        'role': role,
                        'distance': emp_data['distance'],  # ✅ Add Distance
                        'assigned_date': datetime.now()
                    })
                    print(f"✅ Assigned {employee['worker_id']} (Fallback) to {site['site_id']} for {role}. Distance: {emp_data['distance']} km.")
                    assigned_employees.add(employee['worker_id'])
                    assigned_counts[role] += 1
                    assigned += 1
                except Exception as e:
                    print(f"❌ Firestore Write Failed: {e}")



🔎 Processing site SITE5148 - Role: Cleaner, Needs: 1


KeyError: 'latitude'