In [30]:
import firebase_admin
from firebase_admin import credentials, firestore
import geopy.distance
from sklearn.ensemble import RandomForestClassifier
import pickle
import pandas as pd
from datetime import datetime
from geopy.geocoders import Nominatim
from sklearn.preprocessing import LabelEncoder

# Initialize Firebase Admin SDK
def initialize_firebase():
    try:
        if not firebase_admin._apps:
            cred = credentials.Certificate('serviceAccountKey.json')
            firebase_admin.initialize_app(cred)
        else:
            print("Firebase already initialized.")
    except Exception as e:
        print(f"Error initializing Firebase: {e}")

initialize_firebase()

# Initialize Firestore client
db = firestore.client()

Firebase already initialized.


In [31]:
# Fetch Employees & Job Sites
employees = [doc.to_dict() for doc in db.collection('employees').stream()]
job_sites = [doc.to_dict() for doc in db.collection('job_sites').stream()]

print(f"📋 Employees Loaded: {len(employees)}")
print(f"🏗️ Job Sites Loaded: {len(job_sites)}")

📋 Employees Loaded: 50
🏗️ Job Sites Loaded: 4


In [32]:
# Geocoding function (Avoid re-geocoding addresses already stored in Firestore)
geolocator = Nominatim(user_agent="OptiShiftApp")

def geocode_address(address):
    try:
        location = geolocator.geocode(address, timeout=10)
        if location:
            return location.latitude, location.longitude
        else:
            return None, None
    except Exception as e:
        return None, None

# Geocode Missing Addresses
for employee in employees:
    if 'latitude' not in employee or 'longitude' not in employee:
        print(f"🔍 Missing coordinates for {employee['worker_id']}. Attempting geocode...")
        lat, lon = geocode_address(employee['home_address'])
        if lat and lon:
            employee['latitude'] = lat
            employee['longitude'] = lon
            print(f"✅ Geocoded {employee['worker_id']}: {lat}, {lon}")
        else:
            print(f"❌ Failed to geocode {employee['worker_id']}. Skipping.")

for site in job_sites:
    if 'latitude' not in site or 'longitude' not in site:
        lat, lon = geocode_address(site['address'])
        if lat and lon:
            site['latitude'] = lat
            site['longitude'] = lon

🔍 Missing coordinates for DVCQQLCL. Attempting geocode...
✅ Geocoded DVCQQLCL: 43.5921963, -79.65138688125015
🔍 Missing coordinates for 2084COEC. Attempting geocode...
✅ Geocoded 2084COEC: 43.5922757, -79.63514028047207
🔍 Missing coordinates for 16SAR8R9. Attempting geocode...
✅ Geocoded 16SAR8R9: 43.605456849999996, -79.65300176125949
🔍 Missing coordinates for HNMRH170. Attempting geocode...
✅ Geocoded HNMRH170: 43.5921963, -79.65138688125015
🔍 Missing coordinates for BQYL7RR5. Attempting geocode...
✅ Geocoded BQYL7RR5: 43.623916449999996, -79.67202003231276
🔍 Missing coordinates for J30DTGPH. Attempting geocode...
✅ Geocoded J30DTGPH: 43.6270285, -79.6300009
🔍 Missing coordinates for LJVVAM2B. Attempting geocode...
✅ Geocoded LJVVAM2B: 43.605456849999996, -79.65300176125949
🔍 Missing coordinates for EOCO79YN. Attempting geocode...
✅ Geocoded EOCO79YN: 43.6270285, -79.6300009
🔍 Missing coordinates for PAWQC5I1. Attempting geocode...
✅ Geocoded PAWQC5I1: 43.58448695, -79.6211363943631


In [33]:
# print(employees) as a dataframe
employees_df = pd.DataFrame(employees)
print(employees_df)

     sur_name phone_number have_car first_name                        role  \
0       López   4378577691       No       Juan           [Cleaner, Labour]   
1       Munoz   4374229494      Yes     Manuel  [Painter, Cleaner, Labour]   
2       Munoz   4374603523       No       José                   [Cleaner]   
3       Gómez   4378954592      Yes       José  [Labour, Cleaner, Painter]   
4     Vázquez   4379764800      Yes     Manuel  [Painter, Labour, Cleaner]   
5       Gómez   4378618980      Yes     Manuel  [Painter, Cleaner, Labour]   
6   Hernández   4376971002      Yes       José          [Cleaner, Painter]   
7       Pérez   4373377433       No     Carlos                   [Cleaner]   
8       Munoz   4375884785      Yes       José                   [Cleaner]   
9       López   4379518294      Yes       José  [Painter, Labour, Cleaner]   
10      López   4379601096      Yes       Juan  [Cleaner, Labour, Painter]   
11  Hernández   4376531158       No       Juan                  

In [34]:
# Feature Engineering
def calculate_distance(employee_location, site_location):
    return geopy.distance.distance(employee_location, site_location).km

In [35]:
print(f"📋 Employees Retrieved: {len(employees)}")
print(f"🏗️ Job Sites Retrieved: {len(job_sites)}")

# Print first 5 employees and job sites
for emp in employees[:5]:
    print(f"👤 Employee: {emp.get('worker_id', 'No ID')} | Role: {emp.get('role', 'No Role')} | Location: {emp.get('latitude', 'No Lat')}, {emp.get('longitude', 'No Lon')}")

for site in job_sites[:5]:
    print(f"🏗️ Job Site: {site.get('site_id', 'No ID')} | Required Roles: {site.get('required_roles', 'No Roles')} | Location: {site.get('latitude', 'No Lat')}, {site.get('longitude', 'No Lon')}")


📋 Employees Retrieved: 50
🏗️ Job Sites Retrieved: 4
👤 Employee: DVCQQLCL | Role: ['Cleaner', 'Labour'] | Location: 43.5921963, -79.65138688125015
👤 Employee: 2084COEC | Role: ['Painter', 'Cleaner', 'Labour'] | Location: 43.5922757, -79.63514028047207
👤 Employee: 16SAR8R9 | Role: ['Cleaner'] | Location: 43.605456849999996, -79.65300176125949
👤 Employee: HNMRH170 | Role: ['Labour', 'Cleaner', 'Painter'] | Location: 43.5921963, -79.65138688125015
👤 Employee: BQYL7RR5 | Role: ['Painter', 'Labour', 'Cleaner'] | Location: 43.623916449999996, -79.67202003231276
🏗️ Job Site: SITE5148 | Required Roles: {'Cleaner': {'work_schedule': ['7:00-15:30'], 'num_workers': 1}, 'Labour': {'work_schedule': ['7:00-15:30'], 'num_workers': 1}} | Location: 43.55198488743175, -79.66416755049764
🏗️ Job Site: SITE8123 | Required Roles: {'Labour': {'work_schedule': ['7:00-15:30'], 'num_workers': 3}} | Location: 43.822017483251166, -79.0495549953297
🏗️ Job Site: SITE8308 | Required Roles: {'Labour': {'work_schedule'

In [39]:
# ✅ Initialize assigned employees set
assigned_employees = set()

for site in job_sites:
    required_roles = site.get('required_roles', {})  
    assigned_counts = {role: 0 for role in required_roles}

    for role, role_data in required_roles.items():  # Extract role name and details
        required_count = role_data.get('num_workers', 0)
        if required_count == 0:
            continue

        print(f"\n🔎 Processing site {site['site_id']} - Role: {role}, Needs: {required_count}")

        employee_scores = []
        for employee in employees:
            score = 0  

            # ✅ Ensure we check the role correctly (since employee roles are lists)
            if role in employee.get('role', []):
                score += 5  

            # ✅ Availability Check
            if any(shift in employee.get('availability', []) for shift in role_data.get('work_schedule', [])):
                score += 4  

            # ✅ Has Car Check
            if employee.get('have_car', 'No') == 'Yes':
                score += 3  

            # ✅ Distance Calculation
            distance = calculate_distance((employee['latitude'], employee['longitude']), (site['latitude'], site['longitude']))
            if distance <= 40:
                score += 2  

            employee_scores.append({
                'employee': employee,
                'score': score,
                'distance': distance
            })

        # ✅ Sort employees by best match (highest score first, then highest rating)
        sorted_employees = sorted(employee_scores, key=lambda x: (-x['score'], x['distance'], -x['employee'].get('rating', 0)))

        print(f"🎯 {len(sorted_employees)} Employees Available for {role} at Site {site['site_id']}")

        # ✅ First, assign employees within 40km
        assigned = 0
        for emp_data in sorted_employees:
            if assigned >= required_count:
                break

            employee = emp_data['employee']

            if employee['worker_id'] in assigned_employees:
                continue

            # ✅ Enforce 40km limit unless no other choice
            if emp_data['distance'] > 40 and assigned < required_count:
                continue  

            print(f"⬆ Attempting Assignment: {employee['worker_id']} → {site['site_id']} (Score: {emp_data['score']}, Distance: {emp_data['distance']} km)")

            try:
                db.collection('assignments').add({
                    'employee_id': employee['worker_id'],
                    'job_site_id': site['site_id'],
                    'role': role,
                    'distance': emp_data['distance'],  # ✅ Add Distance
                    'assigned_date': datetime.now()
                })
                print(f"✅ Assigned {employee['worker_id']} to {site['site_id']} for {role} with score {emp_data['score']} and distance {emp_data['distance']} km.")
                assigned_employees.add(employee['worker_id'])
                assigned_counts[role] += 1
                assigned += 1
            except Exception as e:
                print(f"❌ Firestore Write Failed: {e}")

        # ✅ If no one was assigned within 40km, assign best available
        if assigned < required_count:
            print(f"⚠️ No employees found within 40km for {role} at {site['site_id']}. Assigning best available...")

            for emp_data in sorted_employees:
                if assigned >= required_count:
                    break

                employee = emp_data['employee']
                if employee['worker_id'] in assigned_employees:
                    continue

                print(f"⬆ Assigning Fallback: {employee['worker_id']} → {site['site_id']} (Distance: {emp_data['distance']} km)")

                try:
                    db.collection('assignments').add({
                        'employee_id': employee['worker_id'],
                        'job_site_id': site['site_id'],
                        'role': role,
                        'distance': emp_data['distance'],  # ✅ Add Distance
                        'assigned_date': datetime.now()
                    })
                    print(f"✅ Assigned {employee['worker_id']} (Fallback) to {site['site_id']} for {role}. Distance: {emp_data['distance']} km.")
                    assigned_employees.add(employee['worker_id'])
                    assigned_counts[role] += 1
                    assigned += 1
                except Exception as e:
                    print(f"❌ Firestore Write Failed: {e}")



🔎 Processing site SITE5148 - Role: Cleaner, Needs: 1
🎯 50 Employees Available for Cleaner at Site SITE5148
⬆ Attempting Assignment: J30DTGPH → SITE5148 (Score: 14, Distance: 8.78235734270652 km)
✅ Assigned J30DTGPH to SITE5148 for Cleaner with score 14 and distance 8.78235734270652 km.

🔎 Processing site SITE5148 - Role: Labour, Needs: 1
🎯 50 Employees Available for Labour at Site SITE5148
⬆ Attempting Assignment: FVVIJE3B → SITE5148 (Score: 14, Distance: 4.05819579137333 km)
✅ Assigned FVVIJE3B to SITE5148 for Labour with score 14 and distance 4.05819579137333 km.

🔎 Processing site SITE8123 - Role: Labour, Needs: 3
🎯 50 Employees Available for Labour at Site SITE8123
⚠️ No employees found within 40km for Labour at SITE8123. Assigning best available...
⬆ Assigning Fallback: U54CSNUX → SITE8123 (Distance: 52.744354760859906 km)
✅ Assigned U54CSNUX (Fallback) to SITE8123 for Labour. Distance: 52.744354760859906 km.
⬆ Assigning Fallback: ZQUO11WI → SITE8123 (Distance: 53.90167175104349 