In [52]:
import firebase_admin
from firebase_admin import credentials, firestore
import geopy.distance
from sklearn.ensemble import RandomForestClassifier
import pickle
import pandas as pd
from datetime import datetime


from geopy.geocoders import Nominatim


# Initialize Firebase Admin SDK
cred = credentials.Certificate('serviceAccountKey.json')


def initialize_firebase():
    try:
        # Check if Firebase has already been initialized
        if not firebase_admin._apps:
            cred = credentials.Certificate('serviceAccountKey.json')
            firebase_admin.initialize_app(cred)
        else:
            print("Firebase already initialized.")
    except Exception as e:
        print(f"Error initializing Firebase: {e}")

# Initialize Firebase (only once)
initialize_firebase()

# Initialize Firestore client
db = firestore.client()


# Fetch employee data
employees_ref = db.collection('employees')
employees = employees_ref.stream()

# Fetch job site data
job_sites_ref = db.collection('job_sites')
job_sites = job_sites_ref.stream()

# Example: employee and job site data
employee_data = [doc.to_dict() for doc in employees]
job_site_data = [doc.to_dict() for doc in job_sites]


Firebase already initialized.


In [53]:
# Geocoding function

geolocator = Nominatim(user_agent="OptiShiftApp")

def geocode_address(address):
    geolocator = Nominatim(user_agent="OptiShiftApp")
    location = geolocator.geocode(address, timeout=10)  # Increased timeout to 10 seconds
    if location:
        return location.latitude, location.longitude
    else:
        return None, None  # Return None if the geocoding fails

# Update employee and job site data with latitude and longitude
for employee in employee_data:
    lat, lon = geocode_address(employee['home_address'])
    employee['latitude'] = lat
    employee['longitude'] = lon

for site in job_site_data:
    lat, lon = geocode_address(site['location'])
    site['latitude'] = lat
    site['longitude'] = lon


In [54]:
# Printing employee id, latitude and longitude

for employee in employee_data:
    print(f"Employee ID: {employee['worker_id']}, Latitude: {employee['latitude']}, Longitude: {employee['longitude']}")
    print(f"Employee ID: {employee['worker_id']}, Home Address: {employee['home_address']}")
    print("\n")



Employee ID: KOO0MP1I, Latitude: 43.72182305209789, Longitude: -79.30367397595893
Employee ID: KOO0MP1I, Home Address: 1871 O'Connor Drive, Toronto


Employee ID: ORRFXDUB, Latitude: 43.6067546, Longitude: -79.64894838560983
Employee ID: ORRFXDUB, Home Address: 4557 Hurontario Street, Mississauga


Employee ID: TGKJTPX9, Latitude: 43.6711575, Longitude: -79.2941531
Employee ID: TGKJTPX9, Home Address: 2255B Queen Street East, Toronto


Employee ID: VKBOB7Z4, Latitude: 43.6711575, Longitude: -79.2941531
Employee ID: VKBOB7Z4, Home Address: 2255B Queen Street East, Toronto


Employee ID: P37NH11K, Latitude: 43.58540415, Longitude: -79.64389344844595
Employee ID: P37NH11K, Home Address: 350 Burnhamthorpe Road West #401, Mississauga


Employee ID: PKM5M17G, Latitude: None, Longitude: None
Employee ID: PKM5M17G, Home Address: 50 Ashtonbee Road Unit 2, Scarborough


Employee ID: L770NM18, Latitude: 43.71732965179992, Longitude: -79.31328883775146
Employee ID: L770NM18, Home Address: 111 Berm

In [55]:
employees_ref = db.collection('employees')

# Find employees with missing lat/long
for doc in employees_ref.stream():
    employee_data = doc.to_dict()
    if employee_data.get('latitude') is None or employee_data.get('longitude') is None:
        print(f"Employee ID: {doc.id}, Address: {employee_data.get('address', 'No Address')}")


Employee ID: 1XHNzFQqkq0HIOmK1Wxg, Address: No Address
Employee ID: 1zbcW2qqPhixLNw4QhV0, Address: No Address
Employee ID: 35Ppe5PLEGFKKFzglydS, Address: No Address
Employee ID: 3AzQxQsDL28kRyTTdOEv, Address: No Address
Employee ID: 3nSjIJA9M8Dw8wK4vfCQ, Address: No Address
Employee ID: 4DCFeOP9g8pjNtEUH7Xc, Address: No Address
Employee ID: 4MXpQopC65CsJxTZEHtx, Address: No Address
Employee ID: 4X0jYChEDwy0D4BpAMwM, Address: No Address
Employee ID: 5IqJuKgoQ502cXsccIeI, Address: No Address
Employee ID: 5bMX9QqDTCMICwb9ywpl, Address: No Address
Employee ID: 6aFsfBdX1PzRwi8g1qOr, Address: No Address
Employee ID: 8BUCkq2YAusWO7n19j39, Address: No Address
Employee ID: A3tu3n7THXD3x47B2hOx, Address: No Address
Employee ID: AtPA2hAp2GiWqkJeGkWS, Address: No Address
Employee ID: BE21B9HC53PaKWKIPHAE, Address: No Address
Employee ID: BN83TL892zh417viQA2Q, Address: No Address
Employee ID: DghW5uwy12reznHfiLLa, Address: No Address
Employee ID: DqsFfZMQ0Azg9BKuDqYH, Address: No Address
Employee I

In [56]:
# Feature Engineering
def calculate_distance(employee_location, site_location):
    """Calculate distance between two locations."""
    return geopy.distance.distance(employee_location, site_location).km

# Create feature vectors
features = []
for employee in employee_data:
    for site in job_site_data:
        if employee['latitude'] is not None and site['latitude'] is not None:
            # Calculate distance between employee and job site
            employee_location = (employee['latitude'], employee['longitude'])
            site_location = (site['latitude'], site['longitude'])
            distance = calculate_distance(employee_location, site_location)
            
            feature_vector = {
                'employee_id': employee['worker_id'],
                'job_site_id': site['site_id'],
                'distance': distance,
                'available': employee['availability'],  # Other relevant features
                'role': employee['role']
            }
            features.append(feature_vector)




TypeError: string indices must be integers, not 'str'

In [42]:

#print features as a dataframe
df = pd.DataFrame(features)
print(df.head(20))


   employee_id job_site_id   distance     available  role
0     KOO0MP1I    SITE1256   9.721641  [7:00-15:30]     0
1     KOO0MP1I    SITE1877  41.793459  [7:00-15:30]     0
2     KOO0MP1I    SITE1033  11.603362  [7:00-15:30]     0
3     KOO0MP1I    SITE6928  42.226789  [7:00-15:30]     0
4     KOO0MP1I    SITE9352  25.106379  [7:00-15:30]     0
5     KOO0MP1I    SITE6630  62.644572  [7:00-15:30]     0
6     KOO0MP1I    SITE3726  62.644572  [7:00-15:30]     0
7     KOO0MP1I    SITE6008  69.185013  [7:00-15:30]     0
8     KOO0MP1I    SITE8056  12.535557  [7:00-15:30]     0
9     KOO0MP1I    SITE3818  69.329909  [7:00-15:30]     0
10    KOO0MP1I    SITE5812   9.494257  [7:00-15:30]     0
11    KOO0MP1I    SITE5403   9.343881  [7:00-15:30]     0
12    KOO0MP1I    SITE1029  12.317600  [7:00-15:30]     0
13    KOO0MP1I    SITE2388  62.644572  [7:00-15:30]     0
14    KOO0MP1I    SITE7220   3.063226  [7:00-15:30]     0
15    KOO0MP1I    SITE7982  25.106379  [7:00-15:30]     0
16    KOO0MP1I

In [44]:
from sklearn.preprocessing import LabelEncoder
import re

# Function to convert time range (e.g., '7:00-15:30' → 1 for available)
def convert_availability(time_range):
    # If time_range is a string and contains the range '7:00', it's available
    if isinstance(time_range, str) and '7:00' in time_range:  # Modify this logic as needed
        return 1  # Available
    return 0  # Unavailable

# Function to extract start time (e.g., '7:00-15:30' → 7)
def extract_start_time(time_range):
    if isinstance(time_range, str):
        match = re.match(r"(\d+):\d+-", time_range)
        if match:
            return int(match.group(1))
    return None

# Function to extract end time (e.g., '7:00-15:30' → 15)
def extract_end_time(time_range):
    if isinstance(time_range, str):
        match = re.match(r"- (\d+):\d+", time_range)
        if match:
            return int(match.group(1))
    return None

# Encode 'role' as a numeric feature
label_encoder = LabelEncoder()
for feature in features:
    feature['role'] = label_encoder.fit_transform([feature['role']])[0]  # Convert role to numeric

# Convert feature vectors to a DataFrame
df = pd.DataFrame(features)

# First, extract start and end times for time range strings in the 'available' column
df['start_time'] = df['available'].apply(extract_start_time)
df['end_time'] = df['available'].apply(extract_end_time)

# Now, apply the conversion function to the 'available' column (converts to binary)
df['available'] = df['available'].apply(convert_availability)

# Convert the columns to the correct types
df['distance'] = df['distance'].astype(float)  # Ensure 'distance' is a float
df['available'] = df['available'].astype(int)  # Ensure 'available' is an int

# Define the target variable (e.g., job_site_assignment: 1 for assigned, 0 for not assigned)
df['assigned'] = [1 if x['employee_id'] == x['job_site_id'] else 0 for x in features]

# Split data into features (X) and target (y)
X = df[['distance', 'available', 'role', 'start_time', 'end_time']]  # Add start_time and end_time as features if needed
y = df['assigned']


# prinf the data as a dataframe

print(X.head(20))


     distance  available  role start_time end_time
0    9.721641          0     0       None     None
1   41.793459          0     0       None     None
2   11.603362          0     0       None     None
3   42.226789          0     0       None     None
4   25.106379          0     0       None     None
5   62.644572          0     0       None     None
6   62.644572          0     0       None     None
7   69.185013          0     0       None     None
8   12.535557          0     0       None     None
9   69.329909          0     0       None     None
10   9.494257          0     0       None     None
11   9.343881          0     0       None     None
12  12.317600          0     0       None     None
13  62.644572          0     0       None     None
14   3.063226          0     0       None     None
15  25.106379          0     0       None     None
16  25.164841          0     0       None     None
17  25.164841          0     0       None     None
18  10.756572          0     0 

In [37]:
# Train the model
model = RandomForestClassifier()
model.fit(X, y)

# Save the model as a .pkl file
with open('job_assignment_model.pkl', 'wb') as f:
    pickle.dump(model, f)

print("Model trained and saved successfully.")


Model trained and saved successfully.


In [31]:
from sklearn.preprocessing import LabelEncoder
import pickle
import pandas as pd

# Load the trained model when needed
with open('job_assignment_model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

# Label encoder used during training
label_encoder = LabelEncoder()

# Simulate encoding for 'role' feature (use the same encoder you used in training)
role_data = ['Cleaner']  # Example role
role_encoded = label_encoder.fit_transform(role_data)  # Example: Encoding the role
role_encoded_value = role_encoded[0]  # Get the encoded value

# Prepare the new data for prediction, including all necessary features
new_data = pd.DataFrame({
    'distance': [10],
    'available': [1],  # Example feature
    'role': [role_encoded_value],  # Use the encoded role
    'start_time': [0],  # Assuming a default value (adjust as needed)
    'end_time': [0]    # Assuming a default value (adjust as needed)
})

# Use the model for predictions
prediction = loaded_model.predict(new_data)
print("Predicted assignment:", prediction)


Predicted assignment: [0]


In [None]:
from datetime import datetime
import pandas as pd

# Function to calculate distance between two geographical points
def calculate_distance(location1, location2):
    # Assuming this function is already defined, e.g., using the Haversine formula
    # location1 and location2 are tuples of (latitude, longitude)
    pass

# Daily predictions and updates
def assign_jobs_daily():
    # Fetch updated employee and job site data
    employees = employees_ref.stream()
    job_sites = job_sites_ref.stream()

    # Calculate new features
    features = []
    for employee in employees:
        employee_data = employee.to_dict()  # Convert DocumentSnapshot to dictionary
        for site in job_sites:
            site_data = site.to_dict()  # Convert DocumentSnapshot to dictionary
            
            # Safely get latitude and longitude using .get() to avoid KeyError
            employee_latitude = employee_data.get('latitude', None)
            employee_longitude = employee_data.get('longitude', None)
            site_latitude = site_data.get('latitude', None)
            site_longitude = site_data.get('longitude', None)
            
            # If any of the latitude/longitude values are missing, skip this feature
            if None in [employee_latitude, employee_longitude, site_latitude, site_longitude]:
                continue
            
            # Calculate distance and generate feature vector
            employee_location = (employee_latitude, employee_longitude)
            site_location = (site_latitude, site_longitude)
            distance = calculate_distance(employee_location, site_location)
            
            feature_vector = {
                'employee_id': employee_data['worker_id'],
                'job_site_id': site_data['site_id'],
                'distance': distance,
                'available': employee_data['availability'],
                'role': employee_data['role']
            }
            features.append(feature_vector)
    
    # Ensure features list is not empty
    if not features:
        print("No valid features generated. Exiting.")
        return

    # Predict using the saved model
    new_data = pd.DataFrame(features)
    predictions = loaded_model.predict(new_data)
    
    # Assign employees to job sites based on predictions
    for i, prediction in enumerate(predictions):
        if prediction == 1:
            employee_id = features[i]['employee_id']
            job_site_id = features[i]['job_site_id']
            # Update the assignment in Firestore
            db.collection('shift_assignments').add({
                'employee_id': employee_id,
                'job_site_id': job_site_id,
                'assigned_date': datetime.now()
            })

# Call daily assignment function
assign_jobs_daily()


No valid features generated. Exiting.
