In [8]:
pip install faker

Note: you may need to restart the kernel to use updated packages.


In [9]:
import random
import uuid
from datetime import datetime, timedelta
from faker import Faker

# Shared Faker instance used by generate_order() and generate_contractor().
# No seed is set here, so the generated names, phones and addresses are
# expected to differ between runs.
fake = Faker()

# Define order structure
def generate_order():
    """Build one synthetic cleaning order as a plain dict.

    The order is always "Pending" with no fulfilment time, was placed between
    1 and 24 whole hours before the call, requests one to three distinct
    cleaning types, and carries two *distinct* preferred start times written
    as "H:00" for hours 6 through 20. Customer and address fields are produced
    by the module-level ``fake`` instance.

    Returns:
        dict: with keys orderId, orderStatus, orderPlacedTime (ISO-8601
        string), orderFulfilledTime, cleaningSpecifics, prefTime,
        customerFName, customerLName, customerPhoneNumber, streetAddress,
        city, state and zipCode.
    """
    cleaning_types = ['driveway', 'walkway', 'deicing', 'salting']
    order_id = str(uuid.uuid4())
    order_time = datetime.now() - timedelta(hours=random.randint(1, 24))
    # Sample hours without replacement: two independent randint() draws could
    # return the same hour and yield a duplicated preferred slot.
    preferred_times = [f"{hour}:00" for hour in random.sample(range(6, 21), k=2)]

    return {
        "orderId": order_id,
        "orderStatus": "Pending",
        "orderPlacedTime": order_time.isoformat(),
        "orderFulfilledTime": None,
        "cleaningSpecifics": random.sample(cleaning_types, k=random.randint(1, 3)),
        "prefTime": preferred_times,
        "customerFName": fake.first_name(),
        "customerLName": fake.last_name(),
        "customerPhoneNumber": fake.phone_number(),
        "streetAddress": fake.street_address(),
        "city": fake.city(),
        "state": fake.state_abbr(),
        "zipCode": fake.zipcode()
    }

# Define contractor structure
def generate_contractor():
    """Build one synthetic contractor record as a plain dict.

    The contractor gets a random id, a fake name and phone number, a fake
    latitude/longitude, one to four distinct specialties, a capacity of two
    to five concurrent jobs, an empty active-job list, three fake service
    ZIP codes, and five randomly chosen "H:00" slots (hours 6 through 20)
    filed under today's date.

    Returns:
        dict: with keys contractorId, name, phoneNumber, currentLocation,
        specialties, maxConcurrentJobs, activeJobs, serviceAreaZipCodes and
        availability.
    """
    all_specialties = ['driveway', 'walkway', 'deicing', 'salting']
    date_key = datetime.now().strftime("%Y-%m-%d")
    hourly_slots = [f"{slot_hour}:00" for slot_hour in range(6, 21)]
    open_slots = random.sample(hourly_slots, k=5)

    # Fields are filled in the same order as the keys appear in the record.
    contractor = {
        "contractorId": str(uuid.uuid4()),
        "name": fake.name(),
        "phoneNumber": fake.phone_number(),
    }
    contractor["currentLocation"] = {
        "lat": fake.latitude(),
        "lon": fake.longitude(),
    }
    contractor["specialties"] = random.sample(all_specialties, k=random.randint(1, 4))
    contractor["maxConcurrentJobs"] = random.randint(2, 5)
    contractor["activeJobs"] = []
    contractor["serviceAreaZipCodes"] = [fake.zipcode() for _ in range(3)]
    contractor["availability"] = {date_key: open_slots}
    return contractor

# Build the synthetic dataset: ten orders and five contractors.
orders = [
    generate_order()
    for _order_index in range(10)
]
contractors = [
    generate_contractor()
    for _contractor_index in range(5)
]




In [10]:
# Preview the first two generated records of each kind
from pprint import pprint

for heading, records in (("Sample Orders:", orders), ("\nSample Contractors:", contractors)):
    print(heading)
    pprint(records[:2])

Sample Orders:
[{'city': 'Jenniferfort',
  'cleaningSpecifics': ['walkway', 'deicing'],
  'customerFName': 'Jeanne',
  'customerLName': 'Liu',
  'customerPhoneNumber': '445.868.5037x297',
  'orderFulfilledTime': None,
  'orderId': 'c7ebd4be-8dfe-4bc2-a54b-db473b358756',
  'orderPlacedTime': '2025-07-13T04:12:03.208662',
  'orderStatus': 'Pending',
  'prefTime': ['12:00', '14:00'],
  'state': 'ME',
  'streetAddress': '5134 Scott Falls',
  'zipCode': '73775'},
 {'city': 'Floydstad',
  'cleaningSpecifics': ['driveway'],
  'customerFName': 'Joseph',
  'customerLName': 'Webb',
  'customerPhoneNumber': '6974285659',
  'orderFulfilledTime': None,
  'orderId': 'ba9edb55-ab3c-4701-92e1-7ce47364fdaf',
  'orderPlacedTime': '2025-07-12T22:12:03.210656',
  'orderStatus': 'Pending',
  'prefTime': ['18:00', '9:00'],
  'state': 'OR',
  'streetAddress': '537 Todd Spurs Apt. 531',
  'zipCode': '57518'}]

Sample Contractors:
[{'activeJobs': [],
  'availability': {'2025-07-13': ['16:00', '19:00', '9:00', 

In [16]:
import math
from datetime import datetime

def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in decimal degrees.
        lat2, lon2: Latitude and longitude of the second point, in decimal degrees.

    Returns:
        float: Haversine distance in km, using a spherical Earth of radius 6371 km.
    """
    R = 6371  # Earth radius in km
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = (math.sin(dlat/2)**2 +
         math.cos(math.radians(lat1)) *
         math.cos(math.radians(lat2)) *
         math.sin(dlon/2)**2)
    # Floating-point rounding can leave `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
    return R * c

def score_contractor(order, contractor, today):
    """Score how well a contractor fits an order; higher is better (0 to 12).

    Points are awarded independently for five criteria:
      * +3 if any of the order's preferred times is in the contractor's
        availability for `today`;
      * +2 if the order's ZIP code is in the contractor's service area;
      * +2 if the contractor has at least one of the requested specialties;
      * +2 / +1 / +0 for a load ratio below 0.5 / below 1 / otherwise;
      * +3 / +2 / +1 / +0 for a distance under 5 / 10 / 20 km / otherwise.

    Args:
        order (dict): Must provide "prefTime", "zipCode" and "cleaningSpecifics".
        contractor (dict): Must provide "availability", "serviceAreaZipCodes",
            "specialties", "activeJobs", "maxConcurrentJobs" and
            "currentLocation" (with "lat" and "lon").
        today (str): Key into the contractor's availability mapping.

    Returns:
        int: The accumulated score.
    """
    score = 0

    # 1. Availability match
    contractor_times = contractor["availability"].get(today, [])
    if any(time in contractor_times for time in order["prefTime"]):
        score += 3  # High priority for customer satisfaction

    # 2. ZIP code match
    if order["zipCode"] in contractor["serviceAreaZipCodes"]:
        score += 2

    # 3. Skill match (any overlap counts, not full coverage)
    skills_match = set(order["cleaningSpecifics"]) & set(contractor["specialties"])
    if skills_match:
        score += 2

    # 4. Load balancing (available capacity)
    capacity = contractor["maxConcurrentJobs"]
    # A contractor without any capacity is treated as fully loaded instead of
    # raising ZeroDivisionError.
    load_ratio = len(contractor["activeJobs"]) / capacity if capacity > 0 else 1.0
    if load_ratio < 0.5:
        score += 2  # Prefer contractors with lighter loads
    elif load_ratio < 1:
        score += 1  # Acceptable
    # else no points if overloaded

    # 5. Proximity to job (mock distance)
    # Orders carry no coordinates, so the job location is simulated as a
    # random offset of at most 0.02 degrees from the contractor's position.
    # NOTE(review): such an offset keeps the distance well under 5 km, so this
    # term is effectively a constant +3 until real job coordinates are used.
    contractor_lat = float(contractor["currentLocation"]["lat"])
    contractor_lon = float(contractor["currentLocation"]["lon"])
    lat_job = contractor_lat + (random.uniform(-0.02, 0.02))  # Simulate nearby job
    lon_job = contractor_lon + (random.uniform(-0.02, 0.02))

    distance_km = haversine(contractor_lat, contractor_lon, lat_job, lon_job)
    if distance_km < 5:
        score += 3
    elif distance_km < 10:
        score += 2
    elif distance_km < 20:
        score += 1

    return score

def match_orders_to_contractors(orders, contractors):
    """Greedily assign each order to its highest-scoring contractor.

    Orders are handled in the given sequence. For every order, each contractor
    that still has spare capacity is scored with score_contractor() and the
    first one reaching the highest score wins. The winner's "activeJobs" list
    is extended in place, so earlier assignments affect later ones. Orders for
    which no contractor has capacity are left out of the result.

    Args:
        orders (list[dict]): Orders; each must provide "orderId".
        contractors (list[dict]): Contractors; each must provide
            "contractorId", "activeJobs" and "maxConcurrentJobs". Mutated.

    Returns:
        list[dict]: One {"orderId", "contractorId", "score"} entry per
        assigned order.
    """
    date_key = datetime.now().strftime("%Y-%m-%d")
    results = []

    for job in orders:
        winner, top_score = None, -1

        for candidate in contractors:
            has_capacity = len(candidate["activeJobs"]) < candidate["maxConcurrentJobs"]
            if has_capacity:
                candidate_score = score_contractor(job, candidate, date_key)
                # Strict comparison keeps the earliest contractor on ties.
                if candidate_score > top_score:
                    winner, top_score = candidate, candidate_score

        if not winner:
            continue

        results.append({
            "orderId": job["orderId"],
            "contractorId": winner["contractorId"],
            "score": top_score
        })
        # Record the job so the capacity check sees it for later orders.
        winner["activeJobs"].append(job["orderId"])

    return results


In [17]:
# Run the matcher over the generated data and show the resulting assignments.
# The matcher appends to each contractor's "activeJobs", so re-running this
# cell without regenerating `contractors` starts from the already-loaded state.
assignments = match_orders_to_contractors(orders, contractors)
print("Order Assignments:", assignments, sep="\n")

Order Assignments:
[{'orderId': 'c7ebd4be-8dfe-4bc2-a54b-db473b358756', 'contractorId': '13a9a29b-b3eb-4c1d-bf62-d097c4985308', 'score': 10}, {'orderId': 'ba9edb55-ab3c-4701-92e1-7ce47364fdaf', 'contractorId': '1b2a7288-0595-4a6c-becf-26a6703b4656', 'score': 10}, {'orderId': '1cb850c6-efa2-46d1-9962-91f65815df81', 'contractorId': '1b2a7288-0595-4a6c-becf-26a6703b4656', 'score': 10}, {'orderId': '872b9e8b-8838-4c1c-8076-31c6fd17e90d', 'contractorId': '6bc38179-5b68-4f54-b81b-64874cdada5f', 'score': 10}, {'orderId': 'bc331b47-041e-4c8e-80a7-d79d29528412', 'contractorId': 'd58c8847-b4cd-4a19-8684-a63ea568c389', 'score': 10}, {'orderId': '3a3a7a5a-6626-4743-9ba8-ace3cf703f10', 'contractorId': '7951475e-c7a4-4b70-85e9-2a0114d3d7b5', 'score': 10}, {'orderId': 'd7243eda-1139-46ad-a29d-e051997a0090', 'contractorId': '13a9a29b-b3eb-4c1d-bf62-d097c4985308', 'score': 10}, {'orderId': 'b3fd62b1-c026-4852-9e6d-32975316f55b', 'contractorId': '7951475e-c7a4-4b70-85e9-2a0114d3d7b5', 'score': 10}, {'or

In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_distances

def recommend_items(user_vector, item_matrix, top_n=5):
    """
    Recommend items based on cosine distance.

    Args:
        user_vector (np.array): Feature vector for the user.
        item_matrix (np.array): Matrix where each row is an item feature vector.
        top_n (int): Maximum number of recommendations to return. If fewer
            items exist, all of them are returned; a non-positive value
            yields no recommendations.

    Returns:
        tuple[list, list]: ``(indices, distances)`` where ``indices`` are the
        row indices of the closest items, nearest first, and ``distances``
        are the matching cosine distances.
    """
    # A negative top_n would otherwise slice from the end and silently return
    # "all but the last |top_n|" items.
    if top_n <= 0:
        return [], []
    # Compute cosine distances between user and all items
    distances = cosine_distances([user_vector], item_matrix)[0]
    # Smallest distance = most similar. A stable sort keeps equally distant
    # items in their original row order, so results are reproducible.
    recommended_indices = np.argsort(distances, kind="stable")[:top_n]
    return recommended_indices.tolist(), distances[recommended_indices].tolist()