In [1]:
import csv
import random
from datetime import datetime, timedelta, timezone

In [2]:
def generate_vehicle_data(vehicle_id, event_created_at):
    """Return one randomly generated telemetry row for a vehicle.

    Args:
        vehicle_id: Identifier copied unchanged into the first column.
        event_created_at: Event timestamp copied unchanged into the second column.

    Returns:
        A 16-element list ordered as: vehicle_id, event_created_at, latitude,
        longitude, speed, engine_status, fuel_consumption_current,
        fuel_consumption_average, fuel_consumption_unit, hard_accelerations,
        smooth_accelerations, hard_brakes, smooth_brakes, sharp_turns,
        gentle_turns, maintenance_required.
    """
    def rounded_uniform(low, high, digits):
        # Uniform float between low and high, rounded to `digits` decimals.
        return round(random.uniform(low, high), digits)

    # Random draws are made in output-column order.
    # Latitude 34..42 and longitude -125..-70: a rough US bounding box.
    position = [rounded_uniform(34.0, 42.0, 4), rounded_uniform(-125.0, -70.0, 4)]
    speed = rounded_uniform(50.0, 80.0, 1)
    engine_status = random.choice(["normal", "check_engine_light_on"])
    # Two independent draws: current consumption first, then average.
    fuel_consumption = [rounded_uniform(5.0, 10.0, 1) for _ in range(2)]

    # Inclusive (low, high) bounds for each driver-behaviour counter.
    behaviour_bounds = (
        (0, 5),    # hard_accelerations
        (10, 20),  # smooth_accelerations
        (0, 3),    # hard_brakes
        (5, 15),   # smooth_brakes
        (0, 3),    # sharp_turns
        (4, 8),    # gentle_turns
    )
    behaviour_counts = [random.randint(low, high) for low, high in behaviour_bounds]

    maintenance_required = random.choice([True, False])

    return [
        vehicle_id,
        event_created_at,
        *position,
        speed,
        engine_status,
        *fuel_consumption,
        "L/100km",
        *behaviour_counts,
        maintenance_required,
    ]

In [3]:
def generate_vehicle_info(vehicle_id):
    """Create a fictitious descriptive record for one vehicle.

    Args:
        vehicle_id: Identifier stored unchanged under the "vehicle_id" key.

    Returns:
        A dict with keys "vehicle_id", "model", "year" and "color" (in that
        order); model, year (2015-2024 inclusive) and color are drawn at random.
    """
    body_styles = ("Sedan", "SUV", "Truck", "Hatchback", "Coupe", "Convertible")
    paint_colors = ("Red", "Blue", "Green", "Black", "White")

    # Draw order is model, year, color; further attributes can be appended here.
    vehicle = {"vehicle_id": vehicle_id}
    vehicle["model"] = random.choice(body_styles)
    vehicle["year"] = random.randint(2015, 2024)
    vehicle["color"] = random.choice(paint_colors)
    return vehicle

In [4]:
def generate_maintenance_records(vehicle_id, make_year, current_date=None):
    """Generate up to five random past service records for a vehicle.

    Args:
        vehicle_id: Identifier copied into every record.
        make_year: Vehicle make year; a candidate service whose date falls in
            an earlier calendar year is dropped.
        current_date: Reference ``datetime`` that service dates are counted
            back from. Defaults to ``datetime.now()``; pass a fixed value to
            get reproducible dates (together with a seeded ``random``).

    Returns:
        A list of dicts with keys "vehicle_id", "service_date",
        "next_service_due" (both formatted ``YYYY-MM-DD``) and "service_type".
        The list may be empty.
    """
    if current_date is None:
        current_date = datetime.now()

    service_types = [
        "Oil Change",
        "Tire Rotation",
        "Brake Inspection",
        "Transmission Check",
        "General Service",
    ]

    records = []

    # Number of candidate services (0 to 5); skipped candidates are not retried,
    # so fewer records than this may be returned.
    num_services = random.randint(0, 5)

    for _ in range(num_services):
        # Service happened between 8 and 365 days before the reference date,
        # i.e. more than one week ago.
        service_date = current_date - timedelta(days=random.randint(8, 365))
        if service_date.year < make_year:
            # A service cannot predate the vehicle's make year.
            continue

        # Next service falls due 30 to 180 days (roughly 1 to 6 months) later.
        next_service_due = service_date + timedelta(days=random.randint(30, 180))

        records.append({
            "vehicle_id": vehicle_id,
            "service_date": service_date.strftime('%Y-%m-%d'),
            "next_service_due": next_service_due.strftime('%Y-%m-%d'),
            "service_type": random.choice(service_types),
        })

    return records

In [28]:
num_records = 500000
# Timezone-aware UTC "now": event timestamps below carry a 'Z' (UTC) suffix,
# so they must be derived from UTC rather than naive local time.
current_time = datetime.now(timezone.utc)
one_week_ago = current_time - timedelta(weeks=1)
records = []
vehicle_info_records = []
maintenance_records = []

# Pool of candidate vehicle IDs. randint can repeat, so the pool holds fewer
# than num_records // 10 distinct IDs.
vehicle_ids = [f"V{random.randint(1000, num_records)}" for _ in range(num_records // 10)]

# IDs that already have a telemetry row. A set gives O(1) membership checks;
# rescanning `records` on every iteration would be quadratic.
seen_vehicle_ids = set()

for i in range(num_records):
    # Generate a random timestamp within the last week
    random_seconds = random.randint(0, 7 * 24 * 60 * 60)  # Random seconds within a week
    timestamp = one_week_ago + timedelta(seconds=random_seconds)

    # Draw a vehicle ID from the pool; the same ID can be drawn many times
    vehicle_id = random.choice(vehicle_ids)

    # Only the first draw of each vehicle ID produces output, so every output
    # list ends up with at most one telemetry/info entry per vehicle.
    if vehicle_id in seen_vehicle_ids:
        continue
    seen_vehicle_ids.add(vehicle_id)

    vehicle_info = generate_vehicle_info(vehicle_id)
    # Drop tzinfo before formatting so the text stays "<ISO timestamp>Z"
    # instead of ending in "+00:00Z".
    event_created_at = timestamp.replace(tzinfo=None).isoformat() + 'Z'
    records.append(generate_vehicle_data(vehicle_id, event_created_at))
    vehicle_info_records.append(vehicle_info)

    # Generate maintenance records based on the make year
    maintenance_records.extend(generate_maintenance_records(vehicle_id, vehicle_info["year"]))

print(f"Number of records created: {len(records)}")

Number of records created: 47535


In [29]:
# Persist the telemetry rows as CSV; this file is what feeds the Snowpipe Streaming Java client.
with open('vehicle_data.csv', 'w', newline='') as telemetry_file:
    telemetry_writer = csv.writer(telemetry_file)
    # Column names, in the same order as the rows built by generate_vehicle_data.
    telemetry_header = (
        "vehicle_id",
        "event_created_at",
        "latitude",
        "longitude",
        "speed",
        "engine_status",
        "fuel_consumption_current",
        "fuel_consumption_average",
        "fuel_consumption_unit",
        "hard_accelerations",
        "smooth_accelerations",
        "hard_brakes",
        "smooth_brakes",
        "sharp_turns",
        "gentle_turns",
        "maintenance_required",
    )
    telemetry_writer.writerow(telemetry_header)
    # One CSV line per generated telemetry row.
    telemetry_writer.writerows(records)

In [30]:
# Persist the per-vehicle descriptive records as CSV.
with open('vehicle_info.csv', 'w', newline='') as info_file:
    info_writer = csv.writer(info_file)
    # Header row; note the "year" value is published under the "make_year" column.
    info_writer.writerow(["vehicle_id", "model", "make_year", "color"])
    # One line per vehicle, fields in header order.
    info_writer.writerows(
        [entry["vehicle_id"], entry["model"], entry["year"], entry["color"]]
        for entry in vehicle_info_records
    )

In [31]:
# Persist the maintenance history as CSV.
with open('maintenance_records.csv', 'w', newline='') as maintenance_file:
    maintenance_writer = csv.writer(maintenance_file)
    # The column names double as the keys of each maintenance record dict.
    maintenance_columns = ["vehicle_id", "service_date", "next_service_due", "service_type"]
    maintenance_writer.writerow(maintenance_columns)
    maintenance_writer.writerows(
        [entry[column] for column in maintenance_columns]
        for entry in maintenance_records
    )