<a href="https://colab.research.google.com/github/2003UJAN/Vehicle-Service-Scheduling/blob/main/Dataset_Generator_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Importing Libraries**

In [13]:
import pandas as pd
import random
from datetime import datetime, timedelta

**Define possible values for categorical features**

In [14]:
# Vehicle-level descriptors.
vehicle_models = [
    "Car",
    "SUV",
    "Van",
    "Truck",
    "Bus",
    "Motorcycle",
]
fuel_types = [
    "Diesel",
    "Petrol",
    "Electric",
]
transmission_types = [
    "Automatic",
    "Manual",
]
owner_types = [
    "First",
    "Second",
    "Third",
]

# Upkeep and component-condition levels.
maintenance_history = [
    "Good",
    "Average",
    "Poor",
]
tire_conditions = [
    "New",
    "Good",
    "Worn Out",
]
# Same levels as the tires, kept as an independent list object.
brake_conditions = [
    "New",
    "Good",
    "Worn Out",
]
battery_statuses = [
    "New",
    "Good",
    "Weak",
]

**Function to generate a random date within the past `days_range` days (default: 1825, roughly 5 years)**

In [15]:
def random_date(days_range: int = 1825, reference=None) -> datetime:
    """Return a random datetime at most ``days_range`` days before a reference.

    Args:
        days_range: Largest number of whole days to step back; the offset is
            drawn uniformly from ``0..days_range`` inclusive. The default of
            1825 is 5 * 365 days.
        reference: ``datetime`` to count back from. When ``None`` (the
            default) the current local time, ``datetime.today()``, is used,
            which is the original behaviour. Passing a fixed value makes the
            result depend only on the state of the ``random`` module.

    Returns:
        ``reference`` minus a whole number of days, so the time-of-day part
        of ``reference`` is carried over unchanged.

    Raises:
        ValueError: If ``days_range`` is negative (raised by
            ``random.randint`` for an empty range).
    """
    if reference is None:
        reference = datetime.today()
    days_back = random.randint(0, days_range)
    return reference - timedelta(days=days_back)

**Generate synthetic data**

In [16]:
# Seed the global RNG so a fresh "Restart & Run All" (or re-running just this
# cell) draws the same records. The two date columns are still computed
# relative to the day the notebook is executed.
SEED = 42
random.seed(SEED)

data = []
num_records = 50000

for i in range(num_records):
    # Every feature is drawn independently of the others.
    vehicle = {
        "Vehicle_ID": i + 1,
        "Vehicle_Model": random.choice(vehicle_models),
        "Mileage": random.randint(5000, 200000),  # in km
        "Maintenance_History": random.choice(maintenance_history),
        "Reported_Issues": random.randint(0, 10),
        "Vehicle_Age": random.randint(1, 20),
        "Fuel_Type": random.choice(fuel_types),
        "Transmission_Type": random.choice(transmission_types),
        "Engine_Size": random.randint(1000, 5000),
        "Odometer_Reading": random.randint(1000, 300000),
        "Last_Service_Date": random_date().strftime("%Y-%m-%d"),
        # NOTE(review): random_date() only steps backwards from today, so
        # every generated warranty expiry lies in the past - confirm that
        # this is intended.
        "Warranty_Expiry_Date": random_date(365 * 3).strftime("%Y-%m-%d"),
        "Owner_Type": random.choice(owner_types),
        "Insurance_Premium": random.randint(5000, 50000),
        "Service_History": random.randint(1, 10),
        "Accident_History": random.randint(0, 5),
        "Fuel_Efficiency": round(random.uniform(5, 25), 2),
        "Tire_Condition": random.choice(tire_conditions),
        "Brake_Condition": random.choice(brake_conditions),
        "Battery_Status": random.choice(battery_statuses),
    }

    reported_issues = vehicle["Reported_Issues"]
    poor_maintenance = vehicle["Maintenance_History"] == "Poor"
    accident_prone = vehicle["Accident_History"] > 2

    # Target label: a vehicle needs service when any one risk signal fires.
    need_service = int(reported_issues > 5 or poor_maintenance or accident_prone)
    vehicle["Need_Service"] = need_service

    # Service priority (1 = most urgent).
    # The "Poor maintenance -> 1" and "3+ accidents -> 2" rules used to sit
    # in elif arms that could only run when need_service was 0, yet both
    # conditions force need_service to 1, so they never executed. They are
    # now evaluated for vehicles that need service. Any such vehicle has
    # more than 5 issues, poor maintenance, or 3+ accidents, so it always
    # lands on priority 1 or 2.
    if not need_service:
        priority = random.randint(3, 5)
    elif reported_issues >= 8 or poor_maintenance:
        priority = 1
    else:
        # Remaining cases: 6-7 reported issues and/or 3+ accidents.
        priority = 2

    vehicle["Service_Priority"] = priority

    # Vehicles that need service get a longer estimate (1-5) than routine
    # visits (0.5-2).
    estimated_time = random.uniform(1, 5) if need_service else random.uniform(0.5, 2)
    vehicle["Estimated_Service_Time"] = round(estimated_time, 2)

    data.append(vehicle)

**Convert to DataFrame**

In [17]:
# One row per generated vehicle dict; the dict keys become the columns.
df = pd.DataFrame(data=data)

**Save to CSV file**

In [18]:
# Write the dataset to a CSV file under the given relative name;
# index=False leaves the DataFrame's row index out of the file.
csv_filename = "vehicle_service_data.csv"
df.to_csv(path_or_buf=csv_filename, index=False)

print(f"Dataset saved as {csv_filename}")

Dataset saved as vehicle_service_data.csv
