# In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
def generate_coordinates():
    """Draw a random coordinate pair.

    Returns:
        A 2-tuple whose first value is uniform on [10.0, 30.0] and whose
        second value is uniform on [70.0, 90.0], each rounded to six
        decimal places. The caller in this file unpacks the pair as
        (latitude, longitude).
    """
    # The first draw must stay first so the random stream is consumed in
    # the same order as the returned tuple.
    first = random.uniform(10.0, 30.0)
    second = random.uniform(70.0, 90.0)
    return round(first, 6), round(second, 6)
def generate_fuel_requirement():
    """Return a random fuel quantity, uniform on [0, 60], rounded to two decimals."""
    quantity = random.uniform(0, 60)
    return round(quantity, 2)
def generate_satisfaction():
    """Pick one satisfaction label uniformly at random.

    Returns:
        One of 'Bad', 'Moderate', 'Good' or 'Perfect'.
    """
    return random.choice(['Bad', 'Moderate', 'Good', 'Perfect'])
def generate_weather():
    """Pick one weather label using fixed relative weights.

    'Clear' and 'Misty' each carry weight 0.3; 'Cloudy', 'Rainy', 'Sunny'
    and 'Drizzle' each carry weight 0.1.

    Returns:
        The selected weather label as a string.
    """
    weighted_labels = [
        ('Clear', 0.3),
        ('Misty', 0.3),
        ('Cloudy', 0.1),
        ('Rainy', 0.1),
        ('Sunny', 0.1),
        ('Drizzle', 0.1),
    ]
    labels = [label for label, _ in weighted_labels]
    label_weights = [weight for _, weight in weighted_labels]
    # random.choices returns a list; a single sample is drawn by default.
    (picked,) = random.choices(labels, weights=label_weights)
    return picked
def generate_infrastructure():
    """Pick one local-infrastructure label using fixed relative weights.

    'Poor' and 'Moderate' each carry weight 0.2; 'Good' carries weight 0.6.

    Returns:
        The selected label as a string.
    """
    weighted_labels = [('Poor', 0.2), ('Moderate', 0.2), ('Good', 0.6)]
    labels = [label for label, _ in weighted_labels]
    label_weights = [weight for _, weight in weighted_labels]
    # random.choices returns a list; a single sample is drawn by default.
    (picked,) = random.choices(labels, weights=label_weights)
    return picked
def generate_night_driving(time, night_start_hour=18):
    """Flag whether a timestamp counts as night driving.

    Args:
        time: Any object exposing an integer ``hour`` attribute (for
            example a ``datetime``).
        night_start_hour: First hour of the day (0-23) treated as night.
            Defaults to 18, i.e. 6 PM, which was previously hard-coded.

    Returns:
        'Yes' when ``time.hour`` is at or after ``night_start_hour``,
        otherwise 'No'.

    Note:
        Only the evening side is checked: early-morning hours such as
        02:00 are below the threshold and therefore return 'No'.
    """
    return 'Yes' if time.hour >= night_start_hour else 'No'
def generate_truck_numbers(num_clusters=100, trucks_per_cluster=20):
    """Build the list of truck labels for every cluster.

    Labels have the form ``Cluster{c}_Truck{t}``, with ``c`` running from 1
    to ``num_clusters`` and ``t`` restarting at 1 for each cluster and
    running to ``trucks_per_cluster``.

    Args:
        num_clusters: Number of clusters to enumerate. Defaults to 100.
        trucks_per_cluster: Number of trucks per cluster. Defaults to 20.

    Returns:
        A list of ``num_clusters * trucks_per_cluster`` labels, ordered by
        cluster and then by truck. Empty when either count is zero or
        negative.

    NOTE(review): get_truck_number in this file draws the truck index from
    a file-wide range ((c - 1) * 20 + 1 .. c * 20), so for clusters above 1
    its labels do not appear in this list. Confirm which numbering scheme
    is intended.
    """
    return [
        f"Cluster{cluster}_Truck{truck}"
        for cluster in range(1, num_clusters + 1)
        for truck in range(1, trucks_per_cluster + 1)
    ]
def get_truck_number(cluster_id):
    """Pick a random truck label for the given cluster.

    Args:
        cluster_id: String of the form ``Cluster<N>``; the integer after
            the 'Cluster' marker is parsed out.

    Returns:
        ``Cluster<N>_Truck<T>`` where T is a random integer drawn from the
        20-wide window ending at ``N * 20`` (for N = 3 that is 41..60,
        inclusive).
    """
    cluster_index = int(cluster_id.split('Cluster')[1])
    # Each cluster owns a block of 20 consecutive truck indices.
    window_end = cluster_index * 20
    window_start = window_end - 19
    chosen = random.randint(window_start, window_end)
    return "Cluster{}_Truck{}".format(cluster_index, chosen)
def calculate_truck_capacity(df):
    """Derive a capacity figure for every truck in ``df``.

    For each ``Truck_Number`` the six fuel-requirement columns are summed
    over all of that truck's rows. The largest of the six totals is
    increased by 10 and then rounded down to a multiple of 10.

    Args:
        df: DataFrame containing ``Truck_Number`` and the six
            ``*_Requirement`` fuel columns.

    Returns:
        A Series named 'Truck_Capacity', indexed by ``Truck_Number``.
    """
    fuel_columns = [
        'MS_Requirement',
        'HSD_Requirement',
        'LDO_Requirement',
        'FO_Requirement',
        'LSHS_Requirement',
        'SKO_Requirement',
    ]
    totals_by_truck = df.groupby('Truck_Number')[fuel_columns].sum()
    largest_total = totals_by_truck.max(axis=1)
    # Adding 10 before flooring guarantees the result exceeds the largest total.
    capacity = ((largest_total + 10) // 10) * 10
    return capacity.rename('Truck_Capacity')
# Full list of truck labels.
# NOTE(review): truck_numbers is not referenced again in the visible part of
# this file; confirm whether it is still needed.
truck_numbers = generate_truck_numbers()

# Build 18000 synthetic indent rows. Every value below comes from the global
# `random` generator, which is not seeded in the visible code, so the output
# presumably differs from run to run.
data = []
for i in range(18000):
    indent_id = f"INDENT{i+1}"
    cluster_id = f"Cluster{(i % 100) + 1}"  # Cycles through Cluster1..Cluster100
    truck_number = get_truck_number(cluster_id)
    # The receiving date is 0..730 days after 2022-01-01; the order is always
    # placed exactly 3 days before it.
    date_of_ordering = datetime(2022, 1, 1) + timedelta(days=random.randint(0, 730)) - timedelta(days=3)
    date_of_receiving = date_of_ordering + timedelta(days=3)
    customer_id = f"Customer{i % 2000 + 1}"  # Cycles through Customer1..Customer2000
    # One requirement per fuel type (six in total), in the column order used
    # when the row is appended below.
    fuel_requirements = [generate_fuel_requirement() for _ in range(6)]
    # Filled amount is 75-100% of each requirement, rounded to 2 decimals and
    # capped at the requirement. It is only used for the satisfaction label;
    # it is not part of the row appended below.
    filled_fuel = [min(req, round(random.uniform(0.75, 1.0) * req, 2)) for req in fuel_requirements]  # Generate filled fuel <= requirement
    latitude, longitude = generate_coordinates()
    sealing_status = 'Sealed' if random.random() > 0.5 else 'Not Sealed'
    # Not sealed -> random satisfaction label. Sealed -> 'Perfect' only when
    # every filled amount equals its requirement exactly, otherwise 'Good'.
    # NOTE(review): the exact float equality across all six fuels makes
    # 'Perfect' look very unlikely for sealed rows; confirm this is intended.
    satisfaction = generate_satisfaction() if sealing_status == 'Not Sealed' else 'Perfect' if all(f == r for f, r in zip(filled_fuel, fuel_requirements)) else 'Good'  # Set satisfaction level based on filled fuel
    # Site entry is 06:00-10:00 on the receiving date; exit follows 1-8 hours later.
    entering_time = datetime.combine(date_of_receiving, datetime.min.time()) + timedelta(hours=random.randint(6, 10))
    exiting_time = entering_time + timedelta(hours=random.randint(1, 8))
    # Storage entry is 16:00-20:00 on the receiving date, drawn independently
    # of exiting_time, so it can fall before exiting_time (e.g. exit at 18:00,
    # storage entry at 16:00).
    entering_storage_time = datetime.combine(date_of_receiving, datetime.min.time()) + timedelta(hours=random.randint(16, 20))
    exiting_storage_time = entering_storage_time + timedelta(hours=random.randint(1, 8))
    traffic = random.randint(1, 3)
    weather = generate_weather()
    infrastructure = generate_infrastructure()
    # The night-driving flag is derived from the storage entry time.
    night_driving = generate_night_driving(entering_storage_time)

    # Row layout must match the column list passed to pd.DataFrame further down.
    data.append([indent_id, truck_number, date_of_ordering, date_of_receiving, customer_id, cluster_id] + fuel_requirements + [latitude, longitude, sealing_status, satisfaction, entering_time, exiting_time, entering_storage_time, exiting_storage_time, traffic, weather, infrastructure, night_driving])

# Column groups that recur in the full table and in the per-system slices.
fuel_columns = [
    'MS_Requirement', 'HSD_Requirement', 'LDO_Requirement',
    'FO_Requirement', 'LSHS_Requirement', 'SKO_Requirement',
]
location_columns = ['Latitude', 'Longitude']
party_columns = ['Customer_ID', 'Cluster_ID']

# Full table: one row per generated indent, with columns in the same order
# as the values in each row of `data`.
all_columns = (
    ['Indent_ID', 'Truck_Number', 'Date_of_Ordering', 'Date_of_Receiving']
    + party_columns
    + fuel_columns
    + location_columns
    + [
        'Sealing_Status', 'Customer_Satisfaction',
        'Entering_Time', 'Exiting_Time',
        'Entering_Storage_Time', 'Exiting_Storage_Time',
        'Traffic', 'Weather', 'Local_Infrastructure', 'Night_Driving',
    ]
)
df = pd.DataFrame(data, columns=all_columns)

# Attach the per-truck capacity to every row by matching Truck_Number
# against the index of the capacity Series.
truck_capacity = calculate_truck_capacity(df)
df = df.merge(
    truck_capacity,
    left_on='Truck_Number',
    right_index=True,
)

# Per-system views of the full table; each is an independent copy.
ims_columns = (
    ['Indent_ID', 'Truck_Number', 'Date_of_Ordering']
    + party_columns
    + fuel_columns
    + location_columns
    + ['Truck_Capacity']
)
ims_df = df[ims_columns].copy()

tas_columns = (
    ['Indent_ID', 'Truck_Number']
    + party_columns
    + fuel_columns
    + location_columns
    + ['Sealing_Status', 'Customer_Satisfaction', 'Truck_Capacity']
)
tas_df = df[tas_columns].copy()

vts_columns = (
    ['Indent_ID', 'Truck_Number', 'Date_of_Receiving', 'Entering_Time', 'Exiting_Time']
    + party_columns
    + ['Entering_Storage_Time', 'Exiting_Storage_Time', 'Truck_Capacity']
)
vts_df = df[vts_columns].copy()

external_columns = (
    ['Indent_ID', 'Truck_Number']
    + party_columns
    + ['Date_of_Receiving', 'Traffic', 'Weather', 'Local_Infrastructure', 'Night_Driving', 'Truck_Capacity']
)
external_conditions_df = df[external_columns].copy()


# Write each dataset to its own CSV file in the working directory, without
# the DataFrame index column.
for csv_name, frame in (
    ('IMS_Dataset.csv', ims_df),
    ('TAS_Dataset.csv', tas_df),
    ('VTS_Dataset.csv', vts_df),
    ('External_Conditions_Dataset.csv', external_conditions_df),
):
    frame.to_csv(csv_name, index=False)