In [2]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta



 Fixed Dataset 'swiggy_deliveyboy_300.csv' created with 100,000 rows and consistent delivery-person city mapping.


In [None]:
# Constants
SEED = 42  # fixed RNG seed so Restart Kernel -> Run All regenerates the same dataset
NUM_ROWS = 100000  # number of synthetic orders to generate
NUM_DELIVERY_PERSONS = 300  # size of the delivery-person pool
CITIES = ['Mumbai', 'Delhi', 'Bangalore', 'Hyderabad', 'Chennai', 'Kolkata', 'Pune', 'Ahmedabad']
# Order timestamps are drawn from the half-open window [START_DATE, END_DATE):
# random_datetime() never returns END_DATE itself, so no order falls on 2024-05-31.
# NOTE(review): if the whole of May is intended, END_DATE should be datetime(2024, 6, 1) - confirm.
START_DATE = datetime(2024, 5, 1)
END_DATE = datetime(2024, 5, 31)

# Seed the stdlib RNG that the generator helpers below draw from.
random.seed(SEED)



In [None]:
# Helper Functions
def random_datetime(start, end):
    """Return a random datetime in the half-open window [start, end).

    The result is ``start`` plus a whole number of minutes, so it keeps the
    seconds/microseconds of ``start`` and is always strictly before ``end``.

    Args:
        start: Inclusive lower bound (``datetime``).
        end: Exclusive upper bound (``datetime``); must be later than ``start``.

    Returns:
        A ``datetime`` ``t`` with ``start <= t < end``.

    Raises:
        ValueError: If ``end`` is not later than ``start``.
    """
    if end <= start:
        # Fail with an explicit message instead of randrange's opaque
        # "empty range" error.
        raise ValueError(f"end ({end}) must be later than start ({start})")

    window = end - start
    # Whole minutes in the window; leftover seconds/microseconds are discarded.
    total_minutes = window.days * 24 * 60 + window.seconds // 60

    # A window shorter than one minute has a single minute-aligned candidate
    # (start itself); randrange(0) would raise, so short-circuit to offset 0.
    offset_minutes = random.randrange(total_minutes) if total_minutes else 0
    return start + timedelta(minutes=offset_minutes)


In [None]:

def generate_order_times(order_time):
    """Derive random pickup and delivery timestamps for an order.

    Pickup happens 10-30 minutes (inclusive) after ``order_time``; delivery
    happens a further 20-60 minutes (inclusive) after pickup.

    Args:
        order_time: ``datetime`` at which the order was placed.

    Returns:
        Tuple ``(pickup_time, delivery_time)`` of ``datetime`` objects.
    """
    # Draw both gaps first, pickup before delivery, to keep the RNG call order.
    minutes_to_pickup = random.randint(10, 30)
    minutes_in_transit = random.randint(20, 60)

    picked_up_at = order_time + timedelta(minutes=minutes_to_pickup)
    return picked_up_at, picked_up_at + timedelta(minutes=minutes_in_transit)



In [None]:
def generate_rating():
    """Return a random star rating from 1 to 5, skewed towards high ratings.

    The relative weights for ratings 1..5 are 5, 10, 15, 35 and 35, so
    ratings of 4 and 5 are each drawn 35% of the time.
    """
    stars = (1, 2, 3, 4, 5)
    relative_weights = (5, 10, 15, 35, 35)
    # choices() returns a list; unpack its single element.
    (rating,) = random.choices(stars, weights=relative_weights, k=1)
    return rating



In [None]:
def generate_order_value():
    """Return a random order value rounded to 2 decimal places.

    Roughly 10% of orders are "large" (uniform between 1500 and 3000); the
    rest are uniform between 100 and 1500.
    """
    # First draw picks the price band, second draw picks the value inside it.
    is_large_order = random.random() < 0.10
    low, high = (1500, 3000) if is_large_order else (100, 1500)
    return round(random.uniform(low, high), 2)



In [None]:
# Step 1: Assign one city per delivery person
# IDs are "D" followed by a zero-padded 4-digit sequence number: D0001, D0002, ...
delivery_persons = [
    "D" + str(seq).zfill(4) for seq in range(1, NUM_DELIVERY_PERSONS + 1)
]
# Each person gets exactly one randomly chosen city; rows generated later look
# the city up here, which keeps the person -> city relationship consistent.
delivery_person_city_map = dict(
    zip(delivery_persons, (random.choice(CITIES) for _ in delivery_persons))
)



In [None]:
# Step 2: Generate Data
# Draw every order ID up front *without replacement* so the IDs are guaranteed
# to be unique; independent randint() draws could (rarely) repeat an ID.
# range(10**14, 10**15) covers exactly the 15-digit integers.
order_ids = random.sample(range(10**14, 10**15), NUM_ROWS)

data = []  # one list per order; value order matches the DataFrame columns

for raw_order_id in order_ids:
    order_id = str(raw_order_id)  # Random 15-digit Order ID, stored as text
    delivery_person_id = random.choice(delivery_persons)
    city = delivery_person_city_map[delivery_person_id]  # Get assigned city only
    order_value = generate_order_value()
    order_quantity = random.randint(1, 5)
    order_time = random_datetime(START_DATE, END_DATE)
    pickup_time, delivery_time = generate_order_times(order_time)
    rating = generate_rating()

    data.append([
        order_id,
        delivery_person_id,
        order_value,
        order_quantity,
        city,
        order_time,
        pickup_time,
        delivery_time,
        rating
    ])



In [None]:
# Create Dataframe
# Column labels, listed in the same order as the values inside each row of `data`.
column_names = [
    'order_id', 'delivery_person_id', 'order_value', 'order_quantity', 'city',
    'order_timestamp', 'pickup_timestamp', 'delivery_timestamp', 'order_rating',
]
df = pd.DataFrame(data=data, columns=column_names)



In [None]:
# Exporting to CSV
# index=False keeps the DataFrame's row index out of the written file.
output_csv = 'swiggy_deliveyboy_300.csv'
df.to_csv(output_csv, index=False)


In [None]:
# Report the actual number of rows in `df` rather than a hard-coded figure, so
# the message stays truthful if NUM_ROWS is changed (prints "100,000" today).
print(f" Fixed Dataset 'swiggy_deliveyboy_300.csv' created with {len(df):,} rows and consistent delivery-person city mapping.")
