In [1]:
import sqlite3
import pandas as pd
from faker import Faker
import random
from datetime import datetime, timedelta


In [2]:
# Initialize Faker for generating synthetic data
faker = Faker()

# Seed both random sources so a fresh "Restart & Run All" draws the same
# random values on every run. Faker keeps its own generator, separate from
# the stdlib `random` module, so each one has to be seeded explicitly.
# NOTE(review): date fields are generated relative to the current date/time
# ("-2y", "-1y", "now"), so those columns can still shift between runs.
SEED = 42
random.seed(SEED)
Faker.seed(SEED)

# Number of records for each table
NUM_CUSTOMERS = 50
NUM_RESTAURANTS = 20
NUM_ORDERS = 100
NUM_DELIVERIES = 100
NUM_DELIVERY_PERSONS = 15

In [3]:
# Function to generate Customers dataset
def generate_customers(num_records):
    """Build a DataFrame of synthetic customer records.

    Args:
        num_records: Number of customer rows to create. ``customer_id``
            values run from 1 to ``num_records`` inclusive.

    Returns:
        pandas.DataFrame with one row per customer and the columns
        customer_id, name, email, phone, location, signup_date, is_premium,
        preferred_cuisine, total_orders and average_rating.
    """
    cuisine_options = ["Indian", "Chinese", "Italian", "Mexican", "Thai"]
    rows = [
        {
            "customer_id": customer_id,
            "name": faker.name(),
            "email": faker.email(),
            "phone": faker.phone_number(),
            "location": faker.address(),
            # Signup dates fall within the last two years.
            "signup_date": faker.date_between(start_date="-2y", end_date="today"),
            # 0/1 flag rather than a bool.
            "is_premium": random.choice([0, 1]),
            "preferred_cuisine": random.choice(cuisine_options),
            "total_orders": random.randint(0, 50),
            # Rating in [3.0, 5.0], rounded to one decimal place.
            "average_rating": round(random.uniform(3.0, 5.0), 1),
        }
        for customer_id in range(1, num_records + 1)
    ]
    return pd.DataFrame(rows)

In [4]:
# Function to generate Restaurants dataset
def generate_restaurants(num_records):
    """Build a DataFrame of synthetic restaurant records.

    Args:
        num_records: Number of restaurant rows to create. ``restaurant_id``
            values run from 1 to ``num_records`` inclusive.

    Returns:
        pandas.DataFrame with one row per restaurant and the columns
        restaurant_id, name, cuisine_type, location, owner_name,
        average_delivery_time, contact_number, rating, total_orders and
        is_active.
    """
    cuisine_options = ["Indian", "Chinese", "Italian", "Mexican", "Thai"]

    def build_row(restaurant_id):
        # One restaurant record; the key order fixes the column order.
        return {
            "restaurant_id": restaurant_id,
            "name": faker.company(),
            "cuisine_type": random.choice(cuisine_options),
            "location": faker.address(),
            "owner_name": faker.name(),
            "average_delivery_time": random.randint(20, 45),
            "contact_number": faker.phone_number(),
            # Rating in [3.0, 5.0], rounded to one decimal place.
            "rating": round(random.uniform(3.0, 5.0), 1),
            "total_orders": random.randint(0, 1000),
            # 0/1 flag rather than a bool.
            "is_active": random.choice([0, 1]),
        }

    rows = [build_row(restaurant_id) for restaurant_id in range(1, num_records + 1)]
    return pd.DataFrame(rows)

In [5]:
# Function to generate Orders dataset
def generate_orders(num_records, num_customers, num_restaurants):
    """Build a DataFrame of synthetic order records.

    Args:
        num_records: Number of order rows to create. ``order_id`` values run
            from 1 to ``num_records`` inclusive.
        num_customers: Upper bound (inclusive) for the random ``customer_id``
            assigned to each order; ids are drawn from 1..num_customers.
        num_restaurants: Upper bound (inclusive) for the random
            ``restaurant_id``; ids are drawn from 1..num_restaurants.

    Returns:
        pandas.DataFrame with one row per order and the columns order_id,
        customer_id, restaurant_id, order_date, delivery_time, status,
        total_amount, payment_mode, discount_applied and feedback_rating.
    """
    # Longest gap allowed between placing an order and its delivery time.
    max_delivery_delay_seconds = 2 * 60 * 60

    orders = []
    for order_id in range(1, num_records + 1):
        order_date = faker.date_time_between(start_date="-1y", end_date="now")
        # Anchor the delivery timestamp to the order itself. Drawing it
        # independently from "now .. +2h" paired year-old orders with
        # deliveries in the future.
        delivery_time = order_date + timedelta(
            seconds=random.randint(0, max_delivery_delay_seconds)
        )
        orders.append({
            "order_id": order_id,
            "customer_id": random.randint(1, num_customers),
            "restaurant_id": random.randint(1, num_restaurants),
            "order_date": order_date,
            "delivery_time": delivery_time,
            "status": random.choice(["Pending", "Delivered", "Cancelled"]),
            "total_amount": round(random.uniform(100.0, 2000.0), 2),
            "payment_mode": random.choice(["Credit Card", "Cash", "UPI", "Wallet"]),
            "discount_applied": round(random.uniform(0.0, 500.0), 2),
            # Rating in [3.0, 5.0], rounded to one decimal place.
            "feedback_rating": round(random.uniform(3.0, 5.0), 1),
        })
    return pd.DataFrame(orders)

In [6]:
# Function to generate Deliveries dataset
def generate_deliveries(num_records, num_orders):
    """Build a DataFrame of synthetic delivery records.

    Args:
        num_records: Number of delivery rows to create. ``delivery_id``
            values run from 1 to ``num_records`` inclusive.
        num_orders: Upper bound (inclusive) for the random ``order_id``
            attached to each delivery; ids are drawn from 1..num_orders, so
            an order may appear in several deliveries or in none.

    Returns:
        pandas.DataFrame with one row per delivery and the columns
        delivery_id, order_id, delivery_status, distance, delivery_time,
        estimated_time, delivery_fee and vehicle_type.
    """
    status_options = ["On the way", "Delivered", "Failed"]
    vehicle_options = ["Bike", "Car", "Scooter"]
    rows = [
        {
            "delivery_id": delivery_id,
            "order_id": random.randint(1, num_orders),
            "delivery_status": random.choice(status_options),
            # Distance in [1.0, 20.0], rounded to two decimal places.
            "distance": round(random.uniform(1.0, 20.0), 2),
            # Drawn independently of estimated_time.
            "delivery_time": random.randint(20, 120),
            "estimated_time": random.randint(15, 60),
            "delivery_fee": round(random.uniform(10.0, 100.0), 2),
            "vehicle_type": random.choice(vehicle_options),
        }
        for delivery_id in range(1, num_records + 1)
    ]
    return pd.DataFrame(rows)


In [7]:
# Function to generate Delivery Persons dataset
def generate_delivery_persons(num_records):
    """Build a DataFrame of synthetic delivery-person records.

    Args:
        num_records: Number of rows to create. ``delivery_person_id`` values
            run from 1 to ``num_records`` inclusive.

    Returns:
        pandas.DataFrame with one row per delivery person and the columns
        delivery_person_id, name, contact_number, vehicle_type,
        total_deliveries, average_rating and location.
    """
    vehicle_options = ["Bike", "Car", "Scooter"]
    rows = [
        {
            "delivery_person_id": person_id,
            "name": faker.name(),
            "contact_number": faker.phone_number(),
            "vehicle_type": random.choice(vehicle_options),
            "total_deliveries": random.randint(0, 500),
            # Rating in [3.0, 5.0], rounded to one decimal place.
            "average_rating": round(random.uniform(3.0, 5.0), 1),
            # City name only, unlike the full addresses used for customers
            # and restaurants.
            "location": faker.city(),
        }
        for person_id in range(1, num_records + 1)
    ]
    return pd.DataFrame(rows)


In [8]:
# Build the five synthetic tables. Orders draw their customer and restaurant
# ids from the ranges sized by NUM_CUSTOMERS / NUM_RESTAURANTS, and deliveries
# draw their order ids from the range sized by NUM_ORDERS.
customers_df = generate_customers(num_records=NUM_CUSTOMERS)
restaurants_df = generate_restaurants(num_records=NUM_RESTAURANTS)
orders_df = generate_orders(
    num_records=NUM_ORDERS,
    num_customers=NUM_CUSTOMERS,
    num_restaurants=NUM_RESTAURANTS,
)
deliveries_df = generate_deliveries(
    num_records=NUM_DELIVERIES,
    num_orders=NUM_ORDERS,
)
delivery_persons_df = generate_delivery_persons(num_records=NUM_DELIVERY_PERSONS)

In [10]:
# Persist each table as a CSV in the current working directory.
# index=False keeps the DataFrame row index out of the files.
customers_df.to_csv(path_or_buf='customers.csv', index=False)
restaurants_df.to_csv(path_or_buf='restaurants.csv', index=False)
orders_df.to_csv(path_or_buf='orders.csv', index=False)
deliveries_df.to_csv(path_or_buf='deliveries.csv', index=False)
delivery_persons_df.to_csv(path_or_buf='delivery_persons.csv', index=False)


In [16]:
# Quick sanity check: show (rows, columns) of the deliveries table. The row
# count should equal NUM_DELIVERIES and there should be 8 columns, one per
# key built in generate_deliveries.
deliveries_df.shape

(100, 8)