In [1]:
from faker import Faker
import csv
import random
from datetime import datetime, timedelta

# Initialize Faker to generate fake data
# NOTE(review): `fake` is not referenced anywhere else in the visible code; every
# value generated below comes from the standard-library `random` module. Confirm
# whether cells outside this view use it before removing it.
fake = Faker()

# Function to generate random date and time
def random_date(start_date, end_date):
    """Return a random datetime in the closed interval [start_date, end_date].

    The offset is drawn uniformly, at one-second resolution, over the whole
    span between the two bounds. Drawing a day offset and then adding an
    independent random time of day would overshoot ``end_date`` whenever the
    last day is picked, so a single offset in seconds is used instead.

    Args:
        start_date: Lower bound (``datetime``), inclusive.
        end_date: Upper bound (``datetime``), inclusive.

    Returns:
        A ``datetime`` ``d`` with ``start_date <= d <= end_date``. Any
        sub-second part of the span is ignored.

    Raises:
        ValueError: If ``end_date`` is earlier than ``start_date`` (raised by
            ``random.randint`` on the empty range).
    """
    delta = end_date - start_date
    # timedelta normalises to days >= / < 0 with 0 <= seconds < 86400, so this
    # is the floor of the span in whole seconds (negative when the bounds are
    # reversed, which makes randint raise ValueError below).
    span_seconds = delta.days * 86400 + delta.seconds
    return start_date + timedelta(seconds=random.randint(0, span_seconds))

# Cities in Tamil Nadu used as the pool for locations, routes and destinations.
# Order is kept stable because random selection depends on list positions.
tamil_nadu_cities = [
    'Chennai',
    'Coimbatore',
    'Madurai',
    'Tiruchirappalli',
    'Salem',
    'Erode',
    'Tirunelveli',
    'Vellore',
    'Thoothukudi',
    'Thanjavur',
    'Dindigul',
    'Ranipet',
    'Nagercoil',
    'Cuddalore',
    'Kancheepuram',
    'Karur',
    'Neyveli',
    'Kumbakonam',
    'Rajapalayam',
    'Pollachi',
]

# Operator names a generated booking can be assigned to.
# Order is kept stable because random selection depends on list positions.
bus_operators = [
    'KPN Travels',
    'Parveen Travels',
    'SRM Transports',
    'National Travels',
    'Sharma Transports',
    'RKT Tours and Travels',
    'Rathimeena Travels',
    'MJT Travels',
    'Thirumal Alaghu Travels',
    'Ramu Travels',
    'SRS Travels',
    'Bharathi Travels',
    'Universal Travels',
    'ABT X Travels',
    'Hindusthan Travels',
    'Pandian Roadways',
    'Kesineni Travels',
]

# Generating random data for the dataset
# Accumulator for the generated rows: the generation loop appends one dict per
# booking, and the CSV export takes its column names from the first dict.
bus_ticket_data = []

def is_day_or_night(hour):
    """Label an hour of the day as 'Day' or 'Night'.

    Hours from 6 (inclusive) up to 18 (exclusive) count as 'Day'; every other
    value is 'Night'.
    """
    in_daytime = 6 <= hour < 18
    if in_daytime:
        return 'Day'
    return 'Night'

# Generating data for different months and years

# Fixed seed so that re-running the cell regenerates the same dataset.
# Set to None to draw a different sample on every run.
RANDOM_SEED = 42
NUM_RECORDS = 1000       # number of rows to generate
MAX_SEAT_NUMBER = 30     # seats are numbered 1..MAX_SEAT_NUMBER
SEASON_NUMBERS = range(1, 8)  # seasons are numbered 1..7

# Choice pools are loop-invariant, so they are built once instead of per row.
BUS_TYPES = [
    'Volvo AC', 'Mercedes Benz AC', 'Scania AC', 'Regular Non-AC', 'Semi-Sleeper Non-AC',
    'AC Sleeper', 'Non-AC Sleeper', 'Luxury AC/Non-AC', 'Double Decker AC/Non-AC',
    'Volvo Multi-Axle AC', 'Party Buses'
]
PAYMENT_METHODS = ['Credit Card', 'Debit Card', 'Net Banking']
REVIEW_WORDS = ["good", "average", "excellent"]
GENDERS = ['Male', 'Female']
DEVICE_TYPES = ['Mobile', 'Desktop', 'Tablet']
OPERATING_SYSTEMS = ['iOS', 'Android', 'Windows', 'macOS']
BROWSERS = ['Chrome', 'Firefox', 'Safari', 'Edge', 'Opera']

random.seed(RANDOM_SEED)

for i in range(NUM_RECORDS):
    date_time = random_date(datetime(2016, 1, 1), datetime(2024, 12, 31))
    route_cities = random.sample(tamil_nadu_cities, random.randint(2, 5))
    route_info = ', '.join(route_cities)
    popular_destinations = ', '.join(random.sample(tamil_nadu_cities, random.randint(2, 5)))
    ticket_price = round(random.uniform(50.0, 300.0), 2)

    # Sample two *distinct* cities: independent random.choice calls could yield
    # a trip that departs from and arrives at the same city.
    departure_city, arrival_city = random.sample(tamil_nadu_cities, 2)

    # One unique seat per passenger, so the seat list always matches the
    # passenger count and never repeats a seat number.
    passenger_count = random.randint(1, 4)
    seat_numbers = random.sample(range(1, MAX_SEAT_NUMBER + 1), passenger_count)

    # Two distinct seasons, so the same season is never listed twice.
    first_season, second_season = random.sample(SEASON_NUMBERS, 2)

    entry = {
        'Date_Time': date_time.strftime('%Y-%m-%d %H:%M:%S'),
        'Year': date_time.year,  # Separate Year column
        'Month': date_time.month,  # Separate Month column
        'Day': date_time.day,  # Separate Day column
        'Hour': date_time.hour,  # Separate Hour column
        'Minute': date_time.minute,  # Separate Minute column
        'Day_Night': is_day_or_night(date_time.hour),  # Day/Night label derived from Hour
        'Departure_Location': departure_city,
        'Arrival_Location': arrival_city,
        'Number_of_Passengers': passenger_count,
        'Seat_Selection': ', '.join(str(seat) for seat in seat_numbers),
        'Bus_Type': random.choice(BUS_TYPES),
        'Customer_ID': str(random.randint(100000, 999999)),
        'Contact_Email': f'customer{random.randint(1, 100)}@example.com',
        'Contact_Phone': f'+1234567{random.randint(100, 999)}89',
        'Ticket_Price': ticket_price,
        'Payment_Method': random.choice(PAYMENT_METHODS),
        'Transaction_ID': str(random.randint(1000000000, 9999999999)),
        'Bus_Operator': random.choice(bus_operators),
        'Bus_ID': f'BUS{random.randint(100, 999)}',
        # NOTE(review): Departure_Time and Cancellation_Time are drawn independently
        # of Date_Time (and of each other), so their ordering is arbitrary. Confirm
        # whether the dataset is expected to keep them chronologically consistent.
        'Departure_Time': random_date(datetime(2023, 1, 1), datetime(2024, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),
        'Route_Info': route_info,
        'Coupon_Code': f'DISCOUNT{random.randint(1, 20)}',
        'Discount_Percentage': random.randint(5, 20),
        'Cancellation_Time': random_date(datetime(2023, 1, 1), datetime(2024, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),
        'Refund_Amount': round(random.uniform(10.0, 50.0), 2),
        'Review_Text': f'Experience was {random.choice(REVIEW_WORDS)}!',
        'Rating': round(random.uniform(3.0, 5.0), 1),
        'Popular_Destinations': popular_destinations,
        'Regional_Trends': f'Region {random.randint(1, 5)}: Higher booking volume in this region',
        'Bus_Occupancy': round(random.uniform(0.5, 1.0), 2),
        'Punctuality': f'On-time performance is {random.randint(70, 100)}%',
        'Peak_Travel_Seasons': f'Season {first_season}, Season {second_season}',
        'Gender': random.choice(GENDERS),
        'Device_Type': random.choice(DEVICE_TYPES),  # Device used for the booking
        'Operating_System': random.choice(OPERATING_SYSTEMS),  # OS information
        'Browser': random.choice(BROWSERS)  # Browser information
    }
    bus_ticket_data.append(entry)

# Export the generated rows to disk as CSV
csv_file = 'final_update_BUS.csv'

with open(csv_file, 'w', newline='', encoding='utf-8') as out_handle:
    # Column order follows the key order of the first generated row.
    column_names = list(bus_ticket_data[0])
    csv_writer = csv.DictWriter(out_handle, fieldnames=column_names)
    csv_writer.writeheader()
    for record in bus_ticket_data:
        csv_writer.writerow(record)

print(f"Data has been written to '{csv_file}' successfully!")


Data has been written to 'final_update_BUS.csv' successfully!
