In [1]:
from faker import Faker
import csv
import random
from datetime import datetime, timedelta

# Initialize Faker to generate fake data
fake = Faker()

# Function to generate random date and time
def random_date(start_date, end_date):
    """Return a random datetime between two bounds, inclusive.

    The offset is drawn uniformly in whole seconds over the actual span
    between the bounds, so the result never falls after ``end_date``.
    (Drawing days and a time-of-day separately can overshoot the upper
    bound by up to a day.)

    Args:
        start_date: Lower bound of the range.
        end_date: Upper bound of the range; must not precede ``start_date``.

    Returns:
        ``start_date`` plus a random whole number of seconds, such that
        ``start_date <= result <= end_date``.

    Raises:
        ValueError: If ``end_date`` is earlier than ``start_date``.
    """
    if end_date < start_date:
        raise ValueError("end_date must not be earlier than start_date")
    span_seconds = int((end_date - start_date).total_seconds())
    return start_date + timedelta(seconds=random.randint(0, span_seconds))

# Cities in Tamil Nadu used as the pool for routes and locations.
tamil_nadu_cities = [
    'Chennai',
    'Coimbatore',
    'Madurai',
    'Tiruchirappalli',
    'Salem',
    'Erode',
    'Tirunelveli',
    'Vellore',
    'Thoothukudi',
    'Thanjavur',
    'Dindigul',
    'Ranipet',
    'Nagercoil',
    'Cuddalore',
    'Kancheepuram',
    'Karur',
    'Neyveli',
    'Kumbakonam',
    'Rajapalayam',
    'Pollachi',
]

# Pool of bus operator names (example list, modify as needed).
bus_operators = [
    'KPN Travels',
    'Parveen Travels',
    'SRM Transports',
    'National Travels',
    'Sharma Transports',
    'RKT Tours and Travels',
    'Rathimeena Travels',
    'MJT Travels',
    'Thirumal Alaghu Travels',
    'Ramu Travels',
    'SRS Travels',
    'Bharathi Travels',
    'Universal Travels',
    'ABT X Travels',
    'Hindusthan Travels',
    'Pandian Roadways',
    'Kesineni Travels',
]

# Accumulator for the generated booking records (one dict per row).
bus_ticket_data = list()

def get_time_of_day(booking_time):
    """Classify a timestamp as 'Day' or 'Night' from its hour.

    Hours 6 through 17 count as 'Day'; every other hour is 'Night'.
    """
    hour_of_day = booking_time.hour
    if hour_of_day < 6 or hour_of_day >= 18:
        return 'Night'
    return 'Day'

# Generate 1000 synthetic bookings spread over 2018-2024 and export them to CSV.

# Choice pools are defined once instead of being rebuilt on every iteration.
bus_types = [
    'Volvo AC', 'Mercedes Benz AC', 'Scania AC', 'Regular Non-AC', 'Semi-Sleeper Non-AC',
    'AC Sleeper', 'Non-AC Sleeper', 'Luxury AC/Non-AC', 'Double Decker AC/Non-AC',
    'Volvo Multi-Axle AC', 'Party Buses',
]
payment_methods = ['Credit Card', 'Debit Card', 'Net Banking']
review_words = ['good', 'average', 'excellent']
genders = ['Male', 'Female']
device_types = ['Mobile', 'Desktop', 'Tablet']
desktop_operating_systems = ['Windows', 'macOS']
browsers = ['Chrome', 'Firefox', 'Safari', 'Edge', 'Opera']

for _ in range(1000):
    # Random lifetime booking count for the customer (not tracked per customer).
    total_booking_count = random.randint(1, 200)

    # One distinct seat (numbered 1-30) per passenger.
    number_of_passengers = random.randint(1, 4)
    seat_selection = random.sample(range(1, 31), number_of_passengers)

    # Timeline: booking -> departure (5-10 days later) -> arrival (1-5 days
    # after departure). The trip must depart before it arrives.
    booking_time = random_date(datetime(2018, 1, 1), datetime(2024, 12, 31))
    departure_time = booking_time + timedelta(days=random.randint(5, 10))
    arrival_time = departure_time + timedelta(days=random.randint(1, 5))

    # Roughly half of the bookings are cancelled, at some point between the
    # booking and the departure. Clamp defensively so a cancellation can never
    # be recorded after the bus has left.
    if random.choice([True, False]):
        cancellation_time = min(random_date(booking_time, departure_time), departure_time)
    else:
        cancellation_time = None
    is_cancelled = cancellation_time is not None

    # The route is an ordered list of 2-5 distinct cities; the trip departs
    # from the first and arrives at the last, so the two locations always
    # differ and always agree with Route_Info.
    route_cities = random.sample(tamil_nadu_cities, random.randint(2, 5))
    route_info = ', '.join(route_cities)
    departure_location = route_cities[0]
    arrival_location = route_cities[-1]

    ticket_price = round(random.uniform(50.0, 300.0), 2)
    # A cancelled booking is refunded in full.
    refund_amount = ticket_price if is_cancelled else None

    # Pick the device once and derive the operating system from it so the two
    # columns are consistent with each other.
    device_type = random.choice(device_types)
    if device_type == 'Mobile':
        operating_system = 'iOS'
    elif device_type == 'Tablet':
        operating_system = 'Android'
    else:
        operating_system = random.choice(desktop_operating_systems)

    # Key order defines the CSV column order.
    entry = {
        'Booking_Time': booking_time.strftime('%Y-%m-%d'),
        'Booking_Year': booking_time.year,
        'Booking_Month': booking_time.strftime('%B'),  # Full month name (e.g., January)
        'Booking_Hours': booking_time.hour,
        'Booking_Minutes': booking_time.minute,
        'Day_or_Night': get_time_of_day(booking_time),
        'Cancellation_Time': cancellation_time.strftime('%Y-%m-%d') if is_cancelled else 'nil',
        # Trip details are left empty for cancelled bookings.
        'Arrival_Time': None if is_cancelled else arrival_time.strftime('%Y-%m-%d'),
        'Departure_Time': None if is_cancelled else departure_time.strftime('%Y-%m-%d'),
        'Punctuality': None if is_cancelled else f'{random.randint(70, 95)}%',  # Placeholder value
        'Number_of_Passengers': number_of_passengers,
        'Seat_Selection': ', '.join(str(s) for s in seat_selection),
        'Departure_Location': departure_location,
        'Arrival_Location': arrival_location,
        'Ticket_Price': ticket_price,
        'Refund_Amount': refund_amount,
        'Customer_Age': random.randint(18, 70),
        'Customer_Email': fake.email(),
        'Bus_Type': random.choice(bus_types),
        'Contact_Phone': f'+1234567{random.randint(100, 999)}89',
        'Payment_Method': random.choice(payment_methods),
        'Transaction_ID': str(random.randint(1000000000, 9999999999)),
        'Bus_Operator': random.choice(bus_operators),
        'Bus_ID': f'BUS{random.randint(100, 999)}',
        'Route_Info': route_info,
        'Coupon_Code': f'DISCOUNT{random.randint(1, 20)}',
        'Discount_Percentage': random.randint(5, 20),
        'Review_Text': f'Experience was {random.choice(review_words)}!',
        'Rating': round(random.uniform(3.0, 5.0), 1),
        'Gender': random.choice(genders),
        'Device_Type': device_type,
        'Operating_System': operating_system,
        'Browser': random.choice(browsers),  # Browser information
        'Total_Booking_Count': total_booking_count,
    }

    bus_ticket_data.append(entry)

# Saving data to a CSV file
csv_file = 'merged_bus_ticket_dat.csv'

# newline='' lets the csv module control line endings itself.
with open(csv_file, 'w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=list(bus_ticket_data[0]))
    writer.writeheader()
    writer.writerows(bus_ticket_data)

print(f"Data has been written to '{csv_file}' successfully!")


Data has been written to 'merged_bus_ticket_dat.csv' successfully!
