In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [2]:
# Fix NumPy's global random state so the randomly drawn columns repeat
# from one run to the next.
np.random.seed(42)

# Number of rows generated for each table.
n_rows = 10_000

### **Tours**

In [3]:
def generate_tour_names(n):
    """Build ``n`` random tour names of the form "<Adjective> <Noun>".

    Both words are drawn uniformly, with replacement, from fixed vocabularies
    using NumPy's global random state, so the result is repeatable after a
    call to ``np.random.seed``.

    Args:
        n: Number of tour names to generate.

    Returns:
        A list of ``n`` strings.
    """
    # Each word is listed exactly once: a repeated entry would be drawn
    # twice as often as the others and skew the generated names.
    adjectives = [
        "Amazing", "Beautiful", "Epic", "Grand", "Hidden", "Adventurous",
        "Mysterious", "Breathtaking", "Majestic", "Enchanted", "Serene",
        "Legendary", "Untamed", "Timeless", "Stunning", "Vibrant",
        "Wild", "Daring", "Uncharted", "Glorious", "Mystical",
        "Glistening", "Radiant", "Exhilarating", "Incredible", "Fantastic"
    ]

    nouns = [
        "Adventure", "Expedition", "Journey", "Experience", "Trail",
        "Escape", "Discovery", "Odyssey", "Quest", "Voyage", "Saga",
        "Exploration", "Trek", "Realm", "Encounter", "Ascent",
        "Conquest", "Horizon", "Path", "Mission", "Excursion",
        "Venture", "Legacy", "Challenge", "Pursuit"
    ]

    # One vectorised draw per vocabulary instead of two RNG calls per name.
    chosen_adjectives = np.random.choice(adjectives, n)
    chosen_nouns = np.random.choice(nouns, n)

    return [f"{adjective} {noun}" for adjective, noun in zip(chosen_adjectives, chosen_nouns)]

In [4]:
def generate_locations(n):
    locations = [
        "Paris", "New York", "Tokyo", "Sydney", "Rome", "London", "Barcelona", 
        "Dubai", "Cancun", "Amsterdam", "Maldives", "Singapore", "Los Angeles", 
        "Rio de Janeiro", "Vancouver", "Istanbul", "Bangkok", "Buenos Aires", 
        "Florence", "Venice", "Santorini", "Kyoto", "Bali", "Prague", "Hawaii", 
        "Maui", "Hong Kong", "Las Vegas", "Orlando", "Mexico City", "Marrakech", 
        "Cairo", "Edinburgh", "Athens", "San Francisco", "Niagara Falls", 
        "Grand Canyon", "Stockholm", "Berlin", "Vienna", "Seoul", "Tulum", 
        "Caribe", "Petra", "Bora Bora", "Machu Picchu"
    ]

    return np.random.choice(locations, n)

In [5]:
# One reference timestamp shared by every row, so all start dates are
# whole-day offsets from the same instant instead of from a clock that keeps
# advancing while the rows are being generated.
reference_time = datetime.now()

# Column values for the tours table. The entries are evaluated top to bottom
# and each one draws from NumPy's global random state, so reordering them
# changes the generated data.
data = {
    "tour_name": generate_tour_names(n_rows),
    "location": generate_locations(n_rows),
    # randint excludes the upper bound: durations are 1-7 days.
    "duration_days": np.random.randint(1, 8, n_rows),
    # Uniform prices from 100 to 2000, rounded to two decimals.
    "price_per_person": np.round(np.random.uniform(100, 2000, n_rows), 2),
    # Each tour starts 1-364 days after the reference timestamp; int() keeps
    # the offset a built-in integer, as the end-date computation also does.
    "start_date": [
        reference_time + timedelta(days=int(np.random.randint(1, 365)))
        for _ in range(n_rows)
    ],
    # randint excludes the upper bound: group sizes are 5-49.
    "max_participants": np.random.randint(5, 50, n_rows)
}

In [6]:
# Derive each tour's end date by adding its duration (in days) to its start
# date. Every duration is cast to a built-in int before it reaches timedelta.
start_dates = data["start_date"]
duration_days = data["duration_days"]
data["end_date"] = [
    begins_on + timedelta(days=int(length_in_days))
    for begins_on, length_in_days in zip(start_dates, duration_days)
]

In [7]:
# Assemble the tours table: one column per key of the `data` mapping.
tours_df = pd.DataFrame(data=data)

In [8]:
# Write the tours table to disk without the row index column.
tours_df.to_csv(
    "tours_data.csv",
    index=False,
)

### **Users**

In [9]:
def generate_usernames(n):
    prefixes = [
        "cool", "smart", "happy", "fast", "pro", "super", "mega", "neo", "semi", 
        "ninja", "epic", "wild", "legend", "cyber", "atomic", "ultra", "dark", 
        "mighty", "stealth", "xtreme", "rapid", "power", "future", "tech", 
        "brave", "mystic", "swift", "thunder", "supernova", "alpha", "elite", 
        "glitch", "storm", "max", "king", "boss"
    ]

    suffixes = [
        "hero", "master", "genius", "dev", "ninja", "you", "123", "234", "345", 
        "king", "champ", "boss", "guru", "x", "y", "z", "elite", "ace", "pro", 
        "god", "hacker", "wizard", "warrior", "champion", "quest", "titan", 
        "soul", "shooter", "viper", "zero", "one", "savage", "vortex", "gamer", 
        "spark", "force", "snipe", "blaze", "storm"
    ]

    return [f"{np.random.choice(prefixes)}_{np.random.choice(suffixes)}" for _ in range(n)]

In [10]:
def generate_real_names(n):
    first_names = [
        "Ali", "Eli", "Diego", "Giselle", "Gissele", "Janetzy", "Albaro", 
        "Salome", "Tristan", "Sofia", "Karla", 
        "Mateo", "Valeria", "Camila", "Andres", "Mariana", 
        "Lucia", "Fernando", "Emiliano", "Isabella", "Javier", 
        "Ana", "Dario", "Renata", "Hugo", "Victoria", 
        "Oliver", "Gabriel", "Bianca", "Samuel", "Daniela", 
        "Sebastian", "Claudia", "Nicolas", "Elena", "Leandro", 
        "Alma", "Ivanna", "Ricardo", "Noah", "Manuel", "Adriana"
    ]       

    last_names = [
        "Tino", "Rios", "Alvarado", "Nieto", "Gomez", "Aguilar", "Flores", 
        "Hernandez", "Lopez", "Martinez", "Perez", "Rodriguez", 
        "Sanchez", "Vargas", "Castro", "Ortiz", "Ruiz", 
        "Ramirez", "Cruz", "Torres", "Morales", "Vega", 
        "Gutierrez", "Mendoza", "Jimenez", "Navarro", "Garcia", 
        "Diaz", "Reyes", "Silva", "Campos", "Valencia", 
        "Fuentes", "Cabrera", "Pineda", "Mejia", "Acosta", "Orozco",
        "Romero", "Chavez", "Serrano", "Padilla", "Delgado"
    ]

    return [f"{np.random.choice(first_names)} {np.random.choice(last_names)}" for _ in range(n)]

In [11]:
def generate_emails(usernames):
    domains = ["gmail.com", "yahoo.com", "outlook.com", "live.com"]
    return [f"{username}@{np.random.choice(domains)}" for username in usernames]

In [12]:
def generate_states(n):
    states = [
        "California", "Texas", "New York", "Florida", "Illinois", "Nevada", 
        "Oregon", "Arizona", "Colorado", "Washington", "Georgia", "North Carolina", 
        "Michigan", "Ohio", "Pennsylvania", "Virginia", "Tennessee", "Massachusetts", 
        "Indiana", "Missouri", "Wisconsin", "North Dakota", "South Carolina", 
        "Alabama", "Louisiana", "Kentucky", "Maine", "New Jersey", 
        "Minnesota", "Connecticut", "Iowa", "Kansas", "Arkansas", 
        "Utah", "West Virginia", "Hawaii", "Idaho", "Montana", 
        "Wyoming", "Delaware", "Alaska", "Nebraska", "Rhode Island"
    ]

    return np.random.choice(states, n)

In [13]:
# Usernames are generated up front because the e-mail addresses are built
# from them.
usernames = generate_usernames(n_rows)

# Column values for the users table, filled in one column at a time. The
# statements draw from NumPy's global random state in this order, so
# reordering them changes the generated data.
data = {}
data["username"] = usernames
data["age"] = np.random.randint(18, 70, n_rows)  # upper bound excluded: ages 18-69
data["state"] = generate_states(n_rows)
data["real_name"] = generate_real_names(n_rows)
data["email"] = generate_emails(usernames)

In [14]:
# Assemble the users table: one column per key of the `data` mapping.
users_df = pd.DataFrame(data=data)

In [15]:
# Write the users table to disk without the row index column.
users_df.to_csv(
    "users_data.csv",
    index=False,
)

### **Tours and Users**

In [16]:
# Place the tour columns and the user columns side by side. Concatenating
# along the column axis aligns the two frames on their row index, so the
# tour in a given row is paired with the user in the same row.
frames_side_by_side = [tours_df, users_df]
tours_users_df = pd.concat(frames_side_by_side, axis="columns")

In [17]:
# Write the combined tours-and-users table to disk without the row index column.
tours_users_df.to_csv(
    "tours_users_df.csv",
    index=False,
)