In [1]:
import random
from collections import Counter

import firebase_admin
from faker import Faker
from firebase_admin import firestore, credentials
from google.cloud.firestore import SERVER_TIMESTAMP

# Service-account key, referenced by a path relative to the notebook.
creds = credentials.Certificate("../creds/firebase_key.json")

# Re-running this cell must not initialise the default app a second time.
# Probe for it with get_app(), which raises ValueError when no default app
# exists yet. The previous `try: initialize_app(...) except ValueError: pass`
# also swallowed the ValueError that initialize_app raises for invalid
# arguments, hiding real configuration mistakes.
try:
    firebase_admin.get_app()
except ValueError:
    firebase_admin.initialize_app(creds)

db = firestore.client()

# NOTE(review): `collections` is not read by any cell visible here, and the
# name is the same as the stdlib `collections` module. Kept so that any other
# cell relying on it keeps working; consider renaming or removing.
collections = db.collections()


# Handle to the "users" collection used by the reads and writes below
users_ref = db.collection("users")

# Optional reset: uncomment to delete every document in the users collection
# users = users_ref.get()
# for user in users:
#     user.reference.delete()


# Pull the first 10 documents to see which fields are currently in use
first_10_users = users_ref.limit(10).get()
print("Analyzing schema from first 10 documents...")

# Union of the field names found across the sampled documents
fields = {
    field_name
    for snapshot in first_10_users
    for field_name in snapshot.to_dict()
}

print("Found fields:", sorted(fields))

# `random` and `Faker` are imported in the import block at the top of the cell.
fake = Faker()

# Number of synthetic users to generate
NUM_SAMPLE_USERS = 100

# Create sample data
sample_users = []

# Define common interests for more realistic data
interests_pool = [
    "hiking",
    "reading",
    "cooking",
    "photography",
    "gaming",
    "traveling",
    "music",
    "art",
    "sports",
    "technology",
    "gardening",
    "yoga",
    "movies",
    "dancing",
    "writing",
]

# Focus states with relative sampling weights. The weights sum to 100, so each
# one reads as the rough percentage of generated users for that state.
target_states = {
    "CA": 30,  # More entries for California
    "NY": 15,
    "TX": 20,
    "FL": 15,
    "WA": 8,
    "PA": 7,
    "GA": 5,
}

# Build the population and weight lists once instead of on every iteration
state_codes = list(target_states)
state_weights = list(target_states.values())

for _ in range(NUM_SAMPLE_USERS):
    # Weighted pick of a single state
    state = random.choices(state_codes, weights=state_weights, k=1)[0]

    # 1-4 distinct interests (randint is inclusive on both ends)
    num_interests = random.randint(1, 4)
    interests = random.sample(interests_pool, num_interests)

    user = {
        "name": fake.name(),
        "age": random.randint(18, 80),
        "state": state,
        "interests": interests,
        # Sentinel imported from google.cloud.firestore
        "timestamp": SERVER_TIMESTAMP,
    }

    sample_users.append(user)

# Tally generated users per state. Counter keeps first-seen order, so the
# printed order matches the previous hand-rolled dict.
state_counts = Counter(entry["state"] for entry in sample_users)

print(f"Generated {len(sample_users)} users with the following state distribution:")
for state, count in state_counts.items():
    print(f"{state}: {count} users")

# Add users to Firestore in one batched write instead of one network
# round-trip per user. The batch is all-or-nothing, so the message below is
# only printed when every document was written.
batch = db.batch()
for user in sample_users:
    # document() with no ID gives an auto-ID reference, and create() fails if
    # the document already exists, matching what users_ref.add(user) did.
    batch.create(users_ref.document(), user)
batch.commit()

print(f"Added {len(sample_users)} sample users to Firestore")

Analyzing schema from first 10 documents...
Found fields: ['age', 'interests', 'name', 'state']
Generated 100 users with the following state distribution:
TX: 15 users
FL: 20 users
NY: 17 users
CA: 33 users
WA: 6 users
GA: 5 users
PA: 4 users
Added 100 sample users to Firestore


In [None]:
# Define new home/decor related interests.
# Every entry must be unique: random.sample() draws by position, so a repeated
# label would be twice as likely to be picked and could appear twice in one
# user's interests. ("Gaming Chair" was previously listed twice.)
home_interests = [
    "Interior Design",
    "Home Renovation",
    "DIY Projects",
    "Standing Desk",
    "Gaming Chair",
    "White Board",
    "Spring Mattress",
    "Zero Gravity Mattress",
    "Wooden Bed Frame",
    "HD Monitor",
    "Large TV",
    "Chandelier",
    "Smart Home Tech",
    "Home Organization",
    "Modern Decor",
    "Wall Art",
    "Home Storage",
    "Sustainable Living",
    "Ergonomic Office Chair",
    "Mechanical Keyboard",
    "Dual Monitor Setup",
    "LED Strip Lighting",
    "Floating Shelves",
    "Bean Bag Chair",
    "Air Purifier",
    "Desk Lamp",
    "Computer Desk",
    "Bookshelf",
    "Room Divider",
    "Cable Management",
    "Smart Lighting",
    "Minimalist Decor",
    "Office Plants",
    "Desk Organizer",
    "Wireless Charger",
    "Monitor Stand",
    "Ergonomic Mouse",
    "Noise Cancelling Panels",
    "Filing Cabinet",
    "Laptop Stand",
    "USB Hub",
    "Desk Mat",
    "Webcam",
    "Microphone Stand",
    "Ring Light",
    "Headphone Stand",
    "Footrest",
    "Anti-Fatigue Mat",
    "Window Blinds",
    "Area Rug",
    "Coffee Table",
    "Side Table",
    "Coffee Maker",
    "Espresso Machine",
    "Kitchen Island",
    "Kitchen Countertop",
    "Kitchen Cabinet",
    "Kitchen Sink",
    "Kitchen Faucet",
    "Kitchen Appliances",
    "Kitchen Accessories",
    "Kitchen Decor",
    "PlayStation 5",
    "Xbox Series X",
    "Gaming Controller",
    "Sun Room",
    "Home Theater System",
    "Projector Screen",
    "Surround Sound",
    "Love Seat",
    "Entertainment Center",
    "Media Console",
    "Gaming Desk",
    "Game Storage",
    "Reclining Sofa",
    "Blackout Curtains",
    "Soundbar",
    "TV Mount",
    "Gaming Accessories",
    "Movie Posters",
    "Ambient Lighting",
    "Nintendo Switch",
    "Gaming Headset",
    "Stream Deck",
    "RGB Lighting",
    "Gaming Monitor",
    "Retro Gaming Setup",
    "Board Game Table",
    "Card Game Storage",
    "Reading Nook",
    "Accent Chair",
    "Ottoman",
    "Floor Cushions",
    "Throw Blankets",
    "Indoor Plants",
    "Wall Sconces",
    "Ceiling Fan",
    "Window Seat",
    "Accent Table",
    "Magazine Rack",
    "Record Player",
]

# Get all users (materialised as a list so it can be sliced into chunks)
users = list(users_ref.get())

# Conservative cap on the number of operations sent in one batched write
MAX_BATCH_OPS = 500

# Update each user's interests, one batched write per chunk rather than one
# network round-trip per document
for chunk_start in range(0, len(users), MAX_BATCH_OPS):
    batch = db.batch()
    for user in users[chunk_start : chunk_start + MAX_BATCH_OPS]:
        # Count is 1 + floor(exponential draw with mean 2). That is usually
        # 1-4, but the tail is unbounded, so it is clamped to the pool size to
        # keep random.sample() from raising.
        num_interests = min(int(random.expovariate(1 / 2)) + 1, len(home_interests))
        new_interests = random.sample(home_interests, num_interests)

        # Queue the overwrite of this document's "interests" field
        batch.update(user.reference, {"interests": new_interests})
    batch.commit()

print("Updated all users with home/decor related interests")

# Add more diverse users: one {"name", "email"} record per (name, email) pair
new_users = [
    {"name": full_name, "email": address}
    for full_name, address in (
        ("Rajesh Kumar", "rajesh.k@email.com"),
        ("Wei Chen", "wei.chen@email.com"),
        ("Ahmad Abdullah", "ahmad.a@email.com"),
        ("Jamal Williams", "jamal.w@email.com"),
        ("Kwame Mensah", "kwame.m@email.com"),
        ("Dmitri Volkov", "dmitri.v@email.com"),
        ("Lars Johansson", "lars.j@email.com"),
        ("Nguyen Van Minh", "nguyen.m@email.com"),
        ("Juan Carlos Rodriguez", "juan.r@email.com"),
        ("Marcus Santos", "marcus.s@email.com"),
        ("Ibrahim Ahmed", "ibrahim.a@email.com"),
        ("Li Wei", "li.wei@email.com"),
        ("Anton Kovac", "anton.k@email.com"),
        ("Arjun Patel", "arjun.p@email.com"),
        ("Miguel Hernandez", "miguel.h@email.com"),
        ("Budi Santoso", "budi.s@email.com"),
        ("Carlo De Leon", "carlo.d@email.com"),
        ("Klaus Weber", "klaus.w@email.com"),
        ("Andre Baptiste", "andre.b@email.com"),
        ("Omar Hassan", "omar.h@email.com"),
        ("Vikram Singh", "vikram.s@email.com"),
        ("Zhang Wei", "zhang.w@email.com"),
        ("Mohammed Al-Sayed", "mohammed.a@email.com"),
        ("Kofi Adebayo", "kofi.a@email.com"),
        ("Boris Petrov", "boris.p@email.com"),
        ("Henrik Nielsen", "henrik.n@email.com"),
        ("Tran Van Duc", "tran.d@email.com"),
        ("Rahul Sharma", "rahul.s@email.com"),
        ("Liu Yang", "liu.y@email.com"),
        ("Yusuf Rahman", "yusuf.r@email.com"),
        ("Malik Johnson", "malik.j@email.com"),
        ("Oluwaseun Okonkwo", "oluwaseun.o@email.com"),
        ("Gustav Lindberg", "gustav.l@email.com"),
        ("Reza Ahmadi", "reza.a@email.com"),
        ("Jin-ho Park", "jinho.p@email.com"),
        ("Marco Rizal", "marco.r@email.com"),
        ("Franz Mueller", "franz.m@email.com"),
        ("Desmond Clarke", "desmond.c@email.com"),
        ("Hassan Mahmoud", "hassan.m@email.com"),
        ("Sanjay Gupta", "sanjay.g@email.com"),
        # Additional Indian names
        ("Aditya Krishnan", "aditya.k@email.com"),
        ("Pranav Mehta", "pranav.m@email.com"),
        ("Rohit Verma", "rohit.v@email.com"),
        ("Suresh Reddy", "suresh.r@email.com"),
        ("Amit Shah", "amit.s@email.com"),
        ("Deepak Chopra", "deepak.c@email.com"),
        ("Karthik Raman", "karthik.r@email.com"),
        ("Nikhil Menon", "nikhil.m@email.com"),
        ("Vivek Malhotra", "vivek.m@email.com"),
        ("Ramesh Iyer", "ramesh.i@email.com"),
        # Additional Chinese names
        ("Wang Fei", "wang.f@email.com"),
        ("Chen Xiuying", "chen.x@email.com"),
        ("Sun Yiping", "sun.y@email.com"),
        ("Zhao Ming", "zhao.m@email.com"),
        ("Wu Jianping", "wu.j@email.com"),
        ("Lin Baozhai", "lin.b@email.com"),
        ("Huang Wei", "huang.w@email.com"),
        ("Guo Xiaoming", "guo.x@email.com"),
        ("Yu Zhihao", "yu.z@email.com"),
        ("Zhou Yong", "zhou.y@email.com"),
    )
]

# Add new users to Firestore in one batched write instead of one network
# round-trip per user
batch = db.batch()
for user_data in new_users:
    # Count is 1 + floor(exponential draw with mean 2). That is usually 1-4,
    # but the tail is unbounded, so it is clamped to the pool size to keep
    # random.sample() from raising.
    num_interests = min(int(random.expovariate(1 / 2)) + 1, len(home_interests))
    interests = random.sample(home_interests, num_interests)

    # Add interests to user data. This mutates the new_users entry in place,
    # as before; re-running the cell simply overwrites the previous draw.
    user_data["interests"] = interests

    # document() with no ID gives an auto-ID reference, and create() fails if
    # the document already exists, matching what users_ref.add(user_data) did.
    batch.create(users_ref.document(), user_data)
batch.commit()

print("Added diverse set of new users with randomized interests")


Updated all users with home/decor related interests
