In [39]:
from faker import Faker
import random
import csv

# Shared Faker instance; every fake name, username, email, sentence, text
# body and timestamp generated below comes from this object.
fake = Faker()

# Strip anything that cannot be represented as UTF-8
def sanitize_text(text):
    """Return *text* with characters that cannot be UTF-8 encoded removed.

    The string is encoded with ``errors='ignore'``, so unencodable
    characters (for example lone surrogates) are silently skipped, and the
    resulting bytes are decoded back into a ``str``.
    """
    utf8_bytes = text.encode(encoding='utf-8', errors='ignore')
    return str(utf8_bytes, 'utf-8')

# Generate Users
# One record per id in 1..num_users; every text field is produced by Faker
# and passed through sanitize_text before being stored.
num_users = 50

users = [
    {
        "user_id": uid,
        "name": sanitize_text(fake.name()),
        "username": sanitize_text(fake.user_name()),
        "email": sanitize_text(fake.email()),
        "bio": sanitize_text(fake.sentence(nb_words=10)),
    }
    for uid in range(1, num_users + 1)
]

# Generate Posts
# One record per id in 1..num_posts. The author is a uniformly random id in
# 1..num_users, the body is Faker text capped at 200 characters, and the
# timestamp from fake.date_time_this_year() is stored as a formatted string.
num_posts = 200

posts = [
    {
        "post_id": pid,
        "user_id": random.randint(1, num_users),
        "content": sanitize_text(fake.text(max_nb_chars=200)),
        "timestamp": fake.date_time_this_year().strftime("%Y-%m-%d %H:%M:%S"),
    }
    for pid in range(1, num_posts + 1)
]

# Simulate Likes
# A user can like a given post at most once, so the (user_id, post_id) pairs
# must be distinct. Drawing the two ids independently (as two separate
# random.randint calls) produces repeated pairs, so instead each possible
# pair is encoded as one integer in range(num_users * num_posts) and the
# codes are sampled without replacement.
num_likes = 500

total_pairs = num_users * num_posts
# Cap the request: random.sample raises ValueError when asked for more items
# than the population holds, and there cannot be more likes than pairs.
pair_codes = random.sample(range(total_pairs), min(num_likes, total_pairs))

likes = [
    {
        "like_id": like_id,
        # Decode the pair: quotient selects the user, remainder the post;
        # both are shifted to the 1-based ids used by the other tables.
        "user_id": code // num_posts + 1,
        "post_id": code % num_posts + 1,
    }
    for like_id, code in enumerate(pair_codes, start=1)
]

# Simulate Comments
# One record per id in 1..num_comments. The commenter and the target post
# are independent uniformly random ids, the text is a Faker sentence passed
# through sanitize_text, and the timestamp from fake.date_time_this_year()
# is stored as a formatted string.
num_comments = 300

comments = [
    {
        "comment_id": cid,
        "user_id": random.randint(1, num_users),
        "post_id": random.randint(1, num_posts),
        "comment_text": sanitize_text(fake.sentence(nb_words=15)),
        "timestamp": fake.date_time_this_year().strftime("%Y-%m-%d %H:%M:%S"),
    }
    for cid in range(1, num_comments + 1)
]

# Save Users, Posts, Likes and Comments to CSV
# Each entry is (output path, column order, rows). The tables are written in
# the order listed, each to its own file with a header row first.
csv_exports = [
    ("users.csv", ["user_id", "name", "username", "email", "bio"], users),
    ("posts.csv", ["post_id", "user_id", "content", "timestamp"], posts),
    ("likes.csv", ["like_id", "user_id", "post_id"], likes),
    ("comments.csv", ["comment_id", "user_id", "post_id", "comment_text", "timestamp"], comments),
]

for csv_path, columns, rows in csv_exports:
    # newline="" leaves line-ending handling to the csv module.
    with open(csv_path, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=columns)
        writer.writeheader()
        writer.writerows(rows)

print("Social media activity dataset generated and saved to CSV files.")


Social media activity dataset generated and saved to CSV files.
