In [None]:
import pandas as pd
from faker import Faker
import random
from datetime import timedelta

# Shared Faker instance used by the generator functions below for names,
# e-mail addresses, passwords, dates and remark sentences.
fake = Faker()

# Job titles a generated user can hold.
# NOTE: order matters -- generate_fake_users() indexes this list by position:
# indices 0-2 are assigned to role 'user', indices 3-5 to role 'trainer'.
designations = [
    'Junior Developer',
    'Developer',
    'Senior Developer',
    'Lead Developer',
    'Manager',
    'Director'
]

# Maps each training field name to the two-letter prefix used when building a
# session's training code (e.g. 'Full Stack Development' -> 'FS').
training_field_codes = {
    'Full Stack Development': 'FS',
    'Data Science': 'DS',
    'Data Engineering': 'DE',
    'UI/UX': 'UI',
    'Software Testing': 'ST'
}

def generate_fake_users(num_users):
    """Build a list of ``num_users`` fake user records.

    Each record is a dict with the keys ``'username'``, ``'email'``,
    ``'password'``, ``'role'`` and ``'designation'``. A record is a
    ``'trainer'`` with probability one in three and a ``'user'`` otherwise.
    Trainers receive one of the last three entries of ``designations``;
    users receive one of the first three.

    Args:
        num_users: How many records to create. Zero or a negative number
            produces an empty list.

    Returns:
        The list of generated user dicts. A summary line is also printed.
    """

    def _build_record():
        # Draw the role first, then the designation slot that matches it.
        if random.randint(1, 3) == 1:
            chosen_role = 'trainer'
            slot = random.randint(3, 5)
        else:
            chosen_role = 'user'
            slot = random.randint(0, 2)

        return {
            'username': fake.name(),  # a full name, not a login handle
            'email': fake.email(),
            'password': fake.password(),
            'role': chosen_role,
            'designation': designations[slot],
        }

    users = [_build_record() for _ in range(num_users)]

    print(f"{num_users} fake users created.")
    return users

def generate_fake_training_sessions(num_sessions, users):
    """Create ``num_sessions`` fake training-session records.

    Every session gets one trainer drawn from the users whose ``'role'`` is
    ``'trainer'`` and one to three *distinct* participants drawn from the
    users whose ``'role'`` is ``'user'`` (fewer when the pool is smaller than
    the number drawn).

    Args:
        num_sessions: Number of sessions to generate. Zero or a negative
            number yields an empty list.
        users: List of user dicts; only the ``'role'`` and ``'username'``
            keys are read.

    Returns:
        A list of session dicts with the keys ``'training_code'``,
        ``'status'``, ``'trainer'``, ``'startDate'``, ``'endDate'`` and
        ``'participants'``. A summary line is also printed.

    Raises:
        ValueError: If at least one session is requested but ``users``
            contains no trainer or no regular user to draw from.
    """
    # Split the pool once instead of re-filtering it for every session and
    # every participant.
    trainers = [user for user in users if user['role'] == 'trainer']
    regular_users = [user for user in users if user['role'] == 'user']

    if num_sessions > 0:
        # Fail with an explicit message rather than the opaque IndexError
        # that random.choice raises on an empty sequence.
        if not trainers:
            raise ValueError(
                "Cannot create training sessions: no user has role 'trainer'."
            )
        if not regular_users:
            raise ValueError(
                "Cannot create training sessions: no user has role 'user'."
            )

    field_names = list(training_field_codes)
    training_sessions = []

    for i in range(num_sessions):
        trainer = random.choice(trainers)

        # The numeric suffix is the 1-based session index, so it is unique
        # across all sessions regardless of field (e.g. FS-001, DS-002, ...).
        field = random.choice(field_names)
        training_code = f"{training_field_codes[field]}-{str(i + 1).zfill(3)}"

        start_date = fake.date_time_this_year()
        end_date = start_date + timedelta(days=random.randint(1, 30))  # 1-30 days later

        # random.sample draws without replacement, so the same user can no
        # longer be listed twice in one session.
        participant_count = min(random.randint(1, 3), len(regular_users))
        participants = []
        for user_participant in random.sample(regular_users, participant_count):
            participants.append({
                'user': user_participant['username'],  # Placeholder for ObjectId; replace with the actual ObjectId later
                'hackerRankScore': round(random.uniform(0, 10), 2),  # scores are in the range 0-10
                'assessmentScore': round(random.uniform(0, 10), 2),
                'performance': round(random.uniform(0, 10), 2),
                'communication': round(random.uniform(0, 10), 2),
                'remarks': fake.sentence(),
            })

        training_sessions.append({
            'training_code': training_code,
            'status': random.choice(['completed', 'ongoing', 'pending']),
            'trainer': trainer['username'],  # Using username instead of ObjectId
            'startDate': start_date,
            'endDate': end_date,
            'participants': participants,
        })

    print(f"{num_sessions} fake training sessions created.")
    return training_sessions

# Driver: build the user pool first, because the training sessions draw their
# trainers and participants from it.
num_users = 30
users = generate_fake_users(num_users)

num_sessions = 10
training_sessions = generate_fake_training_sessions(num_sessions, users)

# Convert the lists of dicts to DataFrames (one row per user / per session).
users_df = pd.DataFrame(users)
training_sessions_df = pd.DataFrame(training_sessions)

# Write both tables to CSV in the current working directory, without the
# DataFrame index column.
# NOTE(review): each 'participants' cell holds a list of dicts; to_csv will
# presumably write its string representation -- confirm consumers can parse it.
users_df.to_csv('fake_users.csv', index=False)
training_sessions_df.to_csv('fake_training_sessions.csv', index=False)

print("Fake data has been saved to 'fake_users.csv' and 'fake_training_sessions.csv'.")
