Employee Fake Data

In [16]:
import csv
from faker import Faker
import random
import os  # Import os to handle directory operations

# Faker generator used for the employee emails, names, genders and passwords.
# NOTE(review): no seed is set in this cell, so output differs between runs
# unless the generators are seeded elsewhere.
fake = Faker()

# Job titles an employee can be assigned
roles = [
    'Software Engineer',
    'Senior Software Engineer',
    'Solution Enabler',
    'Solutions Consultant',
]

# Function to generate fake employee data
def generate_fake_employee(emp_id):
    """Build one fake employee record.

    Args:
        emp_id: Value appended to the ``JMD`` prefix to form the employee id.

    Returns:
        dict with the keys ``id``, ``employeeEmail``, ``employeeName``,
        ``role``, ``gender`` and ``password`` (in that order).
    """
    # Values are drawn in column order so the generators are consumed in a
    # fixed sequence for every record.
    email = fake.unique.email()  # drawn through the `unique` proxy to avoid repeated emails
    full_name = fake.name()
    job_title = random.choice(roles)
    gender = fake.random_element(elements=('Male', 'Female'))
    secret = fake.password()

    record = {'id': f'JMD{emp_id}'}
    record['employeeEmail'] = email
    record['employeeName'] = full_name
    record['role'] = job_title
    record['gender'] = gender
    record['password'] = secret
    return record

# Ensure the STAGGING output folder exists (no error if it is already there)
os.makedirs('STAGGING', exist_ok=True)

# Write the employee table: one header row followed by the generated rows
with open(os.path.join('STAGGING', 'employees.csv'), mode='w', newline='') as file:
    writer = csv.DictWriter(
        file,
        fieldnames=['id', 'employeeEmail', 'employeeName', 'role', 'gender', 'password'],
    )
    writer.writeheader()

    # Employee numbers run from 1 to 500 inclusive; rows are produced lazily
    # and written in that order.
    writer.writerows(generate_fake_employee(emp_id) for emp_id in range(1, 501))

print("Data saved to STAGGING/employees.csv")


Data saved to STAGGING/employees.csv


Skill Set and Certification Fake Data

In [15]:
import csv
import random
from faker import Faker
import os  # Import os to handle directory operations

# Faker generator used for the certification links and dates produced in this cell.
# NOTE(review): no seed is set in this cell, so output differs between runs
# unless the generators are seeded elsewhere.
fake = Faker()

# Skills available in each department (declaration order is preserved)
departments = {
    'Development': [
        'Reactjs', 'Node', 'Next Js', 'Laravel', 'Angular', 'Flutter', 'React Native',
    ],
    'Data Science': [
        'Python', 'Applied ML', 'Big Data',
    ],
    'Data Engineering': [
        'Python', 'MySQL', 'Web Scraping', 'DBT', 'SnowFlake', 'Data Bricks',
    ],
    'Cloud': [
        'AWS', 'Azure', 'GCP', 'Redis',
    ],
}

# Function to assign skills based on department
def assign_skills(department):
    """Pick a random subset of one department's skills.

    Between 1 and 3 distinct skills are chosen; the upper bound is lowered to
    the size of the department's skill list when it holds fewer than 3 entries.

    Args:
        department: Key into the module-level ``departments`` mapping.

    Returns:
        list of distinct skill names drawn from ``departments[department]``.

    Raises:
        KeyError: If ``department`` is not a key of ``departments``.
        ValueError: If the department has no skills to choose from.
    """
    available = departments[department]
    if not available:
        raise ValueError(f"Department {department!r} has no skills to choose from")

    # random.sample raises ValueError when asked for more items than the
    # population holds, so never request more than are available.
    max_skills = min(3, len(available))
    return random.sample(available, random.randint(1, max_skills))

# Function to generate fake skillset data for an employee
def generate_fake_skillset(employee_id, skillset_start_id):
    """Create the skill set rows for a single employee.

    The employee is placed in 1 or 2 randomly chosen departments and gets one
    row per department, each holding the skills returned by ``assign_skills``.

    Args:
        employee_id: Value appended to ``JMD`` to form the ``employeeId`` field.
        skillset_start_id: Number used for the first row's ``SK<n>`` id; later
            rows count up from it.

    Returns:
        tuple ``(rows, next_id)`` where ``rows`` is a list of dicts with the keys
        ``id``, ``employeeId``, ``skillSet`` (a list) and ``department``, and
        ``next_id`` is the first id number not used by ``rows``.
    """
    department_count = random.randint(1, 2)
    chosen_departments = random.sample(list(departments), department_count)

    rows = [
        {
            'id': f'SK{skillset_start_id + offset}',
            'employeeId': f'JMD{employee_id}',
            'skillSet': assign_skills(name),
            'department': name,
        }
        for offset, name in enumerate(chosen_departments)
    ]
    return rows, skillset_start_id + len(rows)

# Function to generate fake certification data based on assigned skills
def generate_fake_certification(employee_id, skills, department, cert_start_id):
    """Create one certification row per skill for a single employee.

    Args:
        employee_id: Value appended to ``JMD`` to form the ``employeeId`` field.
        skills: Iterable of skill names; each one becomes both the
            ``courseName`` and the ``skills`` value of its row.
        department: Stored unchanged as ``courseDepartment`` on every row.
        cert_start_id: Number used for the first row's ``CERT<n>`` id; later
            rows count up from it.

    Returns:
        tuple ``(rows, next_id)`` where ``rows`` is the list of certification
        dicts and ``next_id`` is the first id number not used by ``rows``.
    """
    owner = f'JMD{employee_id}'
    rows = [
        {
            'id': f'CERT{cert_start_id + offset}',
            'employeeId': owner,
            'courseName': skill_name,
            'certificationLink': fake.url(),
            'skills': skill_name,
            'courseDepartment': department,
            # Status is picked at random, independently for every row
            'status': random.choice(['Accept', 'Reject']),
            # Date drawn from the window starting two years back and ending today
            'certificationDate': fake.date_between(start_date='-2y', end_date='today'),
        }
        for offset, skill_name in enumerate(skills)
    ]
    return rows, cert_start_id + len(rows)

# Create the STAGGING directory if it doesn't exist
os.makedirs('STAGGING', exist_ok=True)

# Write both tables in one pass so every certification row is derived from a
# skill set row that was just written for the same employee.
with open(os.path.join('STAGGING', 'skillsets.csv'), mode='w', newline='') as skill_file, \
     open(os.path.join('STAGGING', 'certifications.csv'), mode='w', newline='') as cert_file:

    # Skillsets CSV
    skill_fieldnames = ['id', 'employeeId', 'skillSet', 'department']
    skill_writer = csv.DictWriter(skill_file, fieldnames=skill_fieldnames)
    skill_writer.writeheader()

    # Certifications CSV
    cert_fieldnames = ['id', 'employeeId', 'courseName', 'certificationLink', 'skills',
                       'courseDepartment', 'status', 'certificationDate']
    cert_writer = csv.DictWriter(cert_file, fieldnames=cert_fieldnames)
    cert_writer.writeheader()

    # Id counters are threaded through the generators so ids stay unique
    # across all employees (SK1, SK2, ... and CERT1, CERT2, ...).
    certification_id_counter = 1
    skillset_id_counter = 1

    # Generate and write data for 500 employees
    for employee_id in range(1, 501):
        skillsets, skillset_id_counter = generate_fake_skillset(employee_id, skillset_id_counter)

        for skillset in skillsets:
            # Flatten the skill list only in the row that goes to the CSV.
            # The generated dict keeps its list, so the certifications below
            # receive the exact skills instead of a string that was joined and
            # split again (which would break on a skill name containing ', ').
            skill_writer.writerow({**skillset, 'skillSet': ', '.join(skillset['skillSet'])})

            # One certification per skill assigned in this department
            certifications, certification_id_counter = generate_fake_certification(
                employee_id,
                skillset['skillSet'],
                skillset['department'],
                certification_id_counter,
            )
            cert_writer.writerows(certifications)

print("Data saved to STAGGING/skillsets.csv and STAGGING/certifications.csv")


Data saved to STAGGING/skillsets.csv and STAGGING/certifications.csv


Skill Score Data

In [17]:
import csv
import random
from faker import Faker
import os  # Import os to handle directory operations

# Faker generator for this cell.
# NOTE(review): the skill score code visible in this cell never references
# `fake`; confirm whether it is still needed.
fake = Faker()

# Skills available in each department (declaration order is preserved).
# NOTE(review): this mapping repeats the one defined in the skill set cell;
# keep the two in sync.
departments = {
    'Development': [
        'Reactjs', 'Node', 'Next Js', 'Laravel', 'Angular', 'Flutter', 'React Native',
    ],
    'Data Science': [
        'Python', 'Applied ML', 'Big Data',
    ],
    'Data Engineering': [
        'Python', 'MySQL', 'Web Scraping', 'DBT', 'SnowFlake', 'Data Bricks',
    ],
    'Cloud': [
        'AWS', 'Azure', 'GCP', 'Redis',
    ],
}

# Function to assign skills based on department
def assign_skills(department):
    """Pick a random subset of one department's skills.

    Between 1 and 3 distinct skills are chosen; the upper bound is lowered to
    the size of the department's skill list when it holds fewer than 3 entries.

    Args:
        department: Key into the module-level ``departments`` mapping.

    Returns:
        list of distinct skill names drawn from ``departments[department]``.

    Raises:
        KeyError: If ``department`` is not a key of ``departments``.
        ValueError: If the department has no skills to choose from.
    """
    available = departments[department]
    if not available:
        raise ValueError(f"Department {department!r} has no skills to choose from")

    # random.sample raises ValueError when asked for more items than the
    # population holds, so never request more than are available.
    max_skills = min(3, len(available))
    return random.sample(available, random.randint(1, max_skills))

# Function to generate fake skill score data
def generate_fake_skill_scores(employee_id, skillset_start_id):
    """Create the assessment score rows for a single employee.

    Five assessments (``ASSESS1`` .. ``ASSESS5``) are simulated. Each one picks
    a random department, asks ``assign_skills`` for skills from it, and emits
    one score row per skill.

    Args:
        employee_id: Value appended to ``JMD`` to form the ``employeeId`` field.
        skillset_start_id: Number used for the first row's ``SKS<n>`` id; later
            rows count up from it. (Despite the name, it numbers skill score
            rows.)

    Returns:
        tuple ``(rows, next_id)`` where ``rows`` is the list of score dicts and
        ``next_id`` is the first id number not used by ``rows``.
    """
    # Inclusive score range per outcome: accepted attempts score above 70,
    # rejected attempts score 70 or below.
    score_bounds = {'Accept': (71, 100), 'Reject': (0, 70)}
    department_names = list(departments.keys())
    owner = f'JMD{employee_id}'

    rows = []
    next_id = skillset_start_id
    for assessment_number in range(1, 6):
        chosen_department = random.choice(department_names)
        for skill_name in assign_skills(chosen_department):
            # Outcome first, then a score consistent with it
            outcome = random.choice(['Accept', 'Reject'])
            lowest, highest = score_bounds[outcome]
            score = random.randint(lowest, highest)

            rows.append({
                'id': f'SKS{next_id}',
                'employeeId': owner,
                'assessmentId': f'ASSESS{assessment_number}',
                'courseName': skill_name,
                'skill': skill_name,
                'courseDepartment': chosen_department,
                'testScore': score,
                'status': outcome,
                'noOfAttempts': random.randint(1, 3),
            })
            next_id += 1

    return rows, next_id

# Ensure the STAGGING output folder exists (no error if it is already there)
os.makedirs('STAGGING', exist_ok=True)

# Write the skill score table: one header row followed by every employee's rows
file_path = os.path.join('STAGGING', 'skill_scores.csv')
with open(file_path, mode='w', newline='') as score_file:
    score_fieldnames = [
        'id',
        'employeeId',
        'assessmentId',
        'courseName',
        'skill',
        'courseDepartment',
        'testScore',
        'status',
        'noOfAttempts',
    ]
    score_writer = csv.DictWriter(score_file, fieldnames=score_fieldnames)
    score_writer.writeheader()

    # The id counter is threaded through the generator so SKS ids stay unique
    # across all employees, starting at SKS1.
    skill_score_id_counter = 1

    # Employee numbers run from 1 to 500 inclusive
    for employee_id in range(1, 501):
        skill_scores, skill_score_id_counter = generate_fake_skill_scores(
            employee_id, skill_score_id_counter
        )
        score_writer.writerows(skill_scores)

print("Data saved to STAGGING/skill_scores.csv")


Data saved to STAGGING/skill_scores.csv
