In [1]:
# Import required libraries
import random
import pandas as pd
import os
import time


In [2]:
# Configure the Together API key without embedding the secret in the notebook.
# The key is taken from the TOGETHER_API_KEY environment variable; when that is
# unset or empty, it is requested interactively so the value never ends up in
# the saved notebook or in version control.
# NOTE(review): a real-looking key was previously committed on this line; it
# should be treated as leaked and revoked/rotated.
from getpass import getpass

from together import Together

if not os.environ.get("TOGETHER_API_KEY"):
    os.environ["TOGETHER_API_KEY"] = getpass("Together API key: ")

# Initialize Together client (created with no explicit key, as before, so it
# relies on the environment variable set above)
client = Together()

# Pool of first names that candidates are drawn from
names = [
    "Adam", "Olivia", "Priya", "Jack", "Zara",
    "Sophia", "Noah", "Mason", "Aiden", "Emily",
    "Lily", "James", "Ella", "Lucas", "Amelia",
    "Charlie", "Mia", "Benjamin", "Ava", "Ethan",
]

# Job roles mapped to {"skills": [...]}: ten skill strings per role.
# Written as (role, skills) pairs and wrapped into the nested shape below, so
# lookups such as roles_skills[role]["skills"] work unchanged.
roles_skills = {
    role_name: {"skills": skill_list}
    for role_name, skill_list in (
        ("Data Scientist", [
            "Data analysis", "Machine learning algorithms", "Data wrangling",
            "Python/Pandas", "SQL querying", "Predictive modeling",
            "Big data processing", "Deep learning", "Statistical software",
            "Data storytelling",
        ]),
        ("Software Engineer", [
            "JavaScript", "React", "Node.js", "RESTful APIs",
            "Version control", "CI/CD pipelines", "System architecture",
            "Unit testing", "Microservices", "Cloud computing",
        ]),
        ("Data Engineer", [
            "ETL pipelines", "Data warehousing", "Big data technologies",
            "SQL optimization", "Data pipeline orchestration", "Python/Scala",
            "Real-time analytics", "Data storage solutions", "Cloud data tools",
            "Data integration",
        ]),
        ("UI Designer", [
            "UI/UX design", "Figma", "Wireframing", "User-centered design",
            "Responsive layouts", "Prototyping tools", "Interactive design",
            "User flows", "Color theory", "Design thinking",
        ]),
        ("Data Analyst", [
            "Data cleaning", "Excel/Google Sheets", "Data visualization", "SQL",
            "Statistical analysis", "Business intelligence tools", "A/B testing",
            "Data reporting", "Trend analysis", "Data-driven decision making",
        ]),
        ("Product Manager", [
            "Agile methodologies", "Market research", "Product strategy",
            "Roadmap planning", "Stakeholder management", "User experience design",
            "Feature prioritization", "Product lifecycle", "KPIs",
            "Cross-functional collaboration",
        ]),
    )
}

# Seniority labels and workplace arrangements available for random sampling
experience_levels = [
    "Entry-level",
    "Mid-level",
    "Senior-level",
    "Lead",
    "Director",
]
work_environments = [
    "Remote",
    "Hybrid",
    "In-office",
]

# Randomized Result Generator
def generate_result():
    """Return "selected" or "rejected", picked by ``random.choice``."""
    outcomes = ("selected", "rejected")
    return random.choice(outcomes)

# Generate Reason
def generate_reason(result, skills, role, years):
    """Return one templated sentence justifying a hiring decision.

    Args:
        result: Decision label; "selected" uses the positive templates, any
            other value uses the negative ones.
        skills: Sequence of skill strings; only the first two or three are
            mentioned, joined with ", ".
        role: Role name inserted into the sentence.
        years: Years of experience inserted into the sentence.

    Returns:
        One sentence chosen with ``random.choice`` from three templates.
    """
    top_three = ', '.join(skills[:3])
    top_two = ', '.join(skills[:2])

    if result == "selected":
        templates = (
            f"The candidate has a solid background in {top_three}, and with {years} years of experience, they possess the key capabilities needed for the {role} role.",
            f"Proven expertise in {top_two} along with {years} years of hands-on experience in {role}-specific tasks makes this candidate an excellent fit for the position.",
            f"The candidate's {years} years of experience combined with proficiency in {top_three} positions them well to contribute effectively to the {role} role.",
        )
    else:
        templates = (
            f"The candidate lacks sufficient expertise in crucial skills such as {top_three}, which are essential for the {role} role.",
            f"With only {years} years of experience and gaps in {top_two}, this candidate is not yet ready for the {role}.",
            f"Insufficient mastery of {top_three} made it challenging to consider this candidate for the {role} position.",
        )

    return random.choice(templates)

# Generate Job Description
def generate_job_description(role, years):
    """Return a one-sentence job posting for ``role`` that mentions ``years``.

    The sentence is chosen with ``random.choice`` from three fixed templates.
    """
    templates = (
        f"We are looking for a highly skilled {role} with a proven track record of at least {years} years in the industry to drive success in our team.",
        f"Join our team as a {role} and leverage your {years} years of experience to make an impact and contribute to innovative projects.",
        f"Seeking an experienced {role} with {years} years of hands-on expertise to lead and collaborate on challenging and exciting projects.",
    )
    return random.choice(templates)

# Generate Profile
def generate_profile(name, role, skills, result, experience, work_env, rating,
                     model="meta-llama/Llama-Vision-Free"):
    """Request a resume for one candidate from the chat-completion client.

    Builds a single prompt from the candidate attributes and sends it as one
    user message through the module-level ``client``.

    Args:
        name: Candidate name inserted into the prompt.
        role: Role the candidate is applying for.
        skills: Iterable of skill strings; joined with ", " in the prompt.
        result: Decision label the prompt asks the resume to highlight.
        experience: Value placed directly before the words
            "years of relevant experience" in the prompt.
        work_env: Work environment label inserted into the prompt.
        rating: Value the prompt describes as achieved in competitions.
        model: Model identifier forwarded to the client. Defaults to the model
            that used to be hard-coded here, so existing calls are unaffected.

    Returns:
        The message content of the first returned choice, with leading and
        trailing whitespace stripped.
    """
    profile = (
        f"Create a detailed resume for {name}, applying for the {role} position, highlighting their {result} status for the role. "
        f"Emphasize their {experience} years of relevant experience, the {rating} achieved in competitions, and include any other accomplishments or skills that are pertinent to the role. "
        f"Ensure the resume reflects their expertise in {', '.join(skills)}, and the ability to thrive in a {work_env} environment. "
        f"Do not include introductory text such as 'Here's a sample resume for {name} applying for a {role} position:.'"
    )
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": profile}]
    )
    return response.choices[0].message.content.strip()

# Generate Interview Transcript
def generate_transcript(name, role, result, skills, experience, work_env,
                        model="meta-llama/Llama-Vision-Free"):
    """Request an interview dialogue for one candidate from the chat client.

    Builds a single prompt from the candidate attributes and sends it as one
    user message through the module-level ``client``.

    Args:
        name: Candidate name; wrapped in hyphens (``-name-``) in the prompt.
        role: Role the candidate is applying for.
        result: Decision label the dialogue is asked to reflect.
        skills: Iterable of skill strings; joined with ", " in the prompt.
        experience: Value placed directly before the words
            "years of experience" in the prompt.
        work_env: Work environment label inserted into the prompt.
        model: Model identifier forwarded to the client. Defaults to the model
            that used to be hard-coded here, so existing calls are unaffected.

    Returns:
        The message content of the first returned choice, with leading and
        trailing whitespace stripped.
    """
    prompt = (
        f"Create an interview dialogue between the interviewer and -{name}-, "
        f"applying for the {role} position with {experience} years of experience. "
        f"Include questions related to their expertise in {', '.join(skills)} and how they would adapt to a {work_env} work environment. "
        f"Ensure the candidate's performance is reflected as {result} throughout the conversation. "
        f"Do not provide any introductory or explanatory text about the interview or the data."
    )
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content.strip()

# Generate Random Skills
def get_random_skills(role):
    """Return up to seven distinct skills sampled at random for ``role``.

    Looks the role up in the module-level ``roles_skills`` mapping; when the
    role lists fewer than seven skills, all of them are returned (in random
    order).
    """
    pool = roles_skills[role]["skills"]
    return random.sample(pool, k=min(7, len(pool)))

# Generate Random Rating
def generate_rating():
    """Return a random rating drawn from [3.0, 5.0], rounded to one decimal."""
    raw_score = random.uniform(3.0, 5.0)
    return round(raw_score, 1)


In [3]:
# Main automation cycle with a 0.1-second pause between candidates.
# Each iteration draws random candidate attributes, generates the resume,
# decision reason, interview transcript and job description, and appends one
# record to `data`. A failure for one candidate is reported and skipped so the
# remaining candidates are still processed.
data = []
failed_candidate_ids = []  # candidates whose generation raised, so gaps in `data` stay traceable
role_names = list(roles_skills.keys())  # loop-invariant, so built once up front

for candidate_id in range(1, 202):  # candidate IDs U_1 .. U_201
    # Select random details for each candidate
    name = random.choice(names)
    role = random.choice(role_names)
    skills = get_random_skills(role)
    result = generate_result()
    work_env = random.choice(work_environments)
    rating = generate_rating()
    years = random.randint(2, 8)

    # Generate profile, reason, and transcript with error handling
    try:
        print(f"Processing candidate {candidate_id} - {name}, {role}")  # progress trace

        # The resume and transcript prompts phrase their `experience` argument
        # as "<experience> years ...", so they are given the numeric `years`
        # value rather than a seniority label such as "Senior-level" (which
        # produced prompts like "Senior-level years of experience"). Using the
        # same `years` also keeps the resume and transcript consistent with the
        # reason and job description generated below.
        profile = generate_profile(name, role, skills, result, years, work_env, rating)
        print(f"Generated profile for {name}")

        reason = generate_reason(result, skills, role, years)
        print(f"Generated reason for {name}")

        transcript = generate_transcript(name, role, result, skills, years, work_env)
        print(f"Generated transcript for {name}")

        job_description = generate_job_description(role, years)
        print(f"Generated job description for {role}")

        # Append data only once every piece was generated successfully
        data.append({
            "ID": f"U_{candidate_id}",
            "Name": name,
            "Role": role,
            "Transcript": transcript,
            "Resume": profile,
            "Performance (select/reject)": result,
            "Reason for decision": reason,
            "Job Description": job_description,
        })

    except Exception as e:
        # Best-effort: report the failure, remember the ID, and move on
        print(f"Error for candidate {candidate_id}: {e}")
        failed_candidate_ids.append(candidate_id)

    # Delay before next iteration (runs whether or not generation succeeded)
    time.sleep(0.1)

    # Show progress every 10 iterations (counts attempts, including failures)
    if candidate_id % 10 == 0:
        print(f"Processed {candidate_id} candidates.")

if failed_candidate_ids:
    print(f"{len(failed_candidate_ids)} candidates failed: {failed_candidate_ids}")


Processing candidate 1 - Charlie, Data Scientist
Generated profile for Charlie
Generated reason for Charlie
Generated transcript for Charlie
Generated job description for Data Scientist
Processing candidate 2 - Sophia, Software Engineer
Generated profile for Sophia
Generated reason for Sophia
Generated transcript for Sophia
Generated job description for Software Engineer
Processing candidate 3 - Zara, Product Manager
Generated profile for Zara
Generated reason for Zara
Generated transcript for Zara
Generated job description for Product Manager
Processing candidate 4 - Emily, Data Engineer
Generated profile for Emily
Generated reason for Emily
Generated transcript for Emily
Generated job description for Data Engineer
Processing candidate 5 - Mason, Data Scientist
Generated profile for Mason
Generated reason for Mason
Generated transcript for Mason
Generated job description for Data Scientist
Processing candidate 6 - Jack, Data Analyst
Generated profile for Jack
Generated reason for Jack

In [4]:
# Convert the collected candidate records to a DataFrame.
# (Previously this bound the pandas DataFrame class itself to `df` instead of
# constructing a frame from `data`.)
df = pd.DataFrame(data)


In [7]:
!pip install openpyxl


Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5


In [8]:
# Destination workbook for the generated candidate records
excel_filename = "Generated_Candidate_Data.xlsx"

# Build the table from the accumulated records and write it out,
# omitting the DataFrame index column from the sheet
df = pd.DataFrame(data)
df.to_excel(excel_writer=excel_filename, index=False)

print(f"Data successfully saved to {excel_filename}")


Data successfully saved to Generated_Candidate_Data.xlsx


In [9]:
from IPython.display import FileLink

# Display a clickable link to the generated Excel workbook
# (the link object is the cell's last expression, so the notebook renders it)
FileLink("Generated_Candidate_Data.xlsx")
