In [5]:
import json
import os
import random
import time
from collections import Counter
from datetime import datetime
from typing import Dict, List, Optional

import openai
from dotenv import load_dotenv



In [24]:
def main():
    """
    Load environment variables and return the OpenAI API key.

    Calls ``load_dotenv()`` to populate the process environment, then reads
    the ``OpenAI_API_Key`` variable.

    Returns:
        str: The API key found in the environment.

    Raises:
        ValueError: If ``OpenAI_API_Key`` is unset or empty.
    """
    # Load environment variables
    load_dotenv()
    # Retrieve the API key from environment variables
    api_key = os.getenv("OpenAI_API_Key")
    if not api_key:
        raise ValueError("API key not found. Please set the 'OpenAI_API_Key' in your environment variables.")
    # Hand the validated key back to the caller instead of dropping it when
    # the function returns.
    return api_key

In [28]:
# Define the JobDescriptionGenerator class
class JobDescriptionGenerator:
    def __init__(self, api_key: str):
        """
        Build the OpenAI client and the lookup tables used to vary job postings.

        Args:
            api_key (str): OpenAI API key handed to the client
        """
        # OpenAI client used for all requests
        self.client = openai.OpenAI(api_key=api_key)

        # Role titles, one list per job category
        software_roles = [
            "Frontend Developer", "Backend Developer", "Full Stack Developer",
            "Senior Frontend Developer", "Senior Backend Developer",
            "Lead Full Stack Developer", "Software Engineer", "Senior Software Engineer",
        ]
        data_science_roles = [
            "Data Scientist", "Data Analyst", "Senior Data Scientist",
            "Database Administrator", "Business Intelligence Analyst",
            "Data Engineer", "Junior Data Analyst", "Principal Data Scientist",
        ]
        devops_roles = [
            "DevOps Engineer", "Site Reliability Engineer", "Cloud Specialist",
            "Senior DevOps Engineer", "Cloud Architect", "Infrastructure Engineer",
            "Platform Engineer", "Release Engineer",
        ]
        ml_roles = [
            "Machine Learning Engineer", "AI Researcher", "NLP Specialist",
            "Senior ML Engineer", "Computer Vision Engineer", "AI/ML Consultant",
            "Deep Learning Engineer", "MLOps Engineer",
        ]
        qa_roles = [
            "QA Engineer", "Test Automation Specialist", "QA Manager",
            "Senior QA Engineer", "Performance Test Engineer", "QA Lead",
            "Manual Testing Specialist", "Test Architect",
        ]
        pm_roles = [
            "Project Manager", "Scrum Master", "Product Owner",
            "Senior Project Manager", "Technical Project Manager", "Agile Coach",
            "Program Manager", "Product Manager",
        ]
        analytics_roles = [
            "Business Analyst", "BI Specialist", "Data Visualization Expert",
            "Senior Business Analyst", "Business Intelligence Developer",
            "Analytics Consultant", "Reporting Analyst", "Market Research Analyst",
        ]

        # Category name -> roles available in that category
        self.job_categories = {
            "Software Engineering": software_roles,
            "Data Science": data_science_roles,
            "DevOps": devops_roles,
            "Machine Learning": ml_roles,
            "Quality Assurance": qa_roles,
            "Project Management": pm_roles,
            "Business Analytics": analytics_roles,
        }

        # Employer flavours, used to add variety
        self.company_types = [
            "Tech Startup", "Fortune 500 Company", "Consulting Firm",
            "Healthcare Organization", "Financial Services", "E-commerce Platform",
            "SaaS Company", "Manufacturing Company", "Government Agency",
            "Non-profit Organization", "Educational Institution", "Media Company",
        ]

        # Work locations, used to add variety
        self.locations = [
            "San Francisco, CA", "New York, NY", "Seattle, WA", "Austin, TX",
            "Boston, MA", "Chicago, IL", "Denver, CO", "Remote",
            "Atlanta, GA", "Los Angeles, CA", "Portland, OR", "Miami, FL",
            "Phoenix, AZ", "Dallas, TX", "Washington, DC", "San Diego, CA",
        ]

        # Seniority bands
        self.experience_levels = [
            "Entry Level",
            "Mid Level",
            "Senior Level",
            "Lead/Principal",
        ]


In [29]:
# Read the key at module scope so this cell also runs on a fresh kernel:
# main() keeps its API_KEY local, so no global of that name exists yet.
load_dotenv()
API_KEY = os.getenv("OpenAI_API_Key")
if not API_KEY:
    raise ValueError("API key not found. Please set the 'OpenAI_API_Key' in your environment variables.")
generator = JobDescriptionGenerator(API_KEY)

In [30]:
# Inspect the category -> roles mapping built by the constructor
generator.job_categories.items() 

dict_items([('Software Engineering', ['Frontend Developer', 'Backend Developer', 'Full Stack Developer', 'Senior Frontend Developer', 'Senior Backend Developer', 'Lead Full Stack Developer', 'Software Engineer', 'Senior Software Engineer']), ('Data Science', ['Data Scientist', 'Data Analyst', 'Senior Data Scientist', 'Database Administrator', 'Business Intelligence Analyst', 'Data Engineer', 'Junior Data Analyst', 'Principal Data Scientist']), ('DevOps', ['DevOps Engineer', 'Site Reliability Engineer', 'Cloud Specialist', 'Senior DevOps Engineer', 'Cloud Architect', 'Infrastructure Engineer', 'Platform Engineer', 'Release Engineer']), ('Machine Learning', ['Machine Learning Engineer', 'AI Researcher', 'NLP Specialist', 'Senior ML Engineer', 'Computer Vision Engineer', 'AI/ML Consultant', 'Deep Learning Engineer', 'MLOps Engineer']), ('Quality Assurance', ['QA Engineer', 'Test Automation Specialist', 'QA Manager', 'Senior QA Engineer', 'Performance Test Engineer', 'QA Lead', 'Manual Tes

In [None]:
# Define the JobDescriptionGenerator class (now with the prompt builder)
class JobDescriptionGenerator:
    def __init__(self, api_key: str):
        """
        Create the OpenAI client and the tables of roles, companies and locations.

        Args:
            api_key (str): OpenAI API key
        """
        # Client for the OpenAI API
        self.client = openai.OpenAI(api_key=api_key)

        # Job categories mapped to the role titles they contain
        self.job_categories = {
            "Software Engineering": [
                "Frontend Developer", "Backend Developer",
                "Full Stack Developer", "Senior Frontend Developer",
                "Senior Backend Developer", "Lead Full Stack Developer",
                "Software Engineer", "Senior Software Engineer",
            ],
            "Data Science": [
                "Data Scientist", "Data Analyst",
                "Senior Data Scientist", "Database Administrator",
                "Business Intelligence Analyst", "Data Engineer",
                "Junior Data Analyst", "Principal Data Scientist",
            ],
            "DevOps": [
                "DevOps Engineer", "Site Reliability Engineer",
                "Cloud Specialist", "Senior DevOps Engineer",
                "Cloud Architect", "Infrastructure Engineer",
                "Platform Engineer", "Release Engineer",
            ],
            "Machine Learning": [
                "Machine Learning Engineer", "AI Researcher",
                "NLP Specialist", "Senior ML Engineer",
                "Computer Vision Engineer", "AI/ML Consultant",
                "Deep Learning Engineer", "MLOps Engineer",
            ],
            "Quality Assurance": [
                "QA Engineer", "Test Automation Specialist",
                "QA Manager", "Senior QA Engineer",
                "Performance Test Engineer", "QA Lead",
                "Manual Testing Specialist", "Test Architect",
            ],
            "Project Management": [
                "Project Manager", "Scrum Master",
                "Product Owner", "Senior Project Manager",
                "Technical Project Manager", "Agile Coach",
                "Program Manager", "Product Manager",
            ],
            "Business Analytics": [
                "Business Analyst", "BI Specialist",
                "Data Visualization Expert", "Senior Business Analyst",
                "Business Intelligence Developer", "Analytics Consultant",
                "Reporting Analyst", "Market Research Analyst",
            ],
        }

        # Kinds of employer, for variety
        self.company_types = [
            "Tech Startup", "Fortune 500 Company",
            "Consulting Firm", "Healthcare Organization",
            "Financial Services", "E-commerce Platform",
            "SaaS Company", "Manufacturing Company",
            "Government Agency", "Non-profit Organization",
            "Educational Institution", "Media Company",
        ]

        # Work locations, for variety
        self.locations = [
            "San Francisco, CA", "New York, NY",
            "Seattle, WA", "Austin, TX",
            "Boston, MA", "Chicago, IL",
            "Denver, CO", "Remote",
            "Atlanta, GA", "Los Angeles, CA",
            "Portland, OR", "Miami, FL",
            "Phoenix, AZ", "Dallas, TX",
            "Washington, DC", "San Diego, CA",
        ]

        # Seniority bands
        self.experience_levels = [
            "Entry Level", "Mid Level",
            "Senior Level", "Lead/Principal",
        ]

    def generate_job_description_prompt(self, role: str, category: str, company_type: str,
                                      location: str, experience_level: str) -> str:
        """
        Build the text prompt that asks the model for one job description.

        The arguments are interpolated into the prompt as given; nothing is
        validated here.

        Args:
            role (str): Job role title
            category (str): Job category
            company_type (str): Type of company
            location (str): Job location
            experience_level (str): Experience level required

        Returns:
            str: The formatted prompt text
        """
        # The template keeps its leading indentation; it is sent as written.
        return f"""
        Generate a realistic and detailed job description for the following position:

        Job Title: {role}
        Category: {category}
        Company Type: {company_type}
        Location: {location}
        Experience Level: {experience_level}

        Please include the following sections in a professional format:

        1. **Job Title**: {role}
        2. **Company Overview**: Brief description of the company (2-3 sentences)
        3. **Job Summary**: Overview of the role and its importance (3-4 sentences)
        4. **Key Responsibilities**: 5-7 specific responsibilities
        5. **Required Qualifications**: 
            - Education requirements
            - Years of experience
            - Technical skills (be specific to the role)
            - Soft skills
        6. **Preferred Qualifications**: 2-3 nice-to-have skills or experiences
        7. **Benefits**: 3-4 attractive benefits
        8. **Location**: {location}
        9. **Employment Type**: Full-time
        10. **Salary Range**: Provide a realistic range based on the role and location

        Make the job description engaging, realistic, and tailored to the specific role and industry.
        Use professional language but keep it approachable.
        """

# Instantiate and test

# Read the key here so this cell also runs on a fresh kernel; no earlier cell
# defines API_KEY at module scope.
load_dotenv()
API_KEY = os.getenv("OpenAI_API_Key")
if not API_KEY:
    raise ValueError("API key not found. Please set the 'OpenAI_API_Key' in your environment variables.")

generator = JobDescriptionGenerator(API_KEY)
# The prompt builder does not validate its arguments, so free-form values work here.
result = generator.generate_job_description_prompt("FrontEnd Developer", "IT", "Startup", "Remote", "Mid-Level")
print(result)


        Generate a realistic and detailed job description for the following position:

        Job Title: FrontEnd Developer
        Category: IT
        Company Type: Startup
        Location: Remote
        Experience Level: Mid-Level

        Please include the following sections in a professional format:

        1. **Job Title**: FrontEnd Developer
        2. **Company Overview**: Brief description of the company (2-3 sentences)
        3. **Job Summary**: Overview of the role and its importance (3-4 sentences)
        4. **Key Responsibilities**: 5-7 specific responsibilities
        5. **Required Qualifications**: 
            - Education requirements
            - Years of experience
            - Technical skills (be specific to the role)
            - Soft skills
        6. **Preferred Qualifications**: 2-3 nice-to-have skills or experiences
        7. **Benefits**: 3-4 attractive benefits
        8. **Location**: Remote
        9. **Employment Type**: Full-time
        10. 

In [40]:
from typing import Optional

# Define the full JobDescriptionGenerator class
class JobDescriptionGenerator:
    """
    Generate synthetic job descriptions with the OpenAI chat completions API.

    Each description comes from a prompt built out of a role, category,
    company type, location and experience level taken from the lookup tables
    defined in ``__init__``.
    """

    def __init__(self, api_key: str, model: str = "gpt-3.5-turbo"):
        """
        Initialize the Job Description Generator with OpenAI API key

        Args:
            api_key (str): OpenAI API key
            model (str): Chat model name sent with every completion request
        """
        # Configure the OpenAI client for the new API
        self.client = openai.OpenAI(api_key=api_key)
        self.model = model

        # Define job categories and roles
        self.job_categories = {
            "Software Engineering": [
                "Frontend Developer", "Backend Developer", "Full Stack Developer",
                "Senior Frontend Developer", "Senior Backend Developer",
                "Lead Full Stack Developer", "Software Engineer", "Senior Software Engineer"
            ],
            "Data Science": [
                "Data Scientist", "Data Analyst", "Senior Data Scientist",
                "Database Administrator", "Business Intelligence Analyst",
                "Data Engineer", "Junior Data Analyst", "Principal Data Scientist"
            ],
            "DevOps": [
                "DevOps Engineer", "Site Reliability Engineer", "Cloud Specialist",
                "Senior DevOps Engineer", "Cloud Architect", "Infrastructure Engineer",
                "Platform Engineer", "Release Engineer"
            ],
            "Machine Learning": [
                "Machine Learning Engineer", "AI Researcher", "NLP Specialist",
                "Senior ML Engineer", "Computer Vision Engineer", "AI/ML Consultant",
                "Deep Learning Engineer", "MLOps Engineer"
            ],
            "Quality Assurance": [
                "QA Engineer", "Test Automation Specialist", "QA Manager",
                "Senior QA Engineer", "Performance Test Engineer", "QA Lead",
                "Manual Testing Specialist", "Test Architect"
            ],
            "Project Management": [
                "Project Manager", "Scrum Master", "Product Owner",
                "Senior Project Manager", "Technical Project Manager", "Agile Coach",
                "Program Manager", "Product Manager"
            ],
            "Business Analytics": [
                "Business Analyst", "BI Specialist", "Data Visualization Expert",
                "Senior Business Analyst", "Business Intelligence Developer",
                "Analytics Consultant", "Reporting Analyst", "Market Research Analyst"
            ]
        }

        # Company types and locations for variety
        self.company_types = [
            "Tech Startup", "Fortune 500 Company", "Consulting Firm", "Healthcare Organization",
            "Financial Services", "E-commerce Platform", "SaaS Company", "Manufacturing Company",
            "Government Agency", "Non-profit Organization", "Educational Institution", "Media Company"
        ]

        self.locations = [
            "San Francisco, CA", "New York, NY", "Seattle, WA", "Austin, TX",
            "Boston, MA", "Chicago, IL", "Denver, CO", "Remote", "Atlanta, GA",
            "Los Angeles, CA", "Portland, OR", "Miami, FL", "Phoenix, AZ",
            "Dallas, TX", "Washington, DC", "San Diego, CA"
        ]

        self.experience_levels = ["Entry Level", "Mid Level", "Senior Level", "Lead/Principal"]

    def generate_job_description_prompt(self, role: str, category: str, company_type: str,
                                      location: str, experience_level: str) -> str:
        """
        Create a detailed prompt for generating job descriptions

        The arguments are interpolated as given; validation happens in
        ``generate_single_jd``.

        Args:
            role (str): Job role title
            category (str): Job category
            company_type (str): Type of company
            location (str): Job location
            experience_level (str): Experience level required

        Returns:
            str: Formatted prompt for the OpenAI chat completions API
        """
        prompt = f"""
        Generate a realistic and detailed job description for the following position:

        Job Title: {role}
        Category: {category}
        Company Type: {company_type}
        Location: {location}
        Experience Level: {experience_level}

        Please include the following sections in a professional format:

        1. **Job Title**: {role}
        2. **Company Overview**: Brief description of the company (2-3 sentences)
        3. **Job Summary**: Overview of the role and its importance (3-4 sentences)
        4. **Key Responsibilities**: 5-7 specific responsibilities
        5. **Required Qualifications**: 
            - Education requirements
            - Years of experience
            - Technical skills (be specific to the role)
            - Soft skills
        6. **Preferred Qualifications**: 2-3 nice-to-have skills or experiences
        7. **Benefits**: 3-4 attractive benefits
        8. **Location**: {location}
        9. **Employment Type**: Full-time
        10. **Salary Range**: Provide a realistic range based on the role and location

        Make the job description engaging, realistic, and tailored to the specific role and industry.
        Use professional language but keep it approachable.
        """
        return prompt

    def generate_single_jd(self, role: str, category: str, company_type: Optional[str] = None,
                          location: Optional[str] = None,
                          experience_level: Optional[str] = None) -> Optional[Dict]:
        """
        Generate a single job description using the OpenAI chat completions API

        Invalid arguments and API failures are reported with ``print`` and
        signalled by a ``None`` return value; no exception is raised.

        Args:
            role (str): Job role
            category (str): Job category
            company_type (str, optional): Type of company; random if omitted
            location (str, optional): Job location; random if omitted
            experience_level (str, optional): Experience level required; random if omitted

        Returns:
            Dict: Generated job description data, or None if an error occurs
        """
        # Randomly select parameters if not provided
        company_type = company_type or random.choice(self.company_types)
        location = location or random.choice(self.locations)
        experience_level = experience_level or random.choice(self.experience_levels)

        # Validate inputs
        if category not in self.job_categories:
            print(f"Error: Invalid category. Choose from {list(self.job_categories.keys())}")
            return None
        if role not in self.job_categories[category]:
            print(f"Error: Invalid role for {category}. Choose from {self.job_categories[category]}")
            return None
        if company_type not in self.company_types:
            print(f"Error: Invalid company type. Choose from {self.company_types}")
            return None
        if location not in self.locations:
            print(f"Error: Invalid location. Choose from {self.locations}")
            return None
        if experience_level not in self.experience_levels:
            print(f"Error: Invalid experience level. Choose from {self.experience_levels}")
            return None

        # Generate the prompt
        prompt = self.generate_job_description_prompt(role, category, company_type, location, experience_level)

        try:
            # Call the OpenAI chat completions API
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )
            content = response.choices[0].message.content
        except Exception as e:
            # Best-effort by design: one failed request must not abort a batch.
            print(f"Error generating JD for {role}: {str(e)}")
            return None

        # The message content can be None; treat that and blank text as a
        # failed generation rather than storing an empty description.
        if not content or not content.strip():
            print(f"Error generating JD for {role}: the model returned an empty response")
            return None
        job_description = content.strip()

        # Create structured data
        return {
            "id": f"JD_{random.randint(1000, 9999)}_{int(time.time())}",
            "title": role,
            "category": category,
            "company_type": company_type,
            "location": location,
            "experience_level": experience_level,
            "full_description": job_description,
            "generated_at": datetime.now().isoformat(),
            "status": "active"
        }

    def generate_all_job_descriptions(self, num_descriptions: int,
                                      pause_seconds: float = 0.0) -> List[Dict]:
        """
        Generate multiple job descriptions by randomly selecting roles, categories, company types, locations, and experience levels

        Failed generations are skipped, so the result may contain fewer than
        ``num_descriptions`` entries.

        Args:
            num_descriptions (int): Number of job descriptions to generate
            pause_seconds (float): Seconds to wait between requests; the
                default of 0 sends them back to back

        Returns:
            List[Dict]: List of generated job description dictionaries
        """
        job_descriptions = []
        categories = list(self.job_categories.keys())
        for index in range(num_descriptions):
            # Randomly select category and role
            category = random.choice(categories)
            role = random.choice(self.job_categories[category])

            # Generate job description
            jd = self.generate_single_jd(role, category)
            if jd:
                job_descriptions.append(jd)

            # No need to wait after the final request
            if pause_seconds > 0 and index < num_descriptions - 1:
                time.sleep(pause_seconds)

        return job_descriptions

    def save_to_json(self, job_descriptions: List[Dict], filename: str = "job_descriptions.json") -> None:
        """
        Save job descriptions to JSON file

        Args:
            job_descriptions (List[Dict]): Records to serialize
            filename (str): Destination path; an existing file is overwritten
        """
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(job_descriptions, f, indent=2, ensure_ascii=False)
        print(f"Job descriptions saved to {filename}")

    def generate_summary_report(self, job_descriptions: List[Dict]) -> Dict:
        """
        Generate a summary report of the generated job descriptions

        Args:
            job_descriptions (List[Dict]): Records as returned by
                ``generate_single_jd``

        Returns:
            Dict: ``total_jobs`` plus per-value counts under ``categories``,
            ``locations``, ``experience_levels`` and ``company_types``; an
            empty dict when ``job_descriptions`` is empty
        """
        if not job_descriptions:
            return {}

        def count_by(field: str) -> Dict:
            # Records missing the field are counted under None, as .get() returns.
            return dict(Counter(jd.get(field) for jd in job_descriptions))

        return {
            "total_jobs": len(job_descriptions),
            "categories": count_by("category"),
            "locations": count_by("location"),
            "experience_levels": count_by("experience_level"),
            "company_types": count_by("company_type")
        }
    
    

# Example usage
# Load .env first so the key is available even on a fresh kernel, and fail
# early with a clear message when it is missing.
load_dotenv()
API_KEY = os.getenv("OpenAI_API_Key")
if not API_KEY:
    raise ValueError("API key not found. Please set the 'OpenAI_API_Key' in your environment variables.")
generator = JobDescriptionGenerator(API_KEY)

# Generate a single job description
result = generator.generate_single_jd("Frontend Developer", "Software Engineering", "Tech Startup",  "Remote", "Mid Level")
if result:
    print("Single Job Description:")
    print(result)

# Generate multiple job descriptions
job_descriptions = generator.generate_all_job_descriptions(2)  # Generate 2 for testing
print("\nMultiple Job Descriptions:")
for i, jd in enumerate(job_descriptions, 1):
    print(f"\nJob Description {i}:")
    print(jd)



Single Job Description:
{'id': 'JD_2459_1753432376', 'title': 'Frontend Developer', 'category': 'Software Engineering', 'company_type': 'Tech Startup', 'location': 'Remote', 'experience_level': 'Mid Level', 'full_description': "**Job Title**: Frontend Developer\n\n**Company Overview**: Our tech startup is revolutionizing the way people interact with technology through innovative software solutions. We're a passionate team dedicated to pushing the boundaries of what's possible in the digital world.\n\n**Job Summary**: As a Frontend Developer, you will play a key role in shaping the user experience of our cutting-edge software products. Your expertise in HTML, CSS, and JavaScript will be crucial in bringing our designs to life and ensuring a seamless interface for our users.\n\n**Key Responsibilities**:\n1. Collaborate with designers and backend developers to implement user-friendly interfaces\n2. Write clean, scalable code to optimize web applications for maximum speed and scalability\n

In [46]:
def generate_all_job_descriptions(self, num_descriptions: int, batch_size: int = 10,
                                  item_delay: float = 1.0,
                                  batch_delay: float = 10.0) -> List[Dict]:
    """
    Generate job descriptions in rate-limited batches with progress output.

    A random category and role are chosen for every description. Failed
    generations (``generate_single_jd`` returning a falsy value) are skipped,
    so the result may contain fewer than ``num_descriptions`` entries.

    NOTE(review): this is defined at module level. It only replaces the
    method of the same name on JobDescriptionGenerator if it is assigned onto
    the class; no such assignment is visible here -- confirm.

    Args:
        self: Object providing ``job_categories`` and ``generate_single_jd``
        num_descriptions (int): Number of job descriptions to attempt
        batch_size (int): Number of requests per batch
        item_delay (float): Seconds to wait after each request
        batch_delay (float): Seconds to wait between consecutive batches

    Returns:
        List[Dict]: The successfully generated job descriptions

    Raises:
        ValueError: If ``batch_size`` is smaller than 1
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    job_descriptions = []
    categories = list(self.job_categories.keys())
    total_batches = (num_descriptions - 1) // batch_size + 1

    for batch_start in range(0, num_descriptions, batch_size):
        print(f"Processing batch {batch_start//batch_size + 1}/{total_batches}")
        batch_end = min(batch_start + batch_size, num_descriptions)
        for index in range(batch_start, batch_end):
            category = random.choice(categories)
            role = random.choice(self.job_categories[category])
            print(f"Generating job description {index+1}/{num_descriptions}: {role} ({category})")
            jd = self.generate_single_jd(role, category)
            if jd:
                job_descriptions.append(jd)
            if item_delay > 0:
                time.sleep(item_delay)
        # Pause between batches only; waiting after the last one is wasted time.
        if batch_delay > 0 and batch_end < num_descriptions:
            time.sleep(batch_delay)

    print(f"Generated {len(job_descriptions)} job descriptions successfully!")
    return job_descriptions

In [47]:

def main(num_descriptions: int = 200,
         dataset_path: str = "job_descriptions_dataset.json",
         summary_path: str = "generation_summary.json"):
    """
    Main function to run the job description generator

    Loads the API key, generates the descriptions, writes the dataset and a
    summary report as JSON files, and prints the summary.

    Args:
        num_descriptions (int): Number of job descriptions to request
        dataset_path (str): Output file for the generated descriptions
        summary_path (str): Output file for the summary statistics

    Raises:
        ValueError: If ``OpenAI_API_Key`` is not set in the environment
    """
    # Load environment variables
    load_dotenv()
    # Retrieve the API key from environment variables
    API_KEY = os.getenv("OpenAI_API_Key")
    if not API_KEY:
        raise ValueError("API key not found. Please set the 'OpenAI_API_Key' in your environment variables.")

    # Create generator instance
    generator = JobDescriptionGenerator(API_KEY)

    # Generate job descriptions
    job_descriptions = generator.generate_all_job_descriptions(num_descriptions)

    # Save to JSON file
    generator.save_to_json(job_descriptions, dataset_path)

    # Generate and save summary report
    summary = generator.generate_summary_report(job_descriptions)
    with open(summary_path, 'w', encoding='utf-8') as f:
        json.dump(summary, f, indent=2, ensure_ascii=False)

    # Print summary. The report is an empty dict when nothing was generated,
    # so read it with defaults instead of indexing.
    print("\n" + "="*50)
    print("GENERATION SUMMARY")
    print("="*50)
    print(f"Total Job Descriptions Generated: {summary.get('total_jobs', 0)}")
    print("\nBy Category:")
    for category, count in summary.get('categories', {}).items():
        print(f"  {category}: {count}")

    print("\nFiles generated:")
    print(f"  - {dataset_path} (main dataset)")
    print(f"  - {summary_path} (summary statistics)")

# Run the full pipeline when this code is executed directly (script or
# notebook cell), but not when it is imported as a module.
if __name__ == "__main__":
    main()

Job descriptions saved to job_descriptions_dataset.json

GENERATION SUMMARY
Total Job Descriptions Generated: 200

By Category:
  Data Science: 25
  Software Engineering: 27
  Project Management: 28
  Business Analytics: 38
  Machine Learning: 29
  Quality Assurance: 33
  DevOps: 20

Files generated:
  - job_descriptions_dataset.json (main dataset)
  - generation_summary.json (summary statistics)
