In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import joblib
import random
from datetime import datetime, timedelta

class AIRecruitmentPlatform:
    """Match sample job postings and candidates by TF-IDF text similarity.

    The instance owns two pandas DataFrames, ``jobs`` and ``candidates``,
    built by :meth:`create_sample_data`, plus a ``TfidfVectorizer``.
    :meth:`train_model` must be called before any matching or search method
    because it is the step that creates ``job_vectors`` and
    ``candidate_vectors``.
    """

    def __init__(self):
        """Create the vectorizer and load the built-in sample data."""
        # English stop words are dropped before TF-IDF weighting.
        self.vectorizer = TfidfVectorizer(stop_words='english')

        self.create_sample_data()

    def create_sample_data(self):
        """Populate ``self.jobs`` and ``self.candidates`` with sample rows.

        Builds 10 job postings and 20 candidates, then adds a ``full_text``
        column to each table: the space-joined text fields that are later fed
        to the vectorizer. ``last_updated`` is a random date within the past
        30 days, so it differs between runs.
        """
        self.jobs = pd.DataFrame({
            'job_id': range(1, 11),
            'title': [
                'Senior Software Engineer',
                'Data Scientist',
                'Frontend Developer',
                'DevOps Engineer',
                'Product Manager',
                'UX Designer',
                'Machine Learning Engineer',
                'Full Stack Developer',
                'IT Project Manager',
                'Cybersecurity Analyst'
            ],
            'description': [
                'Experienced software engineer with Java, Python, and cloud technologies. Microservices architecture knowledge required.',
                'Data scientist with expertise in machine learning, statistical analysis, and data visualization. Experience with Python, R, and SQL required.',
                'Frontend developer with React, Angular, or Vue.js experience. Strong CSS and JavaScript skills required.',
                'DevOps engineer with experience in AWS, Docker, Kubernetes, and CI/CD pipelines.',
                'Product manager with experience in agile methodologies, user research, and product lifecycle management.',
                'UX designer with experience in user research, wireframing, prototyping, and UI design.',
                'Machine learning engineer with experience in deep learning, NLP, and computer vision. Strong Python skills required.',
                'Full stack developer with experience in React, Node.js, and SQL/NoSQL databases.',
                'IT project manager with experience in agile methodologies, resource management, and stakeholder communication.',
                'Cybersecurity analyst with experience in security audits, vulnerability assessments, and incident response.'
            ],
            'required_skills': [
                'Java, Python, AWS, Microservices, Spring Boot',
                'Python, R, SQL, Machine Learning, Statistics',
                'HTML, CSS, JavaScript, React, Angular',
                'AWS, Docker, Kubernetes, Jenkins, Terraform',
                'Agile, User Stories, Product Roadmap, Data Analysis',
                'Figma, Sketch, User Testing, Wireframing',
                'Python, TensorFlow, PyTorch, NLP, CV',
                'JavaScript, React, Node.js, MongoDB, MySQL',
                'JIRA, MS Project, Agile, Scrum, Budgeting',
                'Security+, CISSP, Penetration Testing, Vulnerability Assessment'
            ],
            'experience_years': [5, 3, 3, 4, 5, 3, 4, 4, 6, 5],
            'salary_range': [
                '120,000 - 150,000',
                '100,000 - 130,000',
                '90,000 - 120,000',
                '110,000 - 140,000',
                '130,000 - 160,000',
                '90,000 - 120,000',
                '120,000 - 150,000',
                '100,000 - 130,000',
                '110,000 - 140,000',
                '100,000 - 130,000'
            ],
            'location': [
                'New York, NY',
                'San Francisco, CA',
                'Austin, TX',
                'Seattle, WA',
                'Boston, MA',
                'Chicago, IL',
                'San Francisco, CA',
                'Remote',
                'Denver, CO',
                'Washington, DC'
            ],
            'remote': [False, True, True, False, False, False, True, True, False, False],
            'company': [
                'TechCorp Inc.',
                'DataInsights LLC',
                'WebFrontier',
                'CloudOps Solutions',
                'ProductVision',
                'DesignMasters',
                'AI Innovations',
                'FullStackTech',
                'ProjectPro',
                'CyberShield'
            ]
        })

        # One timestamp for the whole column; only the random offset varies.
        today = datetime.now()

        self.candidates = pd.DataFrame({
            'candidate_id': range(1, 21),
            'name': [
                'John Smith', 'Emma Johnson', 'Michael Brown', 'Sophia Garcia', 'William Davis',
                'Olivia Wilson', 'James Martinez', 'Charlotte Anderson', 'Benjamin Taylor', 'Amelia Thomas',
                'David Rodriguez', 'Mia Jackson', 'Joseph White', 'Isabella Harris', 'Daniel Martin',
                'Sophie Thompson', 'Matthew Clark', 'Ava Lewis', 'Andrew Lee', 'Ella Walker'
            ],
            'skills': [
                'Python, Java, AWS, Spring Boot, Microservices',
                'Python, R, SQL, Machine Learning, Statistics, Data Visualization',
                'HTML, CSS, JavaScript, React, Redux, TypeScript',
                'AWS, Docker, Kubernetes, Jenkins, GitLab CI',
                'Agile, Scrum, User Stories, Product Roadmap, SQL',
                'Figma, Sketch, User Testing, Wireframing, Adobe XD',
                'Python, TensorFlow, PyTorch, NLP, Deep Learning',
                'JavaScript, React, Node.js, Express, MongoDB',
                'JIRA, MS Project, Agile, Scrum, Budgeting, Risk Management',
                'Security+, CISSP, Penetration Testing, Vulnerability Assessment, Incident Response',
                'Java, Spring Boot, Kafka, Elasticsearch, Microservices, REST APIs',
                'Data Analysis, Python, Tableau, Power BI, SQL, Excel',
                'React, Angular, Vue.js, CSS, JavaScript, HTML5',
                'Docker, Kubernetes, Terraform, AWS, Azure, GCP',
                'Product Management, User Research, A/B Testing, Data Analysis, Roadmapping',
                'UX Research, Wireframing, Prototyping, UI Design, Accessibility',
                'Machine Learning, Computer Vision, TensorFlow, Keras, Python',
                'JavaScript, TypeScript, React, Node.js, GraphQL, MongoDB',
                'Agile, Scrum, Kanban, Stakeholder Management, Risk Assessment',
                'Network Security, Firewall Configuration, IDS/IPS, Penetration Testing'
            ],
            'experience': [
                'Senior Software Engineer at TechCorp (5 years), Software Engineer at CodeMinds (3 years)',
                'Data Scientist at Analytics Inc. (3 years), Statistical Analyst at DataWorks (2 years)',
                'Frontend Developer at WebSolutions (4 years), UI Developer at DesignHub (2 years)',
                'DevOps Engineer at CloudTech (3 years), Systems Administrator at NetOps (2 years)',
                'Product Manager at ProductVision (4 years), Business Analyst at BusinessSolutions (3 years)',
                'UX Designer at DesignMasters (3 years), UI Designer at CreativeMinds (2 years)',
                'Machine Learning Engineer at AI Innovations (3 years), Data Scientist at DataTech (2 years)',
                'Full Stack Developer at WebDev (4 years), Frontend Developer at AppBuilders (2 years)',
                'Project Manager at ProjectPro (5 years), Team Lead at LeadTech (3 years)',
                'Cybersecurity Analyst at SecureTech (4 years), Security Consultant at CyberDefense (2 years)',
                'Backend Developer at ServerSide (6 years), Java Developer at JavaTech (2 years)',
                'Data Analyst at DataCorp (4 years), Business Intelligence at InfoTech (2 years)',
                'Frontend Developer at WebFrontier (5 years), UI Developer at UserInterface (1 year)',
                'Cloud Engineer at CloudOps (3 years), DevOps at InfraTech (3 years)',
                'Product Manager at ProductCo (5 years), Product Owner at AgileTeam (2 years)',
                'UX Designer at UserX (4 years), Design Lead at DesignWorks (2 years)',
                'AI Engineer at DeepLearning (3 years), Research Scientist at AILab (2 years)',
                'Web Developer at WebTech (6 years), Software Engineer at CodeCraft (2 years)',
                'Project Manager at ProjectManage (4 years), Scrum Master at AgilePro (3 years)',
                'Security Engineer at SecureNet (5 years), Network Admin at NetworkOps (2 years)'
            ],
            'experience_years': [8, 5, 6, 5, 7, 5, 5, 6, 8, 6, 8, 6, 6, 6, 7, 6, 5, 8, 7, 7],
            'education': [
                'MS in Computer Science, BS in Computer Engineering',
                'MS in Data Science, BS in Statistics',
                'BS in Computer Science, Frontend Development Bootcamp',
                'BS in Information Technology, AWS Certified DevOps Engineer',
                'MBA, BS in Business Administration',
                'BFA in Design, UX Design Certification',
                'MS in Machine Learning, BS in Computer Science',
                'BS in Computer Science, Full Stack Web Development Bootcamp',
                'MBA, PMP Certification',
                'MS in Cybersecurity, BS in Computer Science, CISSP',
                'MS in Software Engineering, BS in Computer Science',
                'BS in Business Analytics, Data Science Certification',
                'BS in Computer Science, Frontend Masters Certification',
                'BS in Computer Engineering, Cloud Architecture Certification',
                'MBA, BS in Computer Science',
                'BFA in Design, MS in Human-Computer Interaction',
                'PhD in Computer Science, MS in Artificial Intelligence',
                'BS in Computer Science, MS in Web Technologies',
                'MBA, BS in Information Systems, PMP Certification',
                'MS in Information Security, BS in Computer Networks'
            ],
            'desired_salary': [
                '140,000', '115,000', '100,000', '125,000', '145,000',
                '105,000', '135,000', '110,000', '130,000', '115,000',
                '150,000', '95,000', '105,000', '120,000', '140,000',
                '100,000', '130,000', '115,000', '125,000', '110,000'
            ],
            'location_preference': [
                'New York, NY', 'San Francisco, CA', 'Austin, TX', 'Seattle, WA', 'Boston, MA',
                'Chicago, IL', 'San Francisco, CA', 'Remote', 'Denver, CO', 'Washington, DC',
                'New York, NY', 'Remote', 'Austin, TX', 'Seattle, WA', 'Boston, MA',
                'Chicago, IL', 'San Francisco, CA', 'Remote', 'Denver, CO', 'Washington, DC'
            ],
            'remote_preference': [
                True, True, True, False, False,
                False, True, True, False, False,
                False, True, True, False, False,
                True, True, True, False, False
            ],
            'last_updated': [
                (today - timedelta(days=random.randint(1, 30))).strftime('%Y-%m-%d')
                for _ in range(20)
            ]
        })

        # Text fed to the vectorizer: the listed string columns, space-joined.
        job_text_columns = ['title', 'description', 'required_skills', 'location', 'company']
        self.jobs['full_text'] = self.jobs[job_text_columns].apply(' '.join, axis=1)

        candidate_text_columns = ['skills', 'experience', 'education', 'location_preference']
        self.candidates['full_text'] = self.candidates[candidate_text_columns].apply(' '.join, axis=1)

    def train_model(self):
        """Fit the vectorizer and vectorize every job and candidate.

        The vectorizer is fitted on job and candidate texts together so both
        sides share one vocabulary. Sets ``self.job_vectors`` and
        ``self.candidate_vectors`` and, as a side effect, writes the fitted
        vectorizer to ``recruitment_vectorizer.pkl`` in the current working
        directory.

        Returns:
            A fixed confirmation message.
        """
        job_texts = self.jobs['full_text']
        candidate_texts = self.candidates['full_text']

        self.vectorizer.fit(list(job_texts) + list(candidate_texts))

        self.job_vectors = self.vectorizer.transform(job_texts)
        self.candidate_vectors = self.vectorizer.transform(candidate_texts)

        joblib.dump(self.vectorizer, 'recruitment_vectorizer.pkl')

        return "Model training completed successfully."

    @staticmethod
    def _row_position(frame, id_column, wanted_id):
        """Return the zero-based position of the first row with the given id.

        A position (not an index label) is returned because the vector
        matrices produced by ``train_model`` are addressed by row position.

        Raises:
            IndexError: if no row has ``wanted_id`` in ``id_column``.
        """
        hits = np.flatnonzero((frame[id_column] == wanted_id).to_numpy())
        if hits.size == 0:
            raise IndexError(f"No row with {id_column}={wanted_id!r}")
        return int(hits[0])

    @staticmethod
    def _filter_and_rank(results, location_column, remote_column,
                         location, remote, min_experience):
        """Apply the optional search filters and sort by similarity, best first."""
        if location:
            results = results[results[location_column] == location]

        if remote is not None:
            results = results[results[remote_column] == remote]

        if min_experience is not None:
            results = results[results['experience_years'] >= min_experience]

        return results.sort_values(by='similarity_score', ascending=False)

    def match_candidates_to_job(self, job_id, top_n=5):
        """Rank candidates for one job by text similarity.

        Candidates with fewer years of experience than the job requires are
        dropped. Location is not used as a filter; it is only reported in the
        ``location_match`` column.

        Args:
            job_id: Value of ``job_id`` identifying the job.
            top_n: Maximum number of candidates to return.

        Returns:
            DataFrame with columns ``candidate_id``, ``name``,
            ``similarity_score``, ``skills``, ``experience_years`` and
            ``location_match``, sorted by descending similarity.

        Raises:
            IndexError: if ``job_id`` is not present in ``self.jobs``.
        """
        job_pos = self._row_position(self.jobs, 'job_id', job_id)
        job_row = self.jobs.iloc[job_pos]

        scores = cosine_similarity(self.job_vectors[job_pos], self.candidate_vectors).flatten()

        ranked = self.candidates[[
            'candidate_id', 'name', 'skills', 'experience_years',
            'location_preference', 'remote_preference'
        ]].assign(similarity_score=scores)

        # Hard requirement: candidate experience must cover the job's minimum.
        ranked = ranked[ranked['experience_years'] >= job_row['experience_years']]

        # Informational flag: same location, or both sides accept remote work.
        # .assign builds a new frame, so no write lands on a filtered slice.
        same_location = ranked['location_preference'] == job_row['location']
        both_remote = ranked['remote_preference'] & bool(job_row['remote'])
        ranked = ranked.assign(location_match=same_location | both_remote)

        ranked = ranked.sort_values(by='similarity_score', ascending=False).head(top_n)

        return ranked[['candidate_id', 'name', 'similarity_score', 'skills', 'experience_years', 'location_match']]

    def match_jobs_to_candidate(self, candidate_id, top_n=5):
        """Rank jobs for one candidate by text similarity.

        Jobs that require more years of experience than the candidate has are
        dropped. Location is not used as a filter; it is only reported in the
        ``location_match`` column.

        Args:
            candidate_id: Value of ``candidate_id`` identifying the candidate.
            top_n: Maximum number of jobs to return.

        Returns:
            DataFrame with columns ``job_id``, ``title``, ``company``,
            ``similarity_score``, ``required_skills``, ``location``,
            ``location_match`` and ``salary_range``, sorted by descending
            similarity.

        Raises:
            IndexError: if ``candidate_id`` is not present in
                ``self.candidates``.
        """
        candidate_pos = self._row_position(self.candidates, 'candidate_id', candidate_id)
        candidate_row = self.candidates.iloc[candidate_pos]

        scores = cosine_similarity(self.candidate_vectors[candidate_pos], self.job_vectors).flatten()

        ranked = self.jobs[[
            'job_id', 'title', 'company', 'required_skills',
            'experience_years', 'location', 'remote', 'salary_range'
        ]].assign(similarity_score=scores)

        # Hard requirement: the job must not ask for more experience than the
        # candidate has.
        ranked = ranked[ranked['experience_years'] <= candidate_row['experience_years']]

        # Informational flag: same location, or both sides accept remote work.
        same_location = ranked['location'] == candidate_row['location_preference']
        both_remote = ranked['remote'] & bool(candidate_row['remote_preference'])
        ranked = ranked.assign(location_match=same_location | both_remote)

        ranked = ranked.sort_values(by='similarity_score', ascending=False).head(top_n)

        return ranked[['job_id', 'title', 'company', 'similarity_score', 'required_skills', 'location', 'location_match', 'salary_range']]

    def generate_job_recommendations(self, num_recommendations=10):
        """Build job recommendations for every candidate.

        Args:
            num_recommendations: Maximum number of jobs per candidate; passed
                to ``match_jobs_to_candidate`` as ``top_n``.

        Returns:
            Dict mapping candidate name to that candidate's match result.
            Candidates sharing a name would overwrite each other.
        """
        return {
            name: self.match_jobs_to_candidate(candidate_id, top_n=num_recommendations)
            for candidate_id, name in zip(self.candidates['candidate_id'], self.candidates['name'])
        }

    def generate_candidate_recommendations(self, num_recommendations=10):
        """Build candidate recommendations for every job.

        Args:
            num_recommendations: Maximum number of candidates per job; passed
                to ``match_candidates_to_job`` as ``top_n``.

        Returns:
            Dict mapping ``"<title> at <company>"`` to that job's match
            result. Jobs sharing title and company would overwrite each other.
        """
        job_fields = zip(self.jobs['job_id'], self.jobs['title'], self.jobs['company'])
        return {
            f"{title} at {company}": self.match_candidates_to_job(job_id, top_n=num_recommendations)
            for job_id, title, company in job_fields
        }

    def search_jobs(self, keywords, location=None, remote=None, min_experience=None):
        """Search jobs by free-text keywords with optional filters.

        Args:
            keywords: Query text; vectorized with the fitted vectorizer.
            location: If truthy, keep only jobs at exactly this location.
            remote: If not None, keep only jobs whose ``remote`` flag equals it.
            min_experience: If not None, keep only jobs whose required
                ``experience_years`` is at least this value.

        Returns:
            DataFrame with columns ``job_id``, ``title``, ``company``,
            ``similarity_score``, ``required_skills``, ``location``,
            ``remote`` and ``salary_range``, sorted by descending similarity.
        """
        query_vector = self.vectorizer.transform([keywords])
        scores = cosine_similarity(query_vector, self.job_vectors).flatten()

        results = self.jobs[[
            'job_id', 'title', 'company', 'required_skills',
            'experience_years', 'location', 'remote', 'salary_range'
        ]].assign(similarity_score=scores)

        results = self._filter_and_rank(
            results, 'location', 'remote', location, remote, min_experience
        )

        return results[['job_id', 'title', 'company', 'similarity_score', 'required_skills', 'location', 'remote', 'salary_range']]

    def search_candidates(self, keywords, location=None, remote=None, min_experience=None):
        """Search candidates by free-text keywords with optional filters.

        Args:
            keywords: Query text; vectorized with the fitted vectorizer.
            location: If truthy, keep only candidates whose
                ``location_preference`` is exactly this value.
            remote: If not None, keep only candidates whose
                ``remote_preference`` equals it.
            min_experience: If not None, keep only candidates with at least
                this many ``experience_years``.

        Returns:
            DataFrame with columns ``candidate_id``, ``name``,
            ``similarity_score``, ``skills``, ``experience_years``,
            ``location_preference``, ``remote_preference`` and
            ``desired_salary``, sorted by descending similarity.
        """
        query_vector = self.vectorizer.transform([keywords])
        scores = cosine_similarity(query_vector, self.candidate_vectors).flatten()

        results = self.candidates[[
            'candidate_id', 'name', 'skills', 'experience_years',
            'location_preference', 'remote_preference', 'desired_salary'
        ]].assign(similarity_score=scores)

        results = self._filter_and_rank(
            results, 'location_preference', 'remote_preference',
            location, remote, min_experience
        )

        return results[['candidate_id', 'name', 'similarity_score', 'skills', 'experience_years', 'location_preference', 'remote_preference', 'desired_salary']]

# Demo driver: runs only when this file is executed directly
if __name__ == "__main__":
    print("AI Recruitment Platform Demo")
    print("=" * 50)

    # Build the platform; its constructor loads the bundled sample data
    platform = AIRecruitmentPlatform()

    # Vectors must exist before any matching or search call below
    print("\nTraining the model...")
    platform.train_model()
    print("Model training completed.")

    # Demo inputs: job 1 is the Senior Software Engineer posting,
    # candidate 1 is John Smith
    job_id = 1
    candidate_id = 1

    # Example 1: rank candidates against one job
    print(f"\nTop candidates for Job ID {job_id}:")
    candidates = platform.match_candidates_to_job(job_id)
    print(candidates)

    # Example 2: rank jobs against one candidate
    print(f"\nTop job matches for Candidate ID {candidate_id}:")
    jobs = platform.match_jobs_to_candidate(candidate_id)
    print(jobs)

    # Example 3: keyword search over jobs, showing the three best hits
    print("\nSearch results for 'Python Machine Learning':")
    job_search = platform.search_jobs("Python Machine Learning")
    print(job_search.head(3))

    # Example 4: keyword search over candidates, showing the three best hits
    print("\nSearch results for 'DevOps AWS':")
    candidate_search = platform.search_candidates("DevOps AWS")
    print(candidate_search.head(3))

    # Example 5: recommendations for every candidate at once
    print("\nGenerating job recommendations for candidates...")
    job_recommendations = platform.generate_job_recommendations()

    # Show only the first candidate's recommendations as a sample
    sample_candidate = list(job_recommendations)[0]
    print(f"\nJob recommendations for {sample_candidate}:")
    print(job_recommendations[sample_candidate])

    print("\nDemo completed!")

AI Recruitment Platform Demo

Training the model...
Model training completed.

Top candidates for Job ID 1:
    candidate_id             name  similarity_score  \
0              1       John Smith          0.713148   
10            11  David Rodriguez          0.439634   
13            14  Isabella Harris          0.199701   
17            18        Ava Lewis          0.150128   
3              4    Sophia Garcia          0.112588   

                                               skills  experience_years  \
0       Python, Java, AWS, Spring Boot, Microservices                 8   
10  Java, Spring Boot, Kafka, Elasticsearch, Micro...                 8   
13     Docker, Kubernetes, Terraform, AWS, Azure, GCP                 6   
17  JavaScript, TypeScript, React, Node.js, GraphQ...                 8   
3         AWS, Docker, Kubernetes, Jenkins, GitLab CI                 5   

    location_match  
0             True  
10            True  
13           False  
17           False  
3    