In [3]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from typing import List, Dict, Any, Optional

ModuleNotFoundError: No module named 'numpy'

In [None]:
class JobSearchAI:
    """
    Keyword-based job search over a list of job postings.

    Postings are loaded into a DataFrame, a combined ``search_text`` column is
    built from title/description/company/skills, and a TF-IDF matrix over that
    column is used for cosine-similarity ranking.
    """

    def __init__(self, jobs: List[Dict[str, Any]]):
        """
        Initialize the JobSearchAI with a list of job postings

        :param jobs: List of job dictionaries. The methods of this class read
            the keys ``id``, ``title``, ``description``, ``company``,
            ``skills``, ``jobType``, ``location`` and ``experienceRequired``.
        """
        # Convert jobs to DataFrame for easier manipulation
        self.jobs_df = pd.DataFrame(jobs)

        # Preprocess job data
        self._preprocess_data()

        # Create TF-IDF vectorizer for semantic search
        self._create_vectorizer()

    @staticmethod
    def _as_text(value: Any) -> str:
        """Return *value* as a string, mapping missing values (None/NaN) to ''."""
        if isinstance(value, str):
            return value
        try:
            missing = bool(pd.isna(value))
        except (TypeError, ValueError):
            # pd.isna on a list-like yields an array with no single truth value
            missing = False
        return '' if missing else str(value)

    @staticmethod
    def _skill_list(value: Any) -> List[str]:
        """Normalize a ``skills`` cell to a list of strings ([] when missing)."""
        if isinstance(value, str):
            # A bare string is one skill, not an iterable of characters
            return [value]
        if isinstance(value, (list, tuple, set, frozenset)):
            return [str(item) for item in value if item is not None]
        return []

    @staticmethod
    def _parse_years(value: Any) -> Optional[int]:
        """
        Extract the leading integer of an experience string.

        ``'5+'`` and ``'3-5 years'`` give 5 and 3; values that do not start
        with a digit (or are missing) give ``None``.
        """
        text = JobSearchAI._as_text(value).strip()
        end = 0
        while end < len(text) and text[end] in '0123456789':
            end += 1
        return int(text[:end]) if end else None

    @staticmethod
    def _top_indices(scores: Any, top_k: int) -> np.ndarray:
        """
        Return positions of the ``top_k`` highest scores, best first.

        A non-positive ``top_k`` yields an empty result (a plain
        ``argsort()[-top_k:]`` would return *everything* for ``top_k == 0``).
        Ties keep their original relative order.
        """
        if top_k <= 0:
            return np.array([], dtype=int)
        values = np.asarray(scores, dtype=float)
        return np.argsort(-values, kind='stable')[:top_k]

    def _preprocess_data(self):
        """
        Preprocess job data for advanced searching

        Adds ``search_text`` plus the ``is_full_time`` / ``is_part_time`` /
        ``is_contract`` indicator columns to ``self.jobs_df``.
        """
        # Combine relevant text fields for semantic search. Missing values
        # become '' so one absent field does not turn the whole text into NaN.
        search_text = self.jobs_df['title'].map(self._as_text)
        for column in ('description', 'company'):
            search_text = search_text + ' ' + self.jobs_df[column].map(self._as_text)
        skills_text = self.jobs_df['skills'].map(
            lambda value: ' '.join(self._skill_list(value))
        )
        self.jobs_df['search_text'] = search_text + ' ' + skills_text

        # One-hot encode job types
        self.jobs_df['is_full_time'] = (self.jobs_df['jobType'] == 'Full-Time').astype(int)
        self.jobs_df['is_part_time'] = (self.jobs_df['jobType'] == 'Part-Time').astype(int)
        self.jobs_df['is_contract'] = (self.jobs_df['jobType'] == 'Contract').astype(int)

    def _create_vectorizer(self):
        """
        Create TF-IDF vectorizer for semantic search

        Fits the vectorizer on ``search_text`` and stores the resulting
        document-term matrix in ``self.tfidf_matrix``.
        """
        self.vectorizer = TfidfVectorizer(stop_words='english')
        self.tfidf_matrix = self.vectorizer.fit_transform(self.jobs_df['search_text'])

    def _query_similarities(self, query: str) -> np.ndarray:
        """Return the cosine similarity of *query* to every job, in row order."""
        query_vector = self.vectorizer.transform([query])
        return cosine_similarity(query_vector, self.tfidf_matrix).flatten()

    def semantic_search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        """
        Perform semantic search using TF-IDF and cosine similarity

        :param query: Search query string
        :param top_k: Number of top results to return; values <= 0 return []
        :return: List of top matching job postings, most similar first
        """
        similarities = self._query_similarities(query)
        best = self._top_indices(similarities, top_k)
        return self.jobs_df.iloc[best].to_dict('records')

    def advanced_filter(
        self,
        query: Optional[str] = None,
        location: Optional[str] = None,
        min_experience: Optional[int] = None,
        job_types: Optional[List[str]] = None,
        skills: Optional[List[str]] = None
    ) -> List[Dict[str, Any]]:
        """
        Advanced filtering with multiple criteria

        :param query: Semantic search query; jobs with zero similarity to it
            are dropped and the result is ordered by similarity (best first)
        :param location: Case-insensitive literal substring of the location
        :param min_experience: Keep jobs whose required years (leading number
            of ``experienceRequired``) are at most this value; rows whose
            requirement cannot be parsed are excluded
        :param job_types: List of job types to filter
        :param skills: List of skills to match (case-insensitive, any match)
        :return: Filtered list of job postings
        """
        # Start with full dataset
        filtered_jobs = self.jobs_df.copy()

        # Semantic search if query provided. Asking for the top len(jobs)
        # results would return every job, so filter on similarity > 0 instead.
        relevance = None
        if query:
            relevance = pd.Series(
                self._query_similarities(query), index=self.jobs_df.index
            )
            filtered_jobs = filtered_jobs[relevance > 0]

        # Location filter (regex=False: the user's text is matched literally)
        if location:
            locations = filtered_jobs['location'].map(self._as_text).astype(str)
            filtered_jobs = filtered_jobs[
                locations.str.contains(location, case=False, regex=False)
            ]

        # Experience filter
        if min_experience is not None:
            def _within_experience(value: Any) -> bool:
                years = self._parse_years(value)
                return years is not None and years <= min_experience

            filtered_jobs = filtered_jobs[
                filtered_jobs['experienceRequired'].map(_within_experience).astype(bool)
            ]

        # Job type filter
        if job_types:
            filtered_jobs = filtered_jobs[filtered_jobs['jobType'].isin(job_types)]

        # Skills filter: lower-case the wanted skills once, not once per row
        if skills:
            wanted = {skill.lower() for skill in skills}
            filtered_jobs = filtered_jobs[
                filtered_jobs['skills'].map(
                    lambda value: not wanted.isdisjoint(
                        item.lower() for item in self._skill_list(value)
                    )
                ).astype(bool)
            ]

        # Present query matches most relevant first
        if relevance is not None:
            ranked = relevance.loc[filtered_jobs.index].sort_values(
                ascending=False, kind='stable'
            )
            filtered_jobs = filtered_jobs.loc[ranked.index]

        return filtered_jobs.to_dict('records')

    def recommend_similar_jobs(self, job_id: int, top_k: int = 3) -> List[Dict[str, Any]]:
        """
        Recommend similar jobs based on semantic similarity

        :param job_id: ID of the reference job
        :param top_k: Number of similar jobs to recommend; values <= 0 return []
        :return: List of recommended job postings, most similar first; the
            reference job itself is never included
        :raises IndexError: If no job has the given ``job_id``
        """
        # Locate the reference job by row position (the TF-IDF matrix is
        # positional, so an index label must not be used here)
        positions = np.flatnonzero((self.jobs_df['id'] == job_id).to_numpy())
        if positions.size == 0:
            raise IndexError(f"No job with id {job_id!r}")
        job_pos = int(positions[0])

        if top_k <= 0:
            return []

        # Calculate cosine similarity between this job and all others;
        # slicing keeps the row two-dimensional
        job_vector = self.tfidf_matrix[job_pos:job_pos + 1]
        similarities = cosine_similarity(job_vector, self.tfidf_matrix).flatten()

        # Rank everything, then drop the reference job before truncating so it
        # cannot leak into the result when top_k >= number of jobs
        ranking = self._top_indices(similarities, len(similarities))
        ranking = ranking[ranking != job_pos][:top_k]

        return self.jobs_df.iloc[ranking].to_dict('records')

# Example usage: build the search helper from `jobs`.
# NOTE(review): `jobs` is not defined in the visible cells — presumably a list of
# job dicts loaded elsewhere; confirm it exists before running this cell.
job_search_ai = JobSearchAI(jobs)