In [3]:
import os
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModel
from langchain_groq import ChatGroq  # Import Groq client for handling the chat completions


In [4]:
# Avoid OpenMP conflict
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

class Joblander:
    def __init__(self):
        # Initialize the LLM using Groq
        self.llm = ChatGroq(
            temperature=0,
            groq_api_key=os.getenv("GROQ_API_KEY"),
            model_name="llama-3.1-70b-versatile"
        )

        # Initialize the HuggingFace embedding model
        self.tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
        self.model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

        # Initialize FAISS index for similarity search
        self.dimension = 384  # Assuming the HuggingFace model has a 384-dimensional output
        self.index = faiss.IndexFlatL2(self.dimension)


In [2]:
import os

In [5]:
    def embed_text(self, text):
        """Embed text using HuggingFace transformers."""
        inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        outputs = self.model(**inputs)
        embeddings = outputs.last_hidden_state.mean(dim=1).detach().numpy()
        return embeddings

In [6]:

    def read_knowledge_base(self, file_path):
        """Read and return lines from the knowledge base text file."""
        with open(file_path, 'r') as file:
            data = file.readlines()
        return data


In [7]:
    def initialize_knowledge_base(self, knowledge_base_file):
        """Initialize FAISS index with embeddings from the knowledge base."""
        knowledge_base = self.read_knowledge_base(knowledge_base_file)
        
        # Embed the knowledge base into a list of vectors
        knowledge_base_embeddings = [self.embed_text(text) for text in knowledge_base]
        
        # Convert the list of embeddings into a 2D numpy array
        knowledge_base_embeddings = np.vstack(knowledge_base_embeddings)
        
        # Add the embeddings to the FAISS index
        self.index.add(knowledge_base_embeddings)
        self.knowledge_base = knowledge_base

In [8]:

    def query_rag(self, input_text):
        """Perform a similarity search on the FAISS index to get relevant context."""
        input_embedding = self.embed_text(input_text)
        D, I = self.index.search(np.array(input_embedding), k=5)
        retrieved_texts = [self.knowledge_base[i] for i in I[0]]
        context = "\n".join(retrieved_texts)
        return context

In [9]:

    def generate_response(self, messages, model="llama3-8b-8192", temperature=0.5, max_tokens=1024):
        """Generate a response from the Groq LLM using streaming."""
        stream = self.llm(
            messages=messages,
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=1,
            stop=None,
            stream=True
        )
        
        # Collect the generated output
        result = ""
        for chunk in stream:
            content = chunk.choices[0].delta.content
            if content:  # Avoid concatenating None
                result += content

        return result

In [10]:

    def optimize_resume(self, job_description, current_resume):
        """Optimize a resume based on the job description."""
        context = self.query_rag(job_description)
        
        # Properly structured messages for Groq
        messages = [
            {"role": "system", "content": "You are a resume optimization assistant."},
            {"role": "user", "content": f"Context: {context}\n\nOptimize this resume:\n{current_resume}\n\nJob Description: {job_description}\nOptimized Resume:"}
        ]
        
        return self.generate_response(messages)


In [13]:
inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, clean_up_tokenization_spaces=True)


NameError: name 'self' is not defined

In [12]:
import os
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModel
from langchain_groq import ChatGroq  # Import Groq client for handling the chat completions
import glob

# Avoid OpenMP conflict
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

class Joblander:
    def __init__(self, vector_store_path="/vector_stores"):
        # Initialize the LLM using Groq
        self.llm = ChatGroq(
            temperature=0,
            groq_api_key=os.getenv("GROQ_API_KEY"),
            model_name="llama-3.1-70b-versatile"
        )

        # Initialize the HuggingFace embedding model
        self.tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
        self.model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

        # Initialize FAISS index for similarity search
        self.dimension = 384  # Assuming the HuggingFace model has a 384-dimensional output
        self.index = faiss.IndexFlatL2(self.dimension)
        
        # Store the vector store path
        self.vector_store_path = vector_store_path

    def embed_text(self, text):
        """Embed text using HuggingFace transformers."""
        inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        outputs = self.model(**inputs)
        embeddings = outputs.last_hidden_state.mean(dim=1).detach().numpy()
        return embeddings

    def read_knowledge_base(self, file_path):
        """Read and return lines from the knowledge base text file."""
        with open(file_path, 'r') as file:
            data = file.readlines()
        return data

    def initialize_knowledge_base(self, knowledge_base_file):
        """Initialize FAISS index with embeddings from the knowledge base or load existing FAISS indexes."""
        # Check if vector stores exist in the specified directory
        vector_store_files = glob.glob(os.path.join(self.vector_store_path, "*.faiss"))
        
        if vector_store_files:
            # Load the existing vector store files into the FAISS index
            for store_file in vector_store_files:
                faiss_index = faiss.read_index(store_file)
                self.index.merge_from(faiss_index)
            print(f"Loaded existing vector stores from {self.vector_store_path}")
        else:
            # If no existing stores, initialize from the knowledge base
            knowledge_base = self.read_knowledge_base(knowledge_base_file)
            
            # Embed the knowledge base into a list of vectors
            knowledge_base_embeddings = [self.embed_text(text) for text in knowledge_base]
            
            # Convert the list of embeddings into a 2D numpy array
            knowledge_base_embeddings = np.vstack(knowledge_base_embeddings)
            
            # Add the embeddings to the FAISS index
            self.index.add(knowledge_base_embeddings)
            self.knowledge_base = knowledge_base
            
            # Save the new FAISS index to a file
            store_file_path = os.path.join(self.vector_store_path, "new_vector_store.faiss")
            faiss.write_index(self.index, store_file_path)
            print(f"New vector store saved to {store_file_path}")

    def query_rag(self, input_text):
        """Perform a similarity search on the FAISS index to get relevant context."""
        input_embedding = self.embed_text(input_text)
        D, I = self.index.search(np.array(input_embedding), k=5)
        retrieved_texts = [self.knowledge_base[i] for i in I[0]]
        context = "\n".join(retrieved_texts)
        return context

    def generate_response(self, messages, model="llama3-8b-8192", temperature=0.5, max_tokens=1024):
        """Generate a response from the Groq LLM using streaming."""
        stream = self.llm(
            messages=messages,
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=1,
            stop=None,
            stream=True
        )
        
        # Collect the generated output
        result = ""
        for chunk in stream:
            content = chunk.choices[0].delta.content
            if content:  # Avoid concatenating None
                result += content

        return result

    def optimize_resume(self, job_description, current_resume):
        """Optimize a resume based on the job description."""
        context = self.query_rag(job_description)
        
        # Properly structured messages for Groq
        messages = [
            {"role": "system", "content": "You are a resume optimization assistant."},
            {"role": "user", "content": f"Context: {context}\n\nOptimize this resume:\n{current_resume}\n\nJob Description: {job_description}\nOptimized Resume:"}
        ]
        
        return self.generate_response(messages)

    def write_cover_letter(self, job_description):
        """Write a cover letter based on the job description."""
        context = self.query_rag(job_description)
        
        # Properly structured messages for Groq
        messages = [
            {"role": "system", "content": "You are a cover letter assistant."},
            {"role": "user", "content": f"Context: {context}\n\nWrite a cover letter for the following job description:\n{job_description}\n\nCover Letter:"}
        ]
        
        return self.generate_response(messages)

    def write_cold_email(self, job_description):
        """Write a cold email based on the job description."""
        context = self.query_rag(job_description)
        
        # Properly structured messages for Groq
        messages = [
            {"role": "system", "content": "You are a cold email assistant."},
            {"role": "user", "content": f"Context: {context}\n\nWrite a professional cold email for the following job description:\n{job_description}\n\nCold Email:"}
        ]
        
        return self.generate_response(messages)

    def chat_agent(self, query):
        """AI Assistant to guide the job seeker in career advice."""
        context = self.query_rag(query)
        
        # Properly structured messages for Groq
        messages = [
            {"role": "system", "content": "You are a career guidance assistant."},
            {"role": "user", "content": f"Context: {context}\n\nBased on the query '{query}', provide guidance to help the job seeker improve their chances of getting a job. Suggest courses, projects, and steps they should take."}
        ]
        
        return self.generate_response(messages)

# Example Usage
if __name__ == "__main__":
    joblander = Joblander()
    joblander.initialize_knowledge_base("result.txt")
    
    job_description = "Software Engineer position at XYZ Corp focusing on Python and machine learning."
    current_resume = "Experienced software engineer with skills in Python, C++, and cloud computing."
    
    optimized_resume = joblander.optimize_resume(job_description, current_resume)
    cover_letter = joblander.write_cover_letter(job_description)
    cold_email = joblander.write_cold_email(job_description)
    
    print("Optimized Resume:\n", optimized_resume)
    print("\nCover Letter:\n", cover_letter)
    print("\nCold Email:\n", cold_email)
    
    # AI Assistant Query Example
    query = "I want to become a machine learning engineer, what should I do?"
    assistant_response = joblander.chat_agent(query)
    print("\nAI Assistant Response:\n", assistant_response)




RuntimeError: Error in __cdecl faiss::FileIOWriter::FileIOWriter(const char *) at D:\a\faiss-wheels\faiss-wheels\faiss\faiss\impl\io.cpp:98: Error: 'f' failed: could not open /vector_stores\new_vector_store.faiss for writing: No such file or directory

NameError: name 'self' is not defined

## New

In [15]:
import os
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModel
from langchain_groq import ChatGroq  # Import Groq client for handling the chat completions
import glob

In [16]:
# Avoid OpenMP conflict
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"


In [17]:
import os
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModel
from langchain_groq import ChatGroq  # Import Groq client for handling the chat completions
import glob

# Avoid OpenMP conflict
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

class Joblander:
    def __init__(self, vector_store_path="/user_1_git_faiss_store"):
        # Initialize the LLM using Groq
        self.llm = ChatGroq(
            temperature=0,
            groq_api_key=os.getenv("GROQ_API_KEY"),
            model_name="llama-3.1-70b-versatile"
        )

        # Initialize the HuggingFace embedding model
        self.tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
        self.model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

        # Initialize FAISS index for similarity search
        self.dimension = 384  # Assuming the HuggingFace model has a 384-dimensional output
        self.index = faiss.IndexFlatL2(self.dimension)
        
        # Store the vector store path
        self.vector_store_path = vector_store_path

    def embed_text(self, text):
        """Embed text using HuggingFace transformers."""
        inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, clean_up_tokenization_spaces=False)
        outputs = self.model(**inputs)
        embeddings = outputs.last_hidden_state.mean(dim=1).detach().numpy()
        return embeddings

    def read_knowledge_base(self, file_path):
        """Read and return lines from the knowledge base text file."""
        with open(file_path, 'r') as file:
            data = file.readlines()
        return data

    def initialize_knowledge_base(self, knowledge_base_file):
        """Initialize FAISS index with embeddings from the knowledge base or load existing FAISS indexes."""
        # Check if vector stores exist in the specified directory
        vector_store_files = glob.glob(os.path.join(self.vector_store_path, "*.faiss"))
        
        if vector_store_files:
            # Load the existing vector store files into the FAISS index
            for store_file in vector_store_files:
                faiss_index = faiss.read_index(store_file)
                self.index.merge_from(faiss_index)
            print(f"Loaded existing vector stores from {self.vector_store_path}")
        else:
            # If no existing stores, initialize from the knowledge base
            knowledge_base = self.read_knowledge_base(knowledge_base_file)
            
            # Embed the knowledge base into a list of vectors
            knowledge_base_embeddings = [self.embed_text(text) for text in knowledge_base]
            
            # Convert the list of embeddings into a 2D numpy array
            knowledge_base_embeddings = np.vstack(knowledge_base_embeddings)
            
            # Add the embeddings to the FAISS index
            self.index.add(knowledge_base_embeddings)
            self.knowledge_base = knowledge_base
            
            # Save the new FAISS index to a file
            store_file_path = os.path.join(self.vector_store_path, "new_vector_store.faiss")
            faiss.write_index(self.index, store_file_path)
            print(f"New vector store saved to {store_file_path}")

    def query_rag(self, input_text):
        """Perform a similarity search on the FAISS index to get relevant context."""
        input_embedding = self.embed_text(input_text)
        D, I = self.index.search(np.array(input_embedding), k=5)
        retrieved_texts = [self.knowledge_base[i] for i in I[0]]
        context = "\n".join(retrieved_texts)
        return context

    def generate_response(self, messages, model="llama3-8b-8192", temperature=0.5, max_tokens=1024):
        """Generate a response from the Groq LLM using streaming."""
        stream = self.llm(
            messages=messages,
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=1,
            stop=None,
            stream=True
        )
        
        # Collect the generated output
        result = ""
        for chunk in stream:
            content = chunk.choices[0].delta.content
            if content:  # Avoid concatenating None
                result += content

        return result

    def optimize_resume(self, job_description, current_resume):
        """Optimize a resume based on the job description."""
        context = self.query_rag(job_description)
        
        # Properly structured messages for Groq
        messages = [
            {"role": "system", "content": "You are a resume optimization assistant."},
            {"role": "user", "content": f"Context: {context}\n\nOptimize this resume:\n{current_resume}\n\nJob Description: {job_description}\nOptimized Resume:"}
        ]
        
        return self.generate_response(messages)

    def write_cover_letter(self, job_description):
        """Write a cover letter based on the job description."""
        context = self.query_rag(job_description)
        
        # Properly structured messages for Groq
        messages = [
            {"role": "system", "content": "You are a cover letter assistant."},
            {"role": "user", "content": f"Context: {context}\n\nWrite a cover letter for the following job description:\n{job_description}\n\nCover Letter:"}
        ]
        
        return self.generate_response(messages)

    def write_cold_email(self, job_description):
        """Write a cold email based on the job description."""
        context = self.query_rag(job_description)
        
        # Properly structured messages for Groq
        messages = [
            {"role": "system", "content": "You are a cold email assistant."},
            {"role": "user", "content": f"Context: {context}\n\nWrite a professional cold email for the following job description:\n{job_description}\n\nCold Email:"}
        ]
        
        return self.generate_response(messages)

    def chat_agent(self, query):
        """AI Assistant to guide the job seeker in career advice."""
        context = self.query_rag(query)
        
        # Properly structured messages for
