In [1]:
import json
import os
from typing import List
import networkx as nx
import nltk
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from annotated_text import annotated_text, parameters
from streamlit_extras import add_vertical_space as avs
from streamlit_extras.badges import badge
from scripts.similarity import get_similarity_score, find_path, read_config
from scripts.utils import get_filenames_from_dir
from scripts import ReadPdf, JobDescriptionProcessor, ResumeProcessor, KeytermsExtraction
import cohere
from scripts.KeytermsExtraction import KeytermExtractor
from scripts.similarity.get_similarity_score import get_similarity_score
import uuid
from langchain.embeddings import HuggingFaceEmbeddings

In [94]:
from scripts.parsers.ParseJobDescToJson import ParseJobDesc


# class HuggingFaceEmbeddings:
#     def __init__(self, model_name="all-MiniLM-L6-v2"):
#         self.model = SentenceTransformer(model_name)

#     def embed_documents(self, text):
#         return self.model.encode(text, convert_to_tensor=True)

class JobDescriptionHandler:
    """Parse job descriptions (PDF files or raw text) and embed their bi-grams.

    Attributes:
        job_desc_directory: Directory scanned by
            ``process_all_job_descriptions``.
    """

    def __init__(self, job_desc_directory="Data/JobDescription/"):
        """Remember the directory that holds the job-description files."""
        self.job_desc_directory = job_desc_directory
        # Embedding model, created on first use and then reused; building a
        # new HuggingFaceEmbeddings for every call is needlessly repeated work.
        self._embeddings = None

    def _get_embeddings(self):
        """Return the cached embeddings model, creating it on first use."""
        # getattr keeps this safe for instances created without __init__.
        if getattr(self, "_embeddings", None) is None:
            self._embeddings = HuggingFaceEmbeddings()
        return self._embeddings

    def read_pdf(self, file_path):
        """Return whatever ``ReadPdf.read_single_pdf`` yields for ``file_path``.

        Presumably the extracted text of the PDF -- verify against ReadPdf.
        """
        return ReadPdf.read_single_pdf(file_path)

    def process_job_description(self, input_data):
        """Parse one job description given as a PDF file name or as raw text.

        Args:
            input_data: A string. If it ends with ``.pdf`` (any letter case)
                it is handed to ``JobDescriptionProcessor``; otherwise it is
                treated as the job-description text itself.

        Returns:
            The parsed job description. The notebook indexes it with
            ``['bi_grams']``, so it is expected to be dict-like.
        """
        if input_data.lower().endswith('.pdf'):
            # PDF input: the processor reads and parses the file.
            return JobDescriptionProcessor(input_data)._read_job_desc()

        # Text input: get_JSON() already is the parsed result, so return it
        # directly. The previous code went on to call `_read_job_desc()` on
        # it, a method that belongs to the processor, not to the parsed data.
        # NOTE(review): assumes get_JSON() has the same shape as the PDF
        # branch's result -- confirm against ParseJobDesc.
        return ParseJobDesc(input_data).get_JSON()

    def job_description_keyword(self, job_desc_text):
        """Return the ``'bi_grams'`` entry of a parsed job description."""
        return job_desc_text['bi_grams']

    def embed_job_description(self, job_desc_text):
        """Embed the ``'bi_grams'`` entry of a parsed job description.

        Returns:
            The result of ``embed_documents`` for a one-element document list.
        """
        return self._get_embeddings().embed_documents(
            [job_desc_text['bi_grams']])

    def process_all_job_descriptions(self):
        """Parse every regular file in ``job_desc_directory``.

        Each parsed result is also written out through the processor's
        ``_write_json_file``.

        Returns:
            A list of parsed job descriptions, ordered by file name.
        """
        # Sorted so that the result order does not depend on the arbitrary
        # order in which os.listdir reports directory entries.
        job_files = sorted(
            entry for entry in os.listdir(self.job_desc_directory)
            if os.path.isfile(os.path.join(self.job_desc_directory, entry))
        )

        job_processed = []
        for job_file in job_files:
            # The processor receives the bare file name (unchanged behavior);
            # presumably it resolves it against its own directory -- verify.
            job_processor = JobDescriptionProcessor(job_file)
            job_data = job_processor._read_job_desc()
            job_processor._write_json_file(job_data)
            job_processed.append(job_data)
        return job_processed

In [95]:
# Parse the sample front-end engineer job description; later cells reuse
# both `handler` and `processed_job_desc`.
handler = JobDescriptionHandler()
processed_job_desc = handler.process_job_description(
    input_data="job_desc_front_end_engineer.pdf",
)


In [91]:
# processed_job_desc['bi_grams']

'[Job Description, End Engineer, Tech Solutions, Solutions San, San Francisco, Tech Solutions, build products, solve complex, complex problems, improve people, End Engineer, dynamic team, San Francisco, Job Description, End Engineer, developing scalable, userfriendly web, web applications, successful candidate, modern JavaScript, JavaScript frameworks, libraries HTML, HTML CSS, responsive design, design principles, contribute significantly, user interfaces, web applications, •Develop new, new userfacing, userfacing features, modern JavaScript, JavaScript frameworks, frameworks like, like Reactjs, Reactjs Vuejs, •Build reusable, reusable code, future use, technical feasibility, UX designs, •Optimize application, maximum speed, user input, backend services, team members, •2 years, End Developer, similar role, web markup, markup including, including HTML5, HTML5 CSS3, modern JavaScript, JavaScript programming, libraries like, like jQuery, modern frontend, frontend build, build pipelines, 

In [96]:
# Embed the bi-grams of the parsed job description; `embedding` is compared
# with the resume embedding in the similarity cell further down.
embedding = handler.embed_job_description(processed_job_desc)

Batches: 100%|██████████| 1/1 [00:00<00:00, 10.02it/s]


In [98]:
from scripts.parsers.ParseResumeToJson import ParseResume


class ResumeHandler:
    """Parse resumes (PDF files or raw text) and embed their bi-grams.

    Attributes:
        resumes_directory: Directory scanned by ``process_all_resumes``.
    """

    def __init__(self, resumes_directory="Data/Resumes/"):
        """Remember the directory that holds the resume files."""
        self.resumes_directory = resumes_directory
        # Embedding model, created on first use and then reused; building a
        # new HuggingFaceEmbeddings for every call is needlessly repeated work.
        self._embeddings = None

    def _get_embeddings(self):
        """Return the cached embeddings model, creating it on first use."""
        # getattr keeps this safe for instances created without __init__.
        if getattr(self, "_embeddings", None) is None:
            self._embeddings = HuggingFaceEmbeddings()
        return self._embeddings

    def read_pdf(self, file_path):
        """Return whatever ``ReadPdf.read_single_pdf`` yields for ``file_path``.

        Presumably the extracted text of the PDF -- verify against ReadPdf.
        """
        return ReadPdf.read_single_pdf(file_path)

    def process_resume(self, input_data):
        """Parse one resume given as a PDF file name or as raw text.

        Args:
            input_data: A string. If it ends with ``.pdf`` (any letter case)
                it is handed to ``ResumeProcessor``; otherwise it is treated
                as the resume text itself.

        Returns:
            The parsed resume. The notebook indexes it with ``['bi_grams']``,
            so it is expected to be dict-like.
        """
        if input_data.lower().endswith('.pdf'):
            # PDF input: the processor reads and parses the file.
            return ResumeProcessor(input_data)._read_resumes()

        # Text input: get_JSON() already is the parsed result, so return it
        # directly. The previous code went on to call `_read_resumes()` on
        # it, a method that belongs to the processor, not to the parsed data.
        # NOTE(review): assumes get_JSON() has the same shape as the PDF
        # branch's result -- confirm against ParseResume.
        return ParseResume(input_data).get_JSON()

    def resume_keyword(self, resume_text):
        """Return the ``'bi_grams'`` entry of a parsed resume."""
        return resume_text['bi_grams']

    def process_all_resumes(self):
        """Parse every regular file in ``resumes_directory``.

        Each parsed result is also written out through the processor's
        ``_write_json_file``.

        Returns:
            A list of parsed resumes, ordered by file name.
        """
        # Sorted so that the result order does not depend on the arbitrary
        # order in which os.listdir reports directory entries.
        resume_files = sorted(
            entry for entry in os.listdir(self.resumes_directory)
            if os.path.isfile(os.path.join(self.resumes_directory, entry))
        )

        resumes_processed = []
        for resume_file in resume_files:
            # The processor receives the bare file name (unchanged behavior);
            # presumably it resolves it against its own directory -- verify.
            resume_processor = ResumeProcessor(resume_file)
            resume_data = resume_processor._read_resumes()
            resume_processor._write_json_file(resume_data)
            resumes_processed.append(resume_data)
        return resumes_processed

    def embed_resume_description(self, resume_text):
        """Embed the ``'bi_grams'`` entry of a parsed resume.

        Returns:
            The result of ``embed_documents`` for a one-element document list.
        """
        return self._get_embeddings().embed_documents(
            [resume_text['bi_grams']])

In [99]:
# Parse the sample resume; later cells reuse both `resume_Handler` and
# `resume_processed`.
resume_Handler = ResumeHandler()
resume_processed = resume_Handler.process_resume(
    input_data="john_doe.pdf",
)

into this if loop


In [102]:
# Inspect the bi-grams entry of the parsed resume; this is the text that
# gets embedded for the similarity comparison.
resume_processed['bi_grams']

'[JOHN DOE, Main St, LinkedIn linkedincom, GitHub githubcom, PROFESSIONAL SUMMARY, Highly skilled, Stack Developer, Angular development, designing building, building testing, maintaining web, web applications, applications Proficient, technologies including, including Java, Java Spring, Spring Boot, Boot Angular, Angular HTML5, HTML5 CSS3, SQL Exceptional, Exceptional ability, selfdirect Committed, providing highquality, highquality results, little supervision, •Spring Boot, Boot Spring, Spring MVC, •Angular versions, •JavaScript TypeScript, TypeScript HTML5, HTML5 CSS3, •RESTful APIs, NoSQL databases, databases MySQL, MySQL MongoDB, WORK EXPERIENCE, Stack Java, Java Developer, ABC Company, Company Inc, USA June, •Developed scalable, scalable robust, maintainable enterpriselevel, enterpriselevel applications, •Used Angular, developing dynamic, responsive web, web frontends, frontends improving, improving user, user experience, •Integrated applications, MongoDB databases, retrieve data,

In [100]:
# Embed the bi-grams of the parsed resume; `resume_embedding` is compared
# with the job-description embedding in the similarity cell further down.
resume_embedding = resume_Handler.embed_resume_description(resume_processed)

Batches: 100%|██████████| 1/1 [00:00<00:00,  8.37it/s]


In [111]:
# Display the raw resume embedding (a nested list of floats).
# NOTE(review): this dumps the whole vector into the notebook output;
# consider showing only a slice, e.g. resume_embedding[0][:10].
resume_embedding

[[0.07864290475845337,
  0.06131061539053917,
  -0.040140047669410706,
  -0.014679858461022377,
  0.015326236374676228,
  -0.023457923904061317,
  0.04388463869690895,
  0.021196261048316956,
  -0.008244347758591175,
  0.01627918891608715,
  0.014733993448317051,
  0.04415017366409302,
  0.05835065618157387,
  0.054540347307920456,
  0.0063525959849357605,
  -0.0005988007760606706,
  0.011590296402573586,
  0.04002145305275917,
  0.018117833882570267,
  -0.019006088376045227,
  -0.0469488650560379,
  0.039729371666908264,
  0.026730749756097794,
  0.03112989105284214,
  0.02957754023373127,
  -0.014077085070312023,
  0.06709063053131104,
  -0.0017923667328432202,
  0.014511358924210072,
  -0.0294629093259573,
  0.005821209866553545,
  -0.031157368794083595,
  -0.058376021683216095,
  -0.03808202967047691,
  2.287094730490935e-06,
  -0.03089061751961708,
  0.016181688755750656,
  0.006304536014795303,
  -0.029432490468025208,
  -0.0071504125371575356,
  -0.03967450186610222,
  0.0404881

In [123]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


class EmbeddingSimilarityCalculator:
    """Compute the cosine similarity between two embeddings."""

    def __init__(self):
        """No configuration is needed; the calculator is stateless."""
        pass

    def flatten_embedding(self, embedding):
        """Return ``embedding`` as a 2-D array suitable for cosine_similarity.

        Args:
            embedding: A NumPy array of any rank.

        Returns:
            The array unchanged when it is already 2-D; otherwise the array
            reshaped to a single row of shape ``(1, n)``.
        """
        # cosine_similarity only accepts 2-D input. Previously just arrays
        # with more than two dimensions were reshaped, so a plain 1-D vector
        # was passed through and made the similarity call fail.
        if embedding.ndim != 2:
            return embedding.reshape(1, -1)
        return embedding

    def compute_similarity(self, embedding1, embedding2) -> float:
        """Return the cosine similarity of two embeddings.

        Args:
            embedding1: First embedding (array-like).
            embedding2: Second embedding (array-like).

        Returns:
            The similarity between the first row of each embedding, as a
            built-in ``float``.
        """
        # Convert to arrays and normalize both inputs to 2-D.
        first = self.flatten_embedding(np.array(embedding1))
        second = self.flatten_embedding(np.array(embedding2))

        # Only element [0][0] is used: the first row of each input.
        similarity = cosine_similarity(first, second)[0][0]
        # Convert the NumPy scalar so the return matches the annotation.
        return float(similarity)

In [125]:
# Score how closely the resume matches the job description by comparing
# their embeddings.
similarity_calculator = EmbeddingSimilarityCalculator()
similarity_score = similarity_calculator.compute_similarity(
    embedding1=embedding,
    embedding2=resume_embedding,
)
print(f"Similarity Score: {similarity_score}")

Similarity Score: 0.7224677145136769
