In [1]:
import os
import json
import numpy as np
from tqdm import tqdm
from pinecone import Pinecone 
from pinecone import ServerlessSpec
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
# --- Notebook configuration ---------------------------------------------
# Input dataset, embedding model and Pinecone index used by the cells below.
file_name = "structured_resumes.json"
model_name = "multi-qa-MiniLM-L6-cos-v1"
index_name = "job-fit-ai"

# The API key is read from the environment (None when the variable is unset).
pinecone_api_key = os.environ.get("PINECONE_API_KEY")
pc = Pinecone(api_key=pinecone_api_key)

In [3]:
def cos_similar(a, b):
    """Return the cosine similarity between two vectors.

    Both inputs are converted to NumPy arrays and flattened into a single
    row each, so any array-like holding one vector is accepted.

    Args:
        a: First vector (array-like).
        b: Second vector (array-like) with the same number of elements as ``a``.

    Returns:
        The single similarity score taken from the 1x1 result matrix.
    """
    # cosine_similarity works on 2-D inputs, so present each vector as one row.
    row_a = np.array(a).reshape(1, -1)
    row_b = np.array(b).reshape(1, -1)
    similarity_matrix = cosine_similarity(row_a, row_b)
    return similarity_matrix[0][0]

In [4]:
# Instantiate the sentence-embedding model named in the config cell.
model = SentenceTransformer(model_name)

# Encode a throwaway sentence purely to discover the size of the vectors
# this model produces.
sample_text = "This is a test sentence."
embedding = model.encode(sample_text)

# The vector size is reused as the `dimension` of the Pinecone index.
embedding_length = embedding.shape[0]
print(f"Embedding length: {embedding_length}")



Embedding length: 384


In [5]:
# Create the serverless index only when it does not exist yet; otherwise the
# existing index is reused untouched.
if not pc.has_index(index_name):
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        # Dimension comes from the sample embedding measured earlier.
        dimension=embedding_length,
        metric="cosine",
        spec=serverless_spec,
        deletion_protection="disabled",
    )

# Client handle for the index; used below for the upsert.
index = pc.Index(index_name)

In [7]:
# Load the structured resumes. JSON is UTF-8 by specification, so decode it
# explicitly instead of relying on the platform's default encoding.
with open(file_name, "r", encoding="utf-8") as file:
    data = json.load(file)

# Maximum number of records sent per upsert request; keeps each request small
# when the dataset grows beyond a handful of resumes.
UPSERT_BATCH_SIZE = 100

vectors = []
for group_name, dict_values in tqdm(data.items()):
    # Build one text per resume from its education, experience, skills and
    # courses sections (a missing section raises KeyError).
    resume = (
        f"education: {dict_values['education']}\n"
        f"experience: {dict_values['experience']}\n"
        f"skills: {dict_values['skills']}\n"
        f"courses: {dict_values['courses']}\n"
    )
    # Embed the resume, then store the vector as a plain list of floats so the
    # payload does not depend on the client converting NumPy arrays.
    resume_embedding = model.encode(resume)
    vectors.append({"id": group_name, "values": resume_embedding.tolist()})

# Upsert in batches; progress output is disabled so the cell's only result is
# the upsert response, as before.
index.upsert(vectors=vectors, batch_size=UPSERT_BATCH_SIZE, show_progress=False)

100%|██████████| 20/20 [00:01<00:00, 10.72it/s]


{'upserted_count': 20}

In [22]:
# strong_fit_jd="Tech Innovators Inc. is seeking a talented and experienced Senior Front-End Developer to join our dynamic team. The ideal candidate will have a robust background in front-end development with a focus on creating user-friendly, responsive websites and applications. Responsibilities include developing and maintaining high-quality, scalable web applications using HTML5, React.js, and GraphQL, collaborating with design and backend teams to implement modern UI/UX designs and features, optimizing applications for maximum speed and scalability using tools such as Yarn and Gulp, and utilizing version control systems like GitHub for code management and collaboration. The candidate should have a Bachelor of Science in Computer Science or a related field, with 5+ years of professional experience in front-end development, including recent experience at companies like Expedia Group or Zillow. Proficiency in HTML5, React.js, GitHub, Yarn, Gulp, Joomla, Mocha, and GraphQL is required. Strong understanding of responsive design principles and front-end frameworks is needed, along with excellent problem-solving skills and the ability to work collaboratively in a team environment. Experience with Bootstrap and Visual Studio Code is highly desirable. Preferred qualifications include experience with content management systems (CMS) like Joomla and familiarity with Agile development methodologies."
# worst_fit_jd= "Future Insights LLC is looking for a skilled Data Scientist to analyze and interpret complex data to help drive business decisions. The successful candidate will use advanced analytical techniques and machine learning algorithms to uncover insights and trends from large datasets. Responsibilities include developing and implementing machine learning models and data-driven solutions to solve business problems, analyzing complex data sets using statistical methods and data visualization tools, collaborating with cross-functional teams to understand business needs and provide actionable insights, and preparing detailed reports and presentations to communicate findings to stakeholders. The candidate should have a Bachelor of Science in Computer Science, Mathematics, Statistics, or a related field, with 3+ years of experience in data science, a strong background in machine learning, statistical analysis, and data visualization. Proficiency in programming languages such as Python or R, and tools such as TensorFlow or PyTorch is required, along with experience with SQL databases and data manipulation. Strong analytical and problem-solving skills are essential. Preferred qualifications include an advanced degree in a related field and experience in a similar role at technology companies."