In [60]:
pip install pdfplumber



In [62]:
pip install python-dotenv



In [63]:
pip install -q -U google-generativeai

In [64]:
from transformers import pipeline
import pdfplumber
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd

In [65]:
import os
from dotenv import load_dotenv
import google.generativeai as genai

In [66]:
from google.colab import drive
# Mount Google Drive at /content/drive; the CV/JD PDFs and the .env file used by
# later cells are all addressed through /content/drive/MyDrive/... paths.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [67]:
# The API key is stored in the project's .env file on Drive, so that file must be
# loaded into os.environ before the key is read; otherwise os.environ[...] raises
# KeyError on a fresh kernel. load_dotenv() does not override variables that are
# already set, so an existing value in the environment still takes precedence.
load_dotenv(dotenv_path='/content/drive/MyDrive/CVRanker/.env')
genai.configure(api_key=os.environ["google_ai_studio_key"])

In [68]:
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as a single string.

    Args:
        pdf_path: Location of the PDF; passed straight to ``pdfplumber.open``.

    Returns:
        The extracted text of all pages in page order, joined with newlines.
        A page that yields no text contributes an empty string, and a PDF
        with no pages yields ``''``.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() may return None for a page without extractable text
        # (e.g. a scanned image); fall back to '' so one such page does not
        # abort the whole document with a TypeError.
        page_texts = [page.extract_text() or '' for page in pdf.pages]
    # Join with a newline so the last word of one page and the first word of
    # the next are not fused into a single token.
    return '\n'.join(page_texts)

In [69]:
# CV PDFs to rank (list your own CV file names here); every file sits in the
# same Drive dataset folder, so only the file name varies.
cv_files = [
    '/content/drive/MyDrive/CVRanker/dataset/' + filename
    for filename in (
        'Benjamin_Salebaigi.pdf',
        'CV Martin Kriletich 2024.pdf',
        'CV Podcast Host_Producer Damien  Swaby  (1).pdf',
        'Kindra Keener 2024 CV.pdf',
        'Violetta_Nadbitova_CV-1.pdf',
    )
]

In [70]:
# Raw text of each CV, in the same order as cv_files.
cv_texts = list(map(extract_text_from_pdf, cv_files))

In [71]:
# Read the job description (JD) PDF into a single string.
jd_text = extract_text_from_pdf(
    '/content/drive/MyDrive/CVRanker/dataset/Podcaster Job Ad.pdf'
)

In [72]:
# Pull the key/value pairs from the project's .env file on Drive into os.environ.
load_dotenv(dotenv_path='/content/drive/MyDrive/CVRanker/.env')

# Read the Google AI Studio key back out; this is None when the variable is unset.
api_key = os.environ.get('google_ai_studio_key')

In [73]:
# Shared Gemini model handle; the system instruction frames every later prompt
# as coming from a hiring officer's assistant.
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    system_instruction=(
        "You are a hiring officer's personal assistant. "
        "You are experienced at the job."
    ),
)

In [74]:
# Smoke test: send a trivial prompt and print the reply to confirm that the API
# key and model configuration work before summarising any CVs.
response = model.generate_content("Good morning! How are you?")
print(response.text)

Good morning! I'm doing well, thank you for asking. How can I help you today? 😊 



In [75]:
def summarize_cv(cv_text):
    """Ask the configured Gemini model for a two-paragraph summary of one CV.

    The prompt requests that the first line of the reply be a heading holding
    only the applicant's name, followed by the key skills and experiences.

    Args:
        cv_text: Plain text of the CV; appended to the end of the prompt.

    Returns:
        The ``text`` of the model's response.
    """
    # Fixed instructions; the CV text is appended directly after the final colon.
    instructions = """
    The hiring officer wants you to help summarize the key skills and experiences of a CV an applicant submitted in two paragraphs.
    He plans to read your summary and make a decision to hire or not hire each applicant respectively by comparing your summary of the applicant's
    skills and experiences with the Job description which he has and was already advertised. Make the first line a heading with only applicants name.
    Be careful to not miss any relevant experience or skill. This is the CV: """
    reply = model.generate_content(f"{instructions}{cv_text}")
    return reply.text

In [77]:
# One summarize_cv call per CV; the summaries stay aligned with cv_texts by position.
cv_summaries = list(map(summarize_cv, cv_texts))

In [78]:
# Corpus for vectorisation: the job description first, then every CV in order.
documents = [jd_text, *cv_texts]

In [79]:
# Vectorize the texts: fit a TF-IDF vocabulary over all documents (English stop
# words removed) and encode each document as one row of X, in `documents` order.
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(documents)

In [83]:
# Cosine similarity of the first row of X against every remaining row,
# expressed as percentages in a flat Python list.
jd_vs_cvs = cosine_similarity(X[0], X[1:])
similarities = (100 * jd_vs_cvs).ravel().tolist()

In [85]:
def _name_from_summary(summary):
    """Return the applicant name taken from a summary's heading line.

    The heading is the first line that still has content after Markdown
    heading/bold markers ('#', '*') and surrounding whitespace are removed,
    so replies that begin with a blank line, use '#' instead of '##', or wrap
    the name in '**' still yield a clean name.

    Args:
        summary: Summary text whose first line is expected to be the name heading.

    Returns:
        The cleaned name, or '' when the summary has no usable line.
    """
    for line in summary.splitlines():
        candidate = line.strip(' \t#*')
        if candidate:
            return candidate
    return ''


# Applicant names, aligned with cv_summaries by position.
names = [_name_from_summary(summary) for summary in cv_summaries]

In [89]:
# One row per applicant, ordered from highest to lowest similarity score.
df = pd.DataFrame(
    {'Name': names, 'Summary': cv_summaries, 'Score (%)': similarities}
).sort_values(by='Score (%)', ascending=False)

In [94]:
# Write the ranked table to output.csv in the working directory, without the index column.
df.to_csv(path_or_buf='output.csv', index=False)