In [None]:
import tkinter as tk
from tkinter import filedialog
import shutil
import os

def upload_resume():
    """Ask the user for a resume file and copy it into ``./dataset/``.

    Opens a file-selection dialog (PDF files listed first). When a file is
    chosen, its base name is stored in the module-level ``uploaded_filename``,
    the file is copied into the ``./dataset/`` folder, the status label is
    updated and the window is scheduled to close after three seconds. When
    the dialog is cancelled, a message is printed and nothing else happens.
    """
    global uploaded_filename

    filename = filedialog.askopenfilename(initialdir="./", title="Select file", filetypes=(("PDF files", "*.pdf"), ("All files", "*.*")))
    if not filename:
        print("No file selected.")
        return

    print("Uploaded file:", filename)
    # Save the filename to a variable for further use
    uploaded_filename = os.path.basename(filename)

    # Save the uploaded file to the "./dataset/" folder, creating the folder
    # first so the copy does not fail on a fresh checkout.
    dataset_dir = "./dataset/"
    os.makedirs(dataset_dir, exist_ok=True)
    destination = os.path.join(dataset_dir, uploaded_filename)
    try:
        shutil.copyfile(filename, destination)
    except shutil.SameFileError:
        # The selected file already lives in the dataset folder; there is
        # nothing to copy.
        pass
    print("File saved to:", destination)

    # Update message label
    message_label.config(text="Resume uploaded. Processing resume...")

    # Close the window after 3 seconds
    root.after(3000, root.destroy)

# Upload window: a white 800x500 canvas holding the upload button and a
# status label. upload_resume() reads `root` and `message_label` as globals.

# Create the main window
root = tk.Tk()
root.title("Matching CV with Job Descriptions")
root.geometry("800x500")

# Create the canvas (same size as the window)
canvas = tk.Canvas(root, width=800, height=500, bg="white")
canvas.pack()

# Create the "Upload Resume" button, centred on the canvas at (400, 250)
upload_button = tk.Button(canvas, text="Upload Resume", bg="blue", fg="white", font=("Arial", 14), command=upload_resume)
upload_button_window = canvas.create_window(400, 250, window=upload_button, anchor="center")

# Message label, placed 50 px below the button; its text is set by upload_resume()
message_label = tk.Label(canvas, text="", font=("Arial", 12))
message_label_window = canvas.create_window(400, 300, window=message_label, anchor="center")

# Blocks until the window is closed; upload_resume() schedules root.destroy
# three seconds after a file has been selected.
root.mainloop()


In [None]:
# %pip install nltk
import pandas as pd
import os
from nltk.stem import PorterStemmer
import math
import re

def tokenizer(text):
    """Split ``text`` into tokens made only of ASCII letters.

    Every run of characters other than a-z / A-Z acts as a separator, so
    digits, punctuation and whitespace never appear in a token. Letter case
    is left untouched.
    """
    pieces = re.split("[^a-zA-Z]+", text)
    # re.split yields empty strings at the edges when the text starts or
    # ends with a separator; drop them.
    return [piece for piece in pieces if piece]


def indexer(jobs, stopwords_path):
    """Build the inverted index and the TF-IDF document vectors.

    Args:
        jobs: sequence of job-description strings; a description's position
            in the sequence is used as its document id.
        stopwords_path: directory that contains ``stopwords.txt`` (one stop
            word per line).

    Side effects:
        Rebinds the module-level ``index`` to a dict sorted by term that maps
        each stemmed term to ``{document id: term frequency}``, and fills the
        module-level ``dvectors`` with one vector per document whose i-th
        component is ``tf * log10(N / df)`` for the i-th term of ``index``
        (N = number of document vectors, df = documents containing the term).
    """
    global index

    # The stop-word list is the same for every document, so read it once and
    # keep it in a set for constant-time membership tests.
    with open(os.path.join(stopwords_path, 'stopwords.txt'), 'r') as f:
        stop = {line.strip() for line in f}

    for doc_id, description in enumerate(jobs):
        # Lowercase before tokenizing so capitalised stop words ("The",
        # "And") are filtered as well, the same way the resume text is
        # lowercased before it is tokenized.
        for word in tokenizer(description.lower()):
            if word in stop:
                continue
            term = ps.stem(word)
            postings = index.setdefault(term, {})
            postings[doc_id] = postings.get(doc_id, 0) + 1

    # sorting the index by tokens
    index = dict(sorted(index.items()))

    for doc_id in range(len(jobs)):
        dvectors[doc_id] = [0] * len(index)

    total_docs = len(dvectors)
    for position, postings in enumerate(index.values()):
        # idf depends only on the term, so compute it once per term
        idf = math.log(total_docs / len(postings), 10)
        for doc_id, term_frequency in postings.items():
            if doc_id in dvectors:
                dvectors[doc_id][position] = idf * term_frequency
    return


if __name__=="__main__":
    # index is the global inverted index: stemmed term -> {doc id: term frequency}
    index = {}
    # dvectors contains all document vectors, keyed by document position
    dvectors = {}
    # module-level placeholder; indexer() loads its own stop-word list
    stop = set()

    # Job postings; the columns read below must exist in the CSV
    df = pd.read_csv("./dataset/data.csv")
    jd = df['Job Description'].tolist()
    companies = df['company'].tolist()
    positions = df['position'].tolist()
    docID = df['docid'].tolist()

    # Folder that holds stopwords.txt. Built with os.path.join so the path
    # resolves on POSIX systems as well as on Windows (where it is still
    # ".\stopwords").
    stopwords_path = os.path.join('.', 'stopwords')

    # stemmer
    ps = PorterStemmer()
    # all the preprocessing is done inside the indexer()
    indexer(jd, stopwords_path)
    # indexer() already leaves the index sorted; re-sorting is a harmless no-op
    index = dict(sorted(index.items()))

    print(f"Index created with {len(index)} unique terms!")


In [None]:
%pip install pdfminer
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfpage import PDFPage
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from io import StringIO

def pdfparser(data):
    """Extract the text of a PDF file with pdfminer.

    Args:
        data: path of the PDF file to read.

    Returns:
        The text of all pages as one string; an empty string when the
        document contains no pages.
    """
    rsrcmgr = PDFResourceManager()
    retstr = StringIO()
    device = TextConverter(rsrcmgr, retstr, laparams=LAParams())
    try:
        with open(data, 'rb') as fp:
            # Create a PDF interpreter object.
            interpreter = PDFPageInterpreter(rsrcmgr, device)
            # Process each page contained in the document.
            for page in PDFPage.get_pages(fp):
                interpreter.process_page(page)
        # Read the buffer once, after every page has been rendered into it.
        # Returning a separate value (instead of reusing `data`) means a PDF
        # without pages yields "" rather than the input path.
        return retstr.getvalue()
    finally:
        # Release the converter and the text buffer even if parsing fails.
        device.close()
        retstr.close()

# Path of the resume copy that upload_resume() saved under ./dataset/.
# `uploaded_filename` is only defined once a file has been chosen in the
# upload window, so this cell must run after a successful upload.
x = "./dataset/"
x += uploaded_filename
parsed_text = pdfparser(x)
# Persist the extracted text; the query cell reads resumeconverted.txt back in.
with open('resumeconverted.txt', 'w', encoding='utf-8') as f:
    f.write(parsed_text)

In [None]:
def dist(vector):
    """Return the Euclidean length (L2 norm) of ``vector``.

    ``vector`` is any iterable of numbers; an empty iterable gives 0.0.
    """
    squared_total = 0
    # Accumulate left to right so floating-point rounding is unchanged.
    for squared in map(lambda component: component ** 2, vector):
        squared_total = squared_total + squared
    return math.sqrt(squared_total)

def calculateCos(query, threshold=0.08):
  """Rank the indexed documents by cosine similarity to ``query``.

  Args:
    query: TF-IDF vector of the query, aligned component by component with
      the document vectors stored in the module-level ``dvectors``.
    threshold: a document is reported only when its cosine similarity is
      strictly greater than this value.

  Returns:
    A list of ``(document id, cosine)`` tuples sorted by descending cosine.
    The list is empty (and ``result.txt`` is not written) when the query
    vector has zero length.

  Side effects:
    Writes the ranking to ``result.txt``, one ``doc,cosine`` line per hit.
  """
  result = []
  qDist = dist(query)
  if qDist == 0:
    return result

  for doc, docVector in dvectors.items():
    docDist = dist(docVector)
    if docDist == 0:
      # An all-zero document vector has no direction, so its cosine is
      # undefined; skip it instead of dividing by zero.
      continue

    dotProduct = 0
    for q, d in zip(query, docVector):
      # Components that are zero on either side contribute nothing.
      if q != 0 and d != 0:
        dotProduct = dotProduct + (q * d)

    cos = dotProduct / (qDist * docDist)
    if cos > threshold:
      result.append((doc, cos))

  result = sorted(result, key=lambda x: -x[1])

  with open("result.txt", "w") as f:
    for row in result:
      f.write(",".join(str(field) for field in row) + "\n")

  return result

def processQuery(query_tokens):
    """Turn query tokens into a TF-IDF vector aligned with ``index``.

    Args:
        query_tokens: list of raw (unstemmed) tokens. The list is not
            modified.

    Returns:
        A list with one entry per term of the module-level ``index`` (in its
        iteration order): ``tf * log10(N / df)`` for terms that occur in the
        query and 0 for all others, where N is ``len(dvectors)`` and df is
        the number of documents that contain the term.
    """
    # Count stemmed tokens without writing the stems back into the caller's
    # list, so calling this again on the same tokens gives the same vector.
    qDict = {}
    for token in query_tokens:
        stem = ps.stem(token)
        qDict[stem] = qDict.get(stem, 0) + 1

    qvector = [0]*len(index)
    total_docs = len(dvectors)

    for i, (term, postings) in enumerate(index.items()):
        if term in qDict:
            idf = math.log(total_docs / len(postings), 10)
            qvector[i] = idf * qDict[term]

    return qvector


# Use the extracted resume text as the query: lowercase it and split it into
# alphabetic tokens.
with open("resumeconverted.txt", "r", encoding="utf-8") as f:
    data = f.read().lower()
    query_tokens = tokenizer(data)

# Build the query's TF-IDF vector and rank the job descriptions against it.
# `result` is a list of (document id, cosine) tuples, best match first; the
# results window cell reads it.
queryVector = processQuery(query_tokens)
result = calculateCos(queryVector)

# Print one (document id, cosine) tuple per line, or "[]" when nothing matched.
if result: 
  for i in result:
    print(i)
else:
   print("[]")

In [None]:
import tkinter as tk

def show_job_description(index):
    """Display the company and description of the ``index``-th ranked job.

    ``index`` is a position in the module-level ``result`` list, whose
    entries are ``(document id, score)`` pairs; the document id selects the
    matching entries of ``companies`` and ``jd``.
    """
    doc_id, _score = result[index]
    company_label.configure(text=f"Company: {companies[doc_id]}")

    # The text box is kept read-only; unlock it just long enough to swap in
    # the newly selected description.
    description_text.configure(state=tk.NORMAL)
    description_text.delete("1.0", tk.END)
    description_text.insert(tk.END, jd[doc_id])
    description_text.configure(state=tk.DISABLED)

def create_sidebar(root):
    """Build the left-hand sidebar with one button per ranked position.

    A button is created for every entry of the module-level ``result`` list,
    in ranking order; clicking it shows that job via show_job_description().
    """
    sidebar = tk.Frame(root, width=200, bg="gray")
    sidebar.pack(side=tk.LEFT, fill=tk.Y)

    for rank, (doc_id, _score) in enumerate(result):
        # Bind the current rank as a default argument so every button keeps
        # its own value instead of the loop's final one.
        tk.Button(
            sidebar,
            text=f"Position: {positions[doc_id]}",
            command=lambda i=rank: show_job_description(i),
        ).pack(fill=tk.X)

def create_main_window(root):
    """Build the detail pane: a company label above a read-only text box.

    The two widgets are stored in the module-level ``company_label`` and
    ``description_text`` so show_job_description() can update them.
    """
    global company_label, description_text

    detail_pane = tk.Frame(root)
    detail_pane.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

    company_label = tk.Label(
        detail_pane,
        text="Company:",
        font=("Helvetica", 12, "bold"),
    )
    company_label.pack(anchor=tk.W, padx=10, pady=10)

    text_options = dict(wrap=tk.WORD, font=("Helvetica", 10), height=20, width=50)
    description_text = tk.Text(detail_pane, **text_options)
    # expand stays False so the text box keeps its requested height
    description_text.pack(fill=tk.BOTH, expand=False, padx=10, pady=5)

    # Start read-only; show_job_description() unlocks it while updating.
    description_text.configure(state=tk.DISABLED)

# Results window: a sidebar of matched positions next to a detail pane.
if __name__ == "__main__":
    
    root = tk.Tk()
    root.title("Job Descriptions")

    # The sidebar buttons read the module-level `result` list; the detail
    # pane is created afterwards and is what the buttons update when clicked.
    create_sidebar(root)
    create_main_window(root)

    # Blocks until the window is closed.
    root.mainloop()
