# Resume Parsing

In [1]:
import fitz  # PyMuPDF
import spacy
from spacy.matcher import PhraseMatcher

1. Using PhraseMatcher

In [2]:
def extract_text_from_pdf(pdf_path):
    """Return the concatenated plain text of every page in a PDF.

    Parameters:
    pdf_path (str): Path of the PDF file to read.

    Returns:
    str or None: The text of all pages in page order, or None when the file
    cannot be opened or read. In that case the error is printed rather than
    raised, so callers only need to check for a falsy result.
    """
    try:
        # The context manager closes the document even when a page fails to
        # load; previously the file handle was left open.
        with fitz.open(pdf_path) as doc:
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        # Deliberately best-effort: report the problem and signal failure
        print(f"Error opening or reading PDF: {e}")
        return None

def extract_skills_and_points(text):
    """Find known skill phrases and root verbs in resume text.

    Parameters:
    text (str): Plain text of the resume.

    Returns:
    tuple: (skills, key_points) where
        skills (list of str): every occurrence of a known skill phrase, as
            it is written in the text (one entry per occurrence);
        key_points (list of str): the text of every token that is a VERB
            and the ROOT of its dependency parse.
    """
    nlp = spacy.load('en_core_web_sm')
    doc = nlp(text)

    # Define some common skills to search for
    skill_phrases = [
        "machine learning", "deep learning", "data analysis", "python", "java",
        "project management", "communication", "teamwork", "sql", "excel"
    ]

    # attr="LOWER" compares lowercased token text. The default (exact text)
    # would make the lowercase patterns above miss "Python", "SQL", etc.
    matcher = PhraseMatcher(nlp.vocab, attr="LOWER")
    # make_doc only tokenizes, which is all the matcher needs for patterns
    patterns = [nlp.make_doc(skill) for skill in skill_phrases]
    matcher.add("SKILLS", patterns)

    # Use matcher to find skills
    skills = [doc[start:end].text for _match_id, start, end in matcher(doc)]

    # Extract key points
    key_points = [
        token.text
        for token in doc
        if token.pos_ == 'VERB' and token.dep_ == 'ROOT'
    ]

    return skills, key_points

if __name__ == "__main__":
    # Point this at the resume PDF you want to parse
    pdf_path = '/content/resume.pdf'

    resume_text = extract_text_from_pdf(pdf_path)

    # Nothing to report when extraction failed (None) or produced no text
    if resume_text:
        skills, key_points = extract_skills_and_points(resume_text)

        # One heading per result list, then one entry per line
        report_lines = ["Skills:", *skills, "\nKey Points:", *key_points]
        print("\n".join(report_lines))

Error opening or reading PDF: no such file: '/content/resume.pdf'


2. Using a Trie

In [3]:
class TrieNode:
    """A single node of the character trie used by Trie."""

    def __init__(self):
        # Maps the next character to the child node reached through it
        self.children = {}
        # True when the path from the root to this node spells a whole phrase
        self.is_end_of_phrase = False

class Trie:
    """Character-level trie for finding a fixed set of phrases inside text.

    Matching is exact and case-sensitive, and it is substring-based: a phrase
    is reported wherever its characters occur, including inside longer words.
    """

    def __init__(self):
        self.root = TrieNode()

    def insert(self, phrase):
        """Add `phrase` (str) to the set of phrases that search() can find."""
        node = self.root
        for char in phrase:
            if char not in node.children:
                node.children[char] = TrieNode()
            node = node.children[char]
        node.is_end_of_phrase = True

    def search(self, text):
        """Return every occurrence of an inserted phrase in `text`.

        Parameters:
        text (str): The text to scan.

        Returns:
        list of str: Matched phrases ordered by start position; when several
        phrases start at the same position, shorter ones come first. A phrase
        appears once per occurrence.
        """
        matched_phrases = []
        text_length = len(text)
        # Try every start index. The previous implementation resumed two
        # characters after a start whose first character was in the trie,
        # so a phrase beginning right after it (e.g. "sql" in "jsql") was
        # never examined.
        for start in range(text_length):
            node = self.root
            for end in range(start, text_length):
                node = node.children.get(text[end])
                if node is None:
                    break
                if node.is_end_of_phrase:
                    matched_phrases.append(text[start:end + 1])
        return matched_phrases

In [4]:

# Usage of Trie in the main script
def extract_skills_and_points(text):
    """Find known skill phrases (via a Trie) and root verbs in resume text.

    Parameters:
    text (str): Plain text of the resume.

    Returns:
    tuple: (skills, key_points) where
        skills (list of str): whatever Trie.search returns for the
            lowercased text, given the lowercased skill phrases below;
        key_points (list of str): the text of every token that is a VERB
            and the ROOT of its dependency parse.
    """
    nlp = spacy.load('en_core_web_sm')
    doc = nlp(text)

    # Define some common skills to search for
    skill_phrases = [
        "machine learning", "deep learning", "data analysis", "python", "java",
        "project management", "communication", "teamwork", "sql", "excel"
    ]

    # Index the phrases in lowercase so the lookup ignores case
    trie = Trie()
    for phrase in skill_phrases:
        trie.insert(phrase.lower())

    # Scan the lowercased resume text for the indexed phrases
    skills = trie.search(text.lower())

    # Root verbs of the parsed sentences serve as the key points
    key_points = [
        token.text
        for token in doc
        if token.pos_ == 'VERB' and token.dep_ == 'ROOT'
    ]

    return skills, key_points

if __name__ == "__main__":
    # Point this at the resume PDF you want to parse
    pdf_path = '/content/resume.pdf'

    resume_text = extract_text_from_pdf(pdf_path)

    # Nothing to report when extraction failed (None) or produced no text
    if resume_text:
        skills, key_points = extract_skills_and_points(resume_text)

        # Print each section as a heading followed by one entry per line
        for heading, entries in (("Skills:", skills), ("\nKey Points:", key_points)):
            print(heading)
            for entry in entries:
                print(entry)


Error opening or reading PDF: no such file: '/content/resume.pdf'


#### Job Recommendation system

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense
from sklearn.metrics import accuracy_score

In [None]:
# Load your dataset
# NOTE(review): `path` is "Data/", which looks like a directory, yet it is
# passed straight to pd.read_csv — confirm the intended CSV file name.
path = "Data/"
dataset = pd.read_csv(path)

# Define features and target for recommendation system
# `features` lists the input column names; `target` is a one-element list
# holding the label column name.
features = ['Experience', 'Qualifications', 'Location', 'Work_Type', 'Role', 'Skills']
target = ['Job_Title']

In [5]:
class Job_Recommendation:
    """Embedding-based classifier that predicts a target column from categorical features.

    Each feature gets its own input and a 10-dimensional embedding; the
    embeddings are flattened, concatenated and passed through two dense
    layers to a softmax over the target classes.

    Parameters:
    features (list of str): Names of the input columns in `dataset`.
    target (str or one-element list of str): Name of the label column.
    dataset (pandas.DataFrame): Data holding the feature and target columns.

    NOTE(review): Embedding layers and sparse categorical cross-entropy need
    integer codes in [0, number of unique values). This class does not encode
    anything itself — presumably the columns are label-encoded upstream;
    confirm against the data preparation step.
    """

    def __init__(self, features, target, dataset):
        self.features = features
        self.target = target
        self.dataset = dataset
        # Held-out split; filled in by train() and consumed by evaluate()
        self.X_test = None
        self.y_test = None
        # The method is named build_model; the previous `model_build` call
        # raised AttributeError on construction.
        self.model = self.build_model()

    def _target_column(self):
        """Return the single label column name for a str or one-element list target."""
        if isinstance(self.target, (list, tuple)):
            if len(self.target) != 1:
                raise ValueError("target must name exactly one column")
            return self.target[0]
        return self.target

    def _model_inputs(self, frame):
        """Return one (n, 1) array per feature, in the order of self.features."""
        return [frame[feature].to_numpy().reshape(-1, 1) for feature in self.features]

    def build_model(self):
        """Build and compile the Keras model described in the class docstring."""
        # This will create an input layer for each feature
        inputs = {feature: Input(shape = (1, ), name = feature) for feature in self.features}
        # Iterate self.features, not a notebook-level `features` global
        embeddings = {feature: Embedding(self.dataset[feature].nunique(), 10)(inputs[feature])
                      for feature in self.features}
        flattened = [Flatten()(embeddings[feature]) for feature in self.features]
        # Concatenate needs at least two tensors; pass a lone feature through
        concat = flattened[0] if len(flattened) == 1 else Concatenate()(flattened)
        dense1 = Dense(units = 128, activation = "relu")(concat)
        dense2 = Dense(units = 64, activation = "relu")(dense1)

        # nunique() on a single column gives an int; on a list-selected
        # DataFrame it would give a Series, which Dense cannot use as `units`.
        n_classes = self.dataset[self._target_column()].nunique()
        # Softmax yields one probability distribution over mutually exclusive
        # classes, which is what the sparse categorical loss expects.
        output = Dense(units = n_classes, activation = "softmax")(dense2)

        model = Model(inputs = [inputs[feature] for feature in self.features], outputs = output)

        model.compile(optimizer = "adam", loss = "sparse_categorical_crossentropy", metrics = ["accuracy"])

        return model

    def train(self, epochs = 10, batch_size = 32, validation_split = 0.2):
        """Fit the model on an 80% split and keep the other 20% for evaluate().

        Parameters:
        epochs (int): Number of training epochs.
        batch_size (int): Mini-batch size.
        validation_split (float): Fraction of the training split that Keras
            holds out for validation during fitting.
        """
        X_train, X_test, y_train, y_test = train_test_split(self.dataset[self.features],
                                                            self.dataset[self.target],
                                                            test_size = 0.2,
                                                            random_state = 0)

        self.model.fit(self._model_inputs(X_train),
                       y_train.to_numpy().ravel(),
                       epochs = epochs,
                       batch_size = batch_size,
                       validation_split = validation_split)

        self.X_test, self.y_test = X_test, y_test

    def evaluate(self):
        """Return the accuracy on the split held out by train().

        Raises:
        RuntimeError: If train() has not been called yet.
        """
        if getattr(self, "X_test", None) is None or getattr(self, "y_test", None) is None:
            raise RuntimeError("evaluate() called before train(); no held-out split is available")
        # predict() returns a NumPy array of class probabilities, so argmax works
        y_pred = self.model.predict(self._model_inputs(self.X_test))
        y_pred_class = y_pred.argmax(axis = 1)
        # Flatten so a one-column DataFrame and a Series are scored the same way
        y_true = self.y_test.to_numpy().ravel()
        accuracy = accuracy_score(y_true, y_pred_class)
        return accuracy

#### Salary Prediction

In [4]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
import keras

In [None]:
class Linear_Model:
    """Fully connected regression network built from Keras Dense layers."""

    def __init__(self, input_dim, hidden_layers, output_dim):
        """
        Parameters:
        input_dim (int): Number of the input features
        hidden_layers (list): Number of units in each hidden layer, in order.
            May be empty (or None), giving a purely linear model.
        output_dim (int): Number of the output units
        """
        self.model = Sequential()

        # Input layer: declared on its own so the first Dense layer is not
        # special-cased and an empty `hidden_layers` no longer raises IndexError
        self.model.add(tf.keras.Input(shape = (input_dim, )))

        # Hidden layers
        for units in (hidden_layers if hidden_layers is not None else []):
            self.model.add(Dense(units, activation = "relu"))

        # Output layer
        self.model.add(Dense(output_dim, activation = "linear"))

        # Compile the model
        self.model.compile(optimizer = "adam", loss = "mse", metrics = ["mse"])

    def train(self, X_train, y_train, epochs = 10, batch_size = 32):
        """Fit the model and return whatever the underlying fit() call returns."""
        return self.model.fit(X_train, y_train, epochs = epochs, batch_size = batch_size)

    def evaluate(self, X_test, y_test):
        """Return the result of the underlying model's evaluate() on the given data."""
        return self.model.evaluate(X_test, y_test)

    def predict(self, X):
        """Return the underlying model's predictions for X."""
        return self.model.predict(X)