In [None]:
# Install the packages this notebook relies on (Colab shell escape).
# NOTE(review): no versions are pinned, so a re-run may pick up different releases.
!pip install kaggle scikit-learn tensorflow joblib

# Open the Colab upload dialog. The shell commands below expect the uploaded
# file to be called kaggle.json and to be in the current working directory.
from google.colab import files
files.upload()  # choose kaggle.json in the dialog

# Move the credentials into ~/.kaggle and restrict the file to mode 600.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download the gauravduttakiit/resume-dataset archive and unpack it here.
!kaggle datasets download -d gauravduttakiit/resume-dataset
!unzip resume-dataset.zip

#  Load / prepare the data
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the unpacked dataset CSV (UpdatedResumeDataSet.csv) into a DataFrame.
df = pd.read_csv('UpdatedResumeDataSet.csv')

print(f"Dataset loaded: {df.shape[0]} resumes.")

# Keep only the free-text 'Resume' column as a plain list of strings.
# The count printed above is taken before dropna(), so len(resumes) can be
# smaller than the number reported.
resumes = df['Resume'].dropna().tolist()

# 5. Preprocess the text
def clean_text(text):
    """Normalise raw resume text.

    Newlines become spaces, every character that is not an ASCII letter,
    a digit or whitespace is removed, and the result is lower-cased.
    """
    flattened = text.replace('\n', ' ')
    alnum_only = re.sub(r'[^a-zA-Z0-9\s]', '', flattened)
    return alnum_only.lower()

# Whole-document cleaned text, one string per resume.
cleaned_resumes = [clean_text(r) for r in resumes]

# Break resumes into short sentences/phrases.
# The split must run on the RAW text: clean_text() strips every '.', so
# splitting the cleaned text on '. ' never finds a separator and each resume
# would come out as a single giant "phrase". Here we split on a period
# followed by whitespace (a newline counts, matching clean_text's
# newline -> space substitution) and clean each piece afterwards.
phrases = []
for resume in resumes:
    for sentence in re.split(r'\.\s+', resume):
        phrases.append(clean_text(sentence))

# Drop fragments shorter than three words; they carry too little context.
phrases = [p.strip() for p in phrases if len(p.split()) >= 3]

print(f"Prepared {len(phrases)} phrases for training.")

# Auto-label keywords (basic rule for now)

# If phrase contains certain words label as keyword
# Terms whose presence marks a phrase as keyword-bearing (label 1).
keyword_indicators = [
    'python',
    'tensorflow',
    'machine learning',
    'aws',
    'docker',
    'cloud',
    'backend',
    'frontend',
    'java',
    'react',
    'developer',
    'engineer',
    'data science',
    'artificial intelligence',
    'sql',
]

def auto_label(phrase):
    """Return 1 when any indicator occurs as a substring of `phrase`, else 0."""
    for indicator in keyword_indicators:
        if indicator in phrase:
            return 1
    return 0

# Label every phrase with the rule-based auto_label() heuristic.
labels = np.array([auto_label(p) for p in phrases])

# TF-IDF vectorise the phrases, capped at 3000 features, and convert the
# result to a dense array for Keras.
vectorizer = TfidfVectorizer(max_features=3000)
X = vectorizer.fit_transform(phrases).toarray()
y = labels

# Train/test split: 20% held out, fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the TensorFlow logistic-regression model:
# a single sigmoid unit on top of the TF-IDF feature vector.
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X.shape[1],)),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Train for 10 epochs.
# NOTE(review): the held-out split is also passed as validation_data, so the
# accuracy reported below is measured on the same data that was monitored
# during training.
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

# Evaluate on the held-out split and report accuracy.
loss, acc = model.evaluate(X_test, y_test)
print(f"\n Final Test Accuracy: {acc:.2f}")

print("\n Model trained and ready!")

# Test on new text (extract keywords from a new CV).
new_text = """
Experienced Machine Learning engineer with expertise in Python, TensorFlow, and AWS cloud services.
Strong communication, leadership, and project management skills.
Worked on agile development teams.
Bachelor's degree in Computer Science.
"""

# Tokenise into lower-cased words and drop scikit-learn's English stop words.
# Note: this import binds the name `text` to a module for the rest of the cell.
from sklearn.feature_extraction import text
stop_words = text.ENGLISH_STOP_WORDS

words = re.findall(r'\b\w+\b', new_text.lower())
filtered_words = [word for word in words if word not in stop_words]

# Transform and predict.
# Each word is vectorised as its own one-word document, whereas the
# vectorizer and model above were fitted on multi-word phrases.
word_vectors = vectorizer.transform(filtered_words).toarray()
predictions = model.predict(word_vectors)

# Display every word whose predicted probability exceeds 0.5.
# `pred` is one row of the prediction array; pred[0] is its single value.
print("\n🔎 Extracted Keywords from New Text:")
for word, pred in zip(filtered_words, predictions):
    if pred > 0.5:
        print(f"✅ {word} (Confidence: {pred[0]:.2f})")


In [None]:
# install libraries
!pip install spacy datasets python-docx

# download English spacy model
# NOTE(review): the pipeline below is created with spacy.blank("en"), so the
# downloaded en_core_web_sm package does not appear to be used in this cell —
# confirm before removing.
!python -m spacy download en_core_web_sm

# import libraries
import spacy
from spacy.training.example import Example
from datasets import load_dataset
import random
import docx
from google.colab import files
import numpy as np

# Start from a blank English pipeline; the NER component is added further down.
nlp = spacy.blank("en")

# Load the conll2003 dataset through Hugging Face `datasets`.
# NOTE(review): trust_remote_code=True is passed to the loader — confirm that
# running the dataset's own loading code is acceptable in this environment.
dataset = load_dataset("conll2003", trust_remote_code=True)

# Prepare training data: convert CoNLL BIO token tags into character-offset
# entity spans over the space-joined sentence.

# Look the tag-name table up once instead of once per token.
tag_names = dataset['train'].features['ner_tags'].feature.names


def _bio_to_char_spans(tokens, tag_ids, names):
    """Merge BIO-tagged tokens into (start_char, end_char, label) spans.

    Offsets refer to the sentence produced by " ".join(tokens). A "B-X" tag
    opens a span, directly following "I-X" tags extend it, and the B-/I-
    prefix is stripped so the label is plain "X". An "I-X" that does not
    continue an open span of the same type starts a new span.

    Args:
        tokens: list of token strings for one sentence.
        tag_ids: list of integer tag ids, parallel to `tokens`.
        names: sequence mapping a tag id to its string name (e.g. "B-PER").

    Returns:
        List of (start, end, label) tuples in sentence order.
    """
    spans = []
    current = None  # [start, end, label] of the span being built
    offset = 0
    for token, tag_id in zip(tokens, tag_ids):
        tag = names[tag_id]
        token_end = offset + len(token)
        if tag == "O":
            if current is not None:
                spans.append(tuple(current))
                current = None
        else:
            prefix, sep, label = tag.partition("-")
            if not sep:
                # Tag without a BIO prefix: treat it as a span start.
                prefix, label = "B", tag
            if prefix == "I" and current is not None and current[2] == label:
                current[1] = token_end  # extend the open span
            else:
                if current is not None:
                    spans.append(tuple(current))
                current = [offset, token_end, label]
        offset = token_end + 1  # account for the joining space
    if current is not None:
        spans.append(tuple(current))
    return spans


train_data = []
for tokens, tags in zip(dataset['train']['tokens'], dataset['train']['ner_tags']):
    entities = _bio_to_char_spans(tokens, tags, tag_names)
    # Sentences without any entity are left out, as before.
    if entities:
        train_data.append((" ".join(tokens), {"entities": entities}))

# Add an NER component to the pipeline.
ner = nlp.add_pipe("ner")

# Register every label that occurs in the training annotations.
for _, annotations in train_data:
    for start, end, label in annotations["entities"]:
        ner.add_label(label)

# Names of all pipeline components other than "ner"; these are disabled
# while training.
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]

# Early-stopping state used by the training loop: training stops after
# `patience` consecutive epochs whose loss does not beat the best loss so far
# by more than `min_delta`.
patience = 4
min_delta = 0.005
best_loss = float('inf')
no_improve_counter = 0

# train model
# Train the NER component with early stopping on the summed epoch loss.
with nlp.disable_pipes(*other_pipes):
    optimizer = nlp.begin_training()
    for epoch in range(20):  # at most 20 epochs; early stopping may end sooner
        random.shuffle(train_data)
        losses = {}
        for batch in spacy.util.minibatch(train_data, size=64):
            # Build all Examples for the minibatch and update once per batch.
            # Updating example-by-example would make the batch size irrelevant
            # and take one optimizer step per sentence.
            examples = [
                Example.from_dict(nlp.make_doc(raw_text), annotations)
                for raw_text, annotations in batch
            ]
            nlp.update(examples, drop=0.2, sgd=optimizer, losses=losses)

        current_loss = losses.get('ner', 0.0)
        print(f"Epoch {epoch+1}, Loss: {current_loss:.4f}")

        # Early stopping: only an improvement larger than min_delta resets
        # the patience counter.
        if best_loss - current_loss > min_delta:
            best_loss = current_loss
            no_improve_counter = 0
        else:
            no_improve_counter += 1

        if no_improve_counter >= patience:
            print(f"\nNo major improvement for {patience} epochs. Early stopping at epoch {epoch+1}.")
            break

print("\nTraining complete.")

# helper: read docx files
def read_docx(filepath):
    """Return the text of every paragraph in a .docx file, joined by newlines."""
    document = docx.Document(filepath)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)

# extract entities (group multi word correctly)
def extract_entities(text):
    """Run the module-level `nlp` pipeline on `text` and list its entities.

    Returns a list of (entity_text, label) tuples in document order, leaving
    out entities whose label is one of the types listed below.
    """
    skipped_labels = ("CARDINAL", "DATE", "ORDINAL", "TIME", "PERCENT", "MONEY")
    return [
        (entity.text, entity.label_)
        for entity in nlp(text).ents
        if entity.label_ not in skipped_labels
    ]

# Upload one or more CV files through the Colab dialog and print the entities
# found in each.
uploaded = files.upload()

# Every uploaded file is opened with read_docx(), whatever its extension.
for filename in uploaded.keys():
    print(f"\nUploaded: {filename}")
    text = read_docx(filename)
    entities = extract_entities(text)

    print("\nExtracted Entities:\n")
    # Entity text is left-aligned in a 40-character column before the label.
    for entity_text, entity_label in entities:
        print(f"{entity_text:<40} --> {entity_label}")

In [6]:
# Pin spaCy to 3.8.5 and fetch the small English model.
# The installer output below asks for a runtime restart so the newly
# installed packages are picked up.
!pip install -U spacy==3.8.5
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m128.6 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [9]:
# Upload file(s) through the Colab file picker; `uploaded` holds the result
# of files.upload().
from google.colab import files
uploaded = files.upload()


Saving software_cv_ner.json to software_cv_ner.json


In [30]:
# Install required libraries (spaCy pinned to 3.8.5).
# NOTE(review): en_core_web_sm is downloaded here, but the code in this cell
# only uses spacy.blank("en") and the pipeline loaded from model/model-best —
# confirm whether the download is still needed.
!pip install -q spacy==3.8.5
!python -m spacy download en_core_web_sm

# Imports
import spacy
import json
import random
from pathlib import Path
from spacy.tokens import DocBin
from sklearn.model_selection import train_test_split
from google.colab import files

# Upload your .jsonl file
uploaded = files.upload()

# Fail with a clear message if the dialog was dismissed without a file,
# instead of an opaque IndexError.
if not uploaded:
    raise ValueError("No file was uploaded; expected a JSONL file with NER annotations.")

# The first uploaded file is used; set the filename manually if needed.
jsonl_file = next(iter(uploaded))

# Load the custom NER data: one JSON object per line with a 'text' string and
# an 'entities' list of [start, end, label] triples.
examples = []
with open(jsonl_file, 'r', encoding='utf-8') as f:
    for line in f:
        # Blank lines would make json.loads raise, so skip them.
        if not line.strip():
            continue
        data = json.loads(line)
        text = data['text']
        entities = data['entities']
        examples.append((text, {"entities": [(start, end, label) for start, end, label in entities]}))

print(f" Loaded {len(examples)} training examples.")

# Split into train/dev (10% dev, fixed seed).
train_data, dev_data = train_test_split(examples, test_size=0.1, random_state=42)

# Helper to convert into spaCy format
def create_docbin(data, nlp):
    """Convert annotated texts into a spaCy DocBin.

    Args:
        data: iterable of (text, annotations) pairs, where annotations is a
            dict whose 'entities' value is a list of (start, end, label)
            character-offset triples.
        nlp: spaCy Language object; only nlp.make_doc() is used, to tokenise.

    Returns:
        A DocBin holding one Doc per input pair with doc.ents set.

    Entity offsets that do not line up with token boundaries cannot be turned
    into a span and are skipped with a warning instead of being dropped
    silently. Overlapping spans, which doc.ents rejects, are reduced with
    spacy.util.filter_spans, again with a warning.
    """
    import warnings  # local import keeps the block self-contained

    db = DocBin()
    for text, annot in data:
        doc = nlp.make_doc(text)
        ents = []
        for start, end, label in annot['entities']:
            span = doc.char_span(start, end, label=label)
            if not span:
                warnings.warn(
                    f"Skipping entity {label!r} at [{start}:{end}] "
                    f"({text[start:end]!r}): offsets do not align with token boundaries."
                )
                continue
            ents.append(span)
        non_overlapping = spacy.util.filter_spans(ents)
        if len(non_overlapping) != len(ents):
            warnings.warn(
                f"Dropped {len(ents) - len(non_overlapping)} overlapping entity span(s) "
                f"in text starting {text[:40]!r}."
            )
        doc.ents = non_overlapping
        db.add(doc)
    return db

# Create the output folder and a blank English pipeline used only to tokenise
# the examples while building the DocBins.
Path("data").mkdir(parents=True, exist_ok=True)
nlp_blank = spacy.blank("en")  # Blank English model

# Serialise the train and dev splits where the training command expects them.
create_docbin(train_data, nlp_blank).to_disk("data/train.spacy")
create_docbin(dev_data, nlp_blank).to_disk("data/dev.spacy")

# Generate an NER-only config optimised for efficiency; --force overwrites an
# existing data/config.cfg.
!python -m spacy init config data/config.cfg --lang en --pipeline ner --optimize efficiency --force

# Train on CPU (--gpu-id -1); results are written to the `model` directory.
!python -m spacy train data/config.cfg --output model --paths.train data/train.spacy --paths.dev data/dev.spacy --gpu-id -1

# This message is printed unconditionally, whether or not the shell command
# above succeeded.
print("\n Model Training Complete!")

# Load and test your trained model
import spacy

nlp_trained = spacy.load("model/model-best")

# Test text
test_text = """
John Doe — Backend Developer skilled in Python, Flask, and PostgreSQL.
Worked at DevSolutions Ltd from 2020-2024. MSc Computer Science, University of Manchester.
"""

doc = nlp_trained(test_text)

# Print each predicted entity with its label.
print("\nEntities Found:\n")
for ent in doc.ents:
    print(f"{ent.text} ({ent.label_})")

Collecting en-core-web-sm==3.8.0
  Using cached https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


Saving software_cv_ner2.json to software_cv_ner2 (1).json
✅ Loaded 17 training examples.
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
[38;5;4mℹ Generated config template specific for your use case[0m
- Language: en
- Pipeline: ner
- Optimize for: efficiency
- Hardware: CPU
- Transformer: None
[38;5;2m✔ Auto-filled config with all values[0m
[38;5;2m✔ Saved config[0m
data/config.cfg
You can now add your data and train your pipeline:
python -m spacy train config.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy
[38;5;4mℹ Saving to output directory: model[0m
[38;5;4mℹ Using CPU[0m
[38;5;4mℹ To switch to GPU 0, use the option: --gpu-id 0[0m
[1m
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
[38;5;2m✔ Initialized pipeline[0m
[1m
[38;5;4mℹ Pipeline: ['tok2vec', 'ner'][0m
[38;5;4mℹ Initial learn rate: 0.001[0m
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  -

In [26]:
import re

# Sample CV text used to exercise the regex-based field extraction below.
cv_text = """
James Carter
Email: james.carter@example.com
Phone: +44 1234 567890
LinkedIn: linkedin.com/in/jamescarter
GitHub: github.com/jcarter
Location: London, UK

Professional Summary
Software Engineer with 4+ years of experience in designing, developing, and maintaining scalable software solutions. Adept in Java, Python, and JavaScript frameworks, with strong skills in Agile methodologies, teamwork, and problem-solving.

Technical Skills
Languages: Java, Python, JavaScript, C++
Frameworks: React, Angular, Node.js, Django, Spring Boot
Tools & Platforms: Git, Docker, Kubernetes, AWS, Azure, Jenkins
Databases: MySQL, PostgreSQL, MongoDB

Professional Experience
Software Engineer | TechSolutions Ltd., London, UK
March 2021 – Present
Responsibilities:
- Developed and maintained web applications using React, Node.js, and MongoDB.
- Improved application performance by 25% through code optimisation and refactoring.
- Led integration of RESTful APIs, enhancing application scalability.
- Collaborated with cross-functional teams using Agile methodologies.

Junior Software Developer | Innovatech Inc., London, UK
January 2019 – February 2021
Responsibilities:
- Assisted in developing backend systems with Java (Spring Boot) and Python (Django).
- Contributed to database design and management, optimising query performance by 15%.
- Participated in code reviews, debugging sessions, and maintained coding standards.

Education
BSc Computer Science | University College London, UK | Graduated: 2018

Certifications
- AWS Certified Solutions Architect – Associate (2022)
- Oracle Certified Java Programmer (2021)

Projects
Inventory Management System (React, Node.js, MongoDB) – Developed a full-stack web application to manage warehouse inventory.
Chat Application (Java, Spring Boot, WebSocket) – Created a real-time chat application with secure authentication.

Languages
- English (Native)
- French (Intermediate)
"""


def extract_cv_info(text):
    """Extract a fixed set of fields from plain CV text using regexes.

    Args:
        text: the CV as a single string.

    Returns:
        Dict with the keys 'Name', 'Email', 'Phone', 'LinkedIn',
        'Profession Title' and 'Years of Experience', in that order. Every
        key is always present; a field that cannot be found is None.
    """
    info = {}

    # Name: first non-empty line (None for empty text rather than IndexError).
    non_empty_lines = [ln.strip() for ln in text.strip().split('\n') if ln.strip()]
    info['Name'] = non_empty_lines[0] if non_empty_lines else None

    # Email: first token that looks like local@domain.
    email_match = re.search(r'[\w\.-]+@[\w\.-]+', text)
    info['Email'] = email_match.group(0) if email_match else None

    # Phone: optional '+', then digits separated by spaces or dashes.
    phone_match = re.search(r'\+?\d[\d\s\-]{7,}\d', text)
    info['Phone'] = phone_match.group(0) if phone_match else None

    # LinkedIn URL: everything after linkedin.com/ up to the next whitespace.
    linkedin_match = re.search(r'linkedin\.com\/[^\s]+', text)
    info['LinkedIn'] = linkedin_match.group(0) if linkedin_match else None

    # Profession title: taken from the first line under "Professional Summary",
    # up to " with" or the first comma. Defaults to None so the key exists
    # even when the section is missing.
    info['Profession Title'] = None
    prof_summary_match = re.search(r'Professional Summary\n([^\n]+)', text)
    if prof_summary_match:
        first_line_summary = prof_summary_match.group(1)
        profession_match = re.match(r'(.+?)(?: with|,)', first_line_summary)
        info['Profession Title'] = (
            profession_match.group(1) if profession_match else first_line_summary
        )

    # Years of experience: the number in "N years of experience" / "N+ years ...".
    years_match = re.search(r'(\d+)\+?\s*years? of experience', text, re.IGNORECASE)
    info['Years of Experience'] = years_match.group(1) if years_match else None

    return info


# Extract the fields from the sample CV and print them.
extracted_info = extract_cv_info(cv_text)

for field, value in extracted_info.items():
    print(f"{field}: {value}")


Name: James Carter
Email: james.carter@example.com
Phone: +44 1234 567890
LinkedIn: linkedin.com/in/jamescarter
Profession Title: Software Engineer
Years of Experience: 4


# Full Model Training: Fine-Tuning DistilBERT with a Similarity Head

In [11]:
!pip install transformers datasets torch scikit-learn

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import DistilBertModel, DistilBertTokenizerFast
import torch.optim as optim
from datasets import load_dataset
import numpy as np
from sklearn.model_selection import train_test_split

# --- Setup ---
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_name = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)

# --- Load STS-B Dataset (from Hugging Face) ---
# Note: `train_data` is also the name used for the NER training sets in the
# earlier cells; this assignment replaces it.
raw_dataset = load_dataset("glue", "stsb")
train_data = raw_dataset['train']
val_data = raw_dataset['validation']

# Rescale a similarity score from the 0–5 range to 0–1
def normalize(score):
    """Convert `score` to float and divide it by 5.0."""
    as_float = float(score)
    return as_float / 5.0

# --- Custom Dataset Class ---
class TextSimilarityDataset(Dataset):
    """Sentence-pair dataset for similarity regression.

    Each item tokenises one (sentence1, sentence2) pair with the supplied
    tokenizer, padded/truncated to `max_len`, and pairs it with its score.

    Args:
        sent1_list: first sentences, indexable by integer position.
        sent2_list: second sentences, parallel to `sent1_list`.
        scores: target scores, parallel to the sentence lists.
        tokenizer: callable used as
            tokenizer(s1, s2, truncation=..., padding=..., max_length=...,
            return_tensors='pt') that returns a mapping with 'input_ids' and
            'attention_mask' tensors carrying a leading batch dimension of 1.
        max_len: padded/truncated sequence length.
    """

    def __init__(self, sent1_list, sent2_list, scores, tokenizer, max_len=128):
        self.sent1 = sent1_list
        self.sent2 = sent2_list
        self.scores = scores
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Number of sentence pairs (taken from the first sentence list)."""
        return len(self.sent1)

    def __getitem__(self, idx):
        """Return a dict with 'input_ids', 'attention_mask' and 'labels' for pair `idx`."""
        encoding = self.tokenizer(
            self.sent1[idx],
            self.sent2[idx],
            truncation=True,
            padding='max_length',
            max_length=self.max_len,
            return_tensors='pt'
        )
        return {
            # squeeze(0) removes only the leading batch dimension. A bare
            # squeeze() would also drop the sequence dimension when
            # max_len == 1.
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'labels': torch.tensor(self.scores[idx], dtype=torch.float)
        }

# Prepare data: wrap the STS-B train and validation splits, with labels
# rescaled by normalize().
train_dataset = TextSimilarityDataset(
    sent1_list=train_data['sentence1'],
    sent2_list=train_data['sentence2'],
    scores=[normalize(s) for s in train_data['label']],
    tokenizer=tokenizer
)

val_dataset = TextSimilarityDataset(
    sent1_list=val_data['sentence1'],
    sent2_list=val_data['sentence2'],
    scores=[normalize(s) for s in val_data['label']],
    tokenizer=tokenizer
)

# Batches of 16; only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)

# model
class BertSimilarityModel(nn.Module):
    """DistilBERT encoder with a small regression head that scores a sentence pair.

    The head maps the hidden state at sequence position 0 through
    Linear -> ReLU -> Dropout(0.2) -> Linear to a single value per example.

    Args:
        bert_model_name: name or path passed to DistilBertModel.from_pretrained.
    """

    def __init__(self, bert_model_name):
        super().__init__()
        self.bert = DistilBertModel.from_pretrained(bert_model_name)
        self.fc = nn.Sequential(
            nn.Linear(self.bert.config.hidden_size, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 1)
        )

    def forward(self, input_ids, attention_mask):
        """Return one score per example as a tensor of shape (batch,)."""
        # Hidden state at sequence position 0 is used as the pair representation.
        pooled_output = self.bert(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state[:, 0, :]
        score = self.fc(pooled_output)
        # squeeze(-1) drops only the trailing size-1 feature dimension. A bare
        # squeeze() would also collapse a batch of one to a 0-d tensor, which
        # no longer matches labels of shape (1,).
        return score.squeeze(-1)

# Instantiate the model on the selected device.
# Note: `model` was the Keras classifier in the first cell; this replaces it.
model = BertSimilarityModel(model_name).to(device)

# Training setup: MSE regression against the 0–1 normalised scores.
# NOTE(review): the head ends in a plain Linear layer, so predictions are not
# constrained to the 0–1 range of the labels.
criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=2e-5)
epochs = 3

# Training loop: one optimizer step per batch; the printed value is the mean
# of the per-batch losses for the epoch.
model.train()
for epoch in range(epochs):
    total_loss = 0
    for batch in train_loader:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids, attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch + 1}/{epochs} - Training Loss: {total_loss / len(train_loader):.4f}")

# Validation: run once after all epochs, in eval mode and without gradients.
model.eval()
val_loss = 0
with torch.no_grad():
    for batch in val_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids, attention_mask)
        loss = criterion(outputs, labels)
        val_loss += loss.item()

print(f"Validation Loss: {val_loss / len(val_loader):.4f}")

README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/502k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/151k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/114k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/5749 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1500 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1379 [00:00<?, ? examples/s]

Epoch 1/3 - Training Loss: 0.0494
Epoch 2/3 - Training Loss: 0.0223
Epoch 3/3 - Training Loss: 0.0151
Validation Loss: 0.0243


In [38]:
!pip install -q sentence-transformers python-docx

import torch
from sentence_transformers import SentenceTransformer, util
import logging
from transformers.utils import logging as hf_logging
import numpy as np

# Logging: configure the root logger at INFO and switch the transformers
# logger to its info verbosity.
logging.basicConfig(level=logging.INFO)
hf_logging.set_verbosity_info()

class CVMatcher:
    """Scores how well a CV matches job descriptions using sentence embeddings.

    Both texts are cut into segments, every segment is embedded with a
    SentenceTransformer model, and the score of a (CV, job) pair is the
    highest cosine similarity between any CV segment and any job segment,
    expressed as a percentage.
    """

    def __init__(self, model_name='all-mpnet-base-v2', device=None):
        """Load the embedding model.

        Args:
            model_name: SentenceTransformer model name or path.
            device: device string; defaults to 'cuda' when available, else 'cpu'.
        """
        self.device = device or ('cuda' if torch.cuda.is_available() else 'cpu')
        logging.info(f"Loading model: {model_name} on {self.device}")
        self.model = SentenceTransformer(model_name, device=self.device)
        logging.info("Model loaded successfully.")

    def segment_text(self, text, max_length=256):
        """Group non-blank lines of `text` into segments.

        Lines are stripped and accumulated; a segment is closed as soon as its
        whitespace-separated word count reaches `max_length`, so a segment can
        exceed that count by part of its last line.

        Returns:
            List of segment strings (lines joined by single spaces); empty
            when `text` contains no non-blank line.
        """
        segments = []
        current = []
        count = 0
        for line in text.split('\n'):
            line = line.strip()
            if not line:
                continue
            current.append(line)
            count += len(line.split())
            if count >= max_length:
                segments.append(' '.join(current))
                current = []
                count = 0
        if current:
            segments.append(' '.join(current))
        return segments

    def compute_similarity(self, cv_text, job_text) -> float:
        """Return the best segment-to-segment cosine similarity as a percentage.

        Returns 0.0 when either text has no non-blank content, since there is
        nothing to compare in that case.
        """
        cv_chunks = self.segment_text(cv_text)
        job_chunks = self.segment_text(job_text)

        print(f"Comparing {len(cv_chunks)} CV segments to {len(job_chunks)} job segments...")

        # An empty side would give an empty similarity matrix, and np.max
        # raises on an empty array, so report "no similarity" instead.
        if not cv_chunks or not job_chunks:
            print(f"Max segment similarity: {0.0:.2f}%")
            return 0.0

        cv_embeddings = self.model.encode(cv_chunks, convert_to_tensor=True)
        job_embeddings = self.model.encode(job_chunks, convert_to_tensor=True)

        sim_matrix = util.cos_sim(cv_embeddings, job_embeddings).cpu().numpy()
        # Convert the numpy scalar to a plain float to honour the annotation.
        max_sim = float(np.max(sim_matrix))

        print(f"Max segment similarity: {max_sim * 100:.2f}%")
        return max_sim * 100

    def rank_jobs(self, cv: str, jobs: dict, top_k: int = None, verbose=True):
        """Score every job against the CV and sort by descending score.

        Args:
            cv: CV text.
            jobs: mapping of job title to job description text.
            top_k: how many results to print when `verbose`; None (or 0)
                prints all. It does not truncate the returned list.
            verbose: print the ranked results.

        Returns:
            List of (title, score) tuples for all jobs, best match first.
        """
        print("Ranking jobs based on similarity to the CV\n")
        results = []
        for title, desc in jobs.items():
            print(f"Evaluating job: {title}")
            score = self.compute_similarity(cv, desc)
            results.append((title, score))
        ranked = sorted(results, key=lambda x: x[1], reverse=True)
        if verbose:
            print("\nFinal Job Matching Results:")
            for title, score in ranked[:top_k or len(ranked)]:
                print(f" - {title:40s}: {score:.2f}%")
        return ranked


In [46]:
from google.colab import files
import docx
import os

# Upload CV
# next(iter(uploaded)) takes the first uploaded filename; any further files
# in the same upload are ignored.
uploaded = files.upload()
docx_filename = next(iter(uploaded))

# Read CV text
def read_docx_text(path):
    """Return the non-blank paragraphs of a .docx file, joined by newlines."""
    non_blank = []
    for paragraph in docx.Document(path).paragraphs:
        if paragraph.text.strip():
            non_blank.append(paragraph.text)
    return "\n".join(non_blank)

# Read the uploaded CV. This replaces the sample `cv_text` string defined in
# the regex-extraction cell.
cv_text = read_docx_text(docx_filename)

# Test jobs: two realistic descriptions plus one unrelated sentence.
jobs = {
    "Software Engineer (React/AWS)": """
        We are seeking a full-stack software engineer to develop cloud-native applications using React, Node.js,
        and AWS. Experience with containerisation tools like Docker and CI/CD pipelines using Jenkins is required.
        Must be a strong team player and comfortable working in Agile environments.
    """,
    "Data Scientist": """
        We are looking for a Data Scientist with solid experience in Python, deep learning, and ML frameworks like
        PyTorch and TensorFlow. Knowledge in NLP, pandas, scikit-learn, and AWS is highly desirable. Candidates
        should be able to build models, run experiments, and deliver production-grade systems.
    """,
    "Random Text": "I have quick react speed on cars"
}

# Run matcher with the default model and print the ranking.
matcher = CVMatcher()
matcher.rank_jobs(cv=cv_text, jobs=jobs)

# Cleanup: delete the uploaded file from the working directory.
os.remove(docx_filename)


Saving Machine_Learning_CV.docx to Machine_Learning_CV.docx


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--sentence-transformers--all-mpnet-base-v2/snapshots/12e86a3c702fc3c50205a8db88f0ec7c0b6b94a0/config.json
Model config MPNetConfig {
  "architectures": [
    "MPNetForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "mpnet",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "relative_attention_num_buckets": 32,
  "transformers_version": "4.51.3",
  "vocab_size": 30527
}

loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--sentence-transformers--all-mpnet-base-v2/snapshots/12e86a3c702fc3c50205a8db88f0ec7c0b6b94a0/model.safetensors
All model checkpoint weights were used when initial

Ranking jobs based on similarity to the CV

Evaluating job: Software Engineer (React/AWS)
Comparing 2 CV segments to 1 job segments...
Max segment similarity: 48.83%
Evaluating job: Data Scientist
Comparing 2 CV segments to 1 job segments...
Max segment similarity: 70.55%
Evaluating job: Random Text
Comparing 2 CV segments to 1 job segments...
Max segment similarity: 18.17%

Final Job Matching Results:
 - Data Scientist                          : 70.55%
 - Software Engineer (React/AWS)           : 48.83%
 - Random Text                             : 18.17%
