In [11]:
import torch
from torch.utils.data import Dataset
from transformers import AutoTokenizer, AutoModelForTokenClassification, TrainingArguments, Trainer

In [12]:
resume_text = """
Braulio Jose Cespedes Acosta | Toronto, ON | (437) 733-7952 | brauliojose.cespedesacosta@georgebrown.ca|
https://github.com/Murphyx2 | https://www.linkedin.com/in/braulio-cespedes-acosta/
Objective:
Seeking Software Development Co-op at Loblaw, leveraging expertise from the Applied A.I. Solutions
Development Postgraduate Co-op program. Eager to apply skills in developing AI and data analytics
solutions, while fostering collaboration within the team.
Highlights of Qualifications:
• Strong knowledge and experience in Java programming language, developing backend and middleware
systems.
• Moderate Knowledge of Python, training and developing machine learning models.
• Demonstrated ability to refine technical specifications for efficient project execution.
• Proven track record in planning, requirements gathering, and risk management for successful projects.
• Exceptional communication skills, translating user requirements into technical requirements for
collaboration across teams and developing software.
• Moderate skill at data visualization with Tableau software.
Technical Skill:
• Databases Systems: Oracle, MS SQL server, MySQL.
• Control Version: Git
• Software Documentation: Microsoft Office (Excel, Word, PowerPoint)
• Testing: Java Mockito, Python Pytest
• Programming languages: Java, Python
Education:
Applied A.I. Solutions Development Postgraduate Jan 2024 – Dec 2024
George Brown College, Toronto, ON
• Collaborate with cross-disciplinary teams developing and understanding machine learning models in
order to perform the tasks at hand and complete the objectives of the course.
• Lead cross-disciplinary teams with the purpose of achieving the task and objectives at hand, bringing
their best potential in order to achieve the best score.
• Moderate knowledge of Tableau visualization tools, for the visualization and data presentation.
• Basic knowledge of the different big data technologies, such as Hadoop, Spark, Hive and Apache Pig as
well as their deployment in a cloud environment in MS Azure.
Software Engineer 2014 - 2019
Universidad Pro Educación y Cultura (APEC)
• Knowledge and the skills to analyze, design, develop, test, and implement software with the highest
standards and best practices on the market, all of these to fulfill users' needs on time.
• Collaborated with teams in order to plan, design, and develop software for different purposes in Java,
Python, and C#.
Mechatronics Technologist 2010 - 2013
Las Americas Institute of Technology (ITLA)
• As a Mechatronics Technologist, we dominate the automation of manufacturing processes by integrating
mechanical, electrical, electronic, and software components used for the control.
• Capable of designing and manufacturing electronic prototypes and devices.
Braulio Jose Cespedes Acosta | Toronto, ON | (437) 733-7952 | brauliojose.cespedesacosta@georgebrown.ca|
https://github.com/Murphyx2 | https://www.linkedin.com/in/braulio-cespedes-acosta/
Professional Experience:
Mid Software Developer Feb 2022 – Feb 2024
FullstackLabs, Santo Domingo DR - Remote
• Analyze, design, develop and test software in order to create new features and solve problems in the
organization, in order to meet users' needs and expectations.
• Provide support to users and colleagues for problem-solving situations, testing phase for new software
features, issues in the production environment, and consultation for the development and implementation of
new features and other requests related to the software development cycle.
• Development of SQL scripts to fulfill the software needs or for information requests.
• Translate user requests and requirements into tasks and technical requirements in order to develop features
that fulfill organizations' needs.
Engineer IV (Software Engineer) Jun 2018 – Feb 2022
Claro Dominicana, Santo Domingo DR
• Analyze, design, develop and test software in order to create new features and solve problems in the
organization, in order to meet users' needs and expectations.
• Development of SQL scripts, procedures, and views to fulfill the software needs or for information requests.
• Provided mentorship and guidance for new or old colleagues related to software development.
• Provided guidance and software tools to QA and support production teams in order to fulfill the
organization's needs and goals.
• Provide support for the integration and implementation of third-party software into the organization.
• Lead a small development team in order to maintain and implement new features for designated
applications.
• Planning and management of software delivery from the dev environment into the production environment.
Software Engineer Apr 2016 – Jun 2018
NewTech.srl, Santo Domingo DR
• Analyze, design, develop and test software in order to create new features and solve problems in the
organization, in order to meet users' needs and expectations.
• Provide support to users and colleagues for problem-solving situations, testing phase for new software
features, issues in the production environment, and consultation for the development and implementation of
new features and other requests related to the software development cycle.
• Development of SQL scripts to fulfill the software needs or for information requests.
• Planning and management of software delivery from the dev environment into the production environment.
Interests:
• Stock Market, Real estate, Economics, Assembling scale models, Books, Playing Video games, World
History, Military equipment, strategists and Science and Pixel Art.
"""

entities = [
    (336, 340, "SKILL"),     # Java
    (374, 380, "SKILL"),     # Python
    (444, 467, "SKILL"),     # machine learning
    (587, 606, "SKILL"),     # communication skills
    (660, 680, "SKILL"),     # data visualization
    (700, 706, "SKILL"),     # Oracle
    (708, 720, "SKILL"),     # MS SQL server
    (722, 728, "SKILL"),     # MySQL
    (745, 748, "SKILL"),     # Git
    (770, 795, "SKILL"),     # Microsoft Office
    (808, 821, "SKILL"),     # Java Mockito
    (823, 835, "SKILL"),     # Python Pytest
    (854, 858, "SKILL"),     # Java
    (860, 866, "SKILL"),     # Python
    (1220, 1226, "SKILL"),   # Hadoop
    (1228, 1233, "SKILL"),   # Spark
    (1235, 1239, "SKILL"),   # Hive
    (1244, 1254, "SKILL"),   # Apache Pig
    (1274, 1282, "SKILL"),   # MS Azure
    (1476, 1479, "SKILL"),   # Java
    (1481, 1487, "SKILL"),   # Python
    (1492, 1494, "SKILL")    # C#
]
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

In [13]:
def tokenize_and_align_labels(text, entities, tokenizer):
    tokens = tokenizer(text, return_offsets_mapping=True, truncation=True)
    labels = ["0"] * len(tokens["input_ids"])
    
    for start, end, label in entities:
        for idx, (token_start, token_end) in enumerate(tokens["offset_mapping"]):
            if token_start >= start and token_end <=end:
                labels[idx] = label
    
    return tokens, labels

In [14]:
tokens, labels = tokenize_and_align_labels(resume_text, entities, tokenizer)

In [15]:
class ResumeDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    
    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item
    
    def __len__(self):
        return len(self.labels)

In [16]:
# Creating Dataset
encodings, labels = tokenize_and_align_labels(resume_text, entities, tokenizer)
dataset = ResumeDataset(encodings, [labels])

# Training the model

In [19]:
model = AutoModelForTokenClassification.from_pretrained("bert-base-cased", num_labels=len(labels))

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,    
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    eval_dataset=dataset,
    tokenizer=tokenizer,
)

trainer.train()

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


TypeError: TrainingArguments.__init__() got an unexpected keyword argument 'bias'

In [18]:
# Evaluate the model
trainer.evaluate()

# Save the model
trainer.save_model("./ner_model")
tokenizer.save_pretrained("./ner_model")

NameError: name 'trainer' is not defined

In [18]:
import torch
from transformers import BertTokenizerFast, BertForTokenClassification, Trainer, TrainingArguments
from datasets import Dataset
import pandas as pd

# Define the annotated training data
train_data = [
    {
        "text": """
        Maria Elena Gonzalez
        Location: San Francisco, CA | Phone: (415) 555-1234 | Email: maria.gonzalez@example.com
        LinkedIn: https://www.linkedin.com/in/maria-elena-gonzalez/

        Objective:
        Detail-oriented Software Developer with a passion for creating innovative web applications. Seeking a position at Innovatech, where I can utilize my skills in front-end development, user experience design, and software engineering to contribute to the company's success.

        Highlights of Qualifications:
        - Extensive experience in developing responsive web applications using HTML, CSS, JavaScript, and React.
        - Proficient in backend development with Node.js and Express.
        - Skilled in using version control systems like Git and project management tools like Jira.
        - Strong problem-solving abilities and a keen eye for detail.
        - Excellent communication skills and a proven track record of collaborating effectively with cross-functional teams.

        Technical Skills:
        - Programming Languages: JavaScript, Python, Ruby
        - Web Development: HTML, CSS, React, Angular, Node.js, Express
        - Databases: MySQL, MongoDB, PostgreSQL
        - Tools & Technologies: Docker, Kubernetes, Git, Jenkins, Jira
        - Cloud Platforms: AWS, Heroku, Azure
        - Other: Agile methodologies, Test-Driven Development (TDD), RESTful APIs

        Education:
        Bachelor of Science in Computer Science
        University of California, San Francisco - San Francisco, CA
        September 2015 - June 2019
        - GPA: 3.8/4.0
        - Relevant Coursework: Web Development, Data Structures, Algorithms, Database Systems, Cloud Computing

        Professional Experience:
        Front-End Developer
        Web Creators Inc., San Francisco, CA
        July 2019 - Present
        - Developed and maintained front-end components for a high-traffic e-commerce platform using React and Redux.
        - Improved site performance and user experience by optimizing CSS and JavaScript code.
        - Collaborated with backend developers to integrate APIs and ensure seamless data flow.
        - Participated in code reviews and provided mentorship to junior developers.

        Software Developer Intern
        Tech Innovators, Palo Alto, CA
        June 2018 - August 2018
        - Assisted in the development of a web-based project management tool using Angular and Node.js.
        - Implemented RESTful APIs for data retrieval and storage.
        - Conducted unit testing and debugging to ensure the reliability and performance of the application.

        Research Assistant
        Human-Computer Interaction Lab, UCSF
        September 2017 - May 2018
        - Conducted research on improving user interaction with web interfaces.
        - Developed prototypes and conducted usability testing to gather user feedback.
        - Published findings in a peer-reviewed journal and presented at the CHI Conference.

        Projects:
        Personal Portfolio Website
        - Designed and developed a personal portfolio website to showcase my projects and skills.
        - Used HTML, CSS, JavaScript, and React for the front-end and hosted on GitHub Pages.

        E-commerce Website
        - Developed a full-stack e-commerce website with user authentication, product listings, and a shopping cart.
        - Used React for the front-end and Node.js with Express for the backend, with MongoDB as the database.

        Certifications:
        - Certified JavaScript Developer
        - AWS Certified Developer - Associate
        - Certified ScrumMaster (CSM)

        Interests:
        - Hiking, Photography, Traveling, Reading Tech Blogs, Volunteering at Coding Bootcamps, Playing the Piano
        """,
        "labels": [
            (1, 22, "NAME"),            # Maria Elena Gonzalez
            (62, 76, "PHONE"),          # (415) 555-1234
            (85, 115, "EMAIL"),         # maria.gonzalez@example.com
            (700, 710, "SKILL"),        # JavaScript
            (712, 718, "SKILL"),        # Python
            (720, 724, "SKILL"),        # Ruby
            (747, 751, "SKILL"),        # HTML
            (753, 756, "SKILL"),        # CSS
            (758, 763, "SKILL"),        # React
            (765, 772, "SKILL"),        # Angular
            (774, 781, "SKILL"),        # Node.js
            (783, 790, "SKILL"),        # Express
            (813, 818, "SKILL"),        # MySQL
            (820, 827, "SKILL"),        # MongoDB
            (829, 838, "SKILL"),        # PostgreSQL
            (860, 866, "SKILL"),        # Docker
            (868, 878, "SKILL"),        # Kubernetes
            (880, 883, "SKILL"),        # Git
            (885, 892, "SKILL"),        # Jenkins
            (894, 898, "SKILL"),        # Jira
            (920, 923, "SKILL"),        # AWS
            (925, 931, "SKILL"),        # Heroku
            (933, 938, "SKILL"),        # Azure
            (960, 978, "SKILL"),        # Agile methodologies
            (980, 1004, "SKILL"),       # Test-Driven Development (TDD)
            (1006, 1016, "SKILL"),      # RESTful APIs
        ]
    },
    {
        "text": """
        Lisa Marie Johnson
        Location: Chicago, IL | Phone: (312) 555-6789 | Email: lisa.johnson@example.com
        LinkedIn: https://www.linkedin.com/in/lisa-marie-johnson/

        Objective:
        Detail-oriented and highly organized Accountant with over 8 years of experience in financial reporting, budgeting, and auditing. Seeking a position at Financial Solutions Inc. to utilize my expertise in financial analysis, compliance, and team collaboration to support the company's financial health and growth.

        Highlights of Qualifications:
        - Extensive experience in preparing financial statements, balance sheets, and income statements.
        - Proficient in conducting internal and external audits to ensure compliance with financial regulations and standards.
        - Skilled in budgeting, forecasting, and financial planning.
        - Strong analytical skills and attention to detail.
        - Excellent communication skills and a proven track record of working effectively with cross-functional teams.

        Technical Skills:
        - Accounting Software: QuickBooks, SAP, Oracle Financials
        - Financial Analysis Tools: Microsoft Excel, Microsoft Access, Tableau
        - Regulatory Compliance: GAAP, IFRS, SOX
        - Other: Financial Reporting, Budgeting, Auditing, Tax Preparation, Payroll Processing

        Education:
        Bachelor of Science in Accounting
        University of Illinois at Chicago - Chicago, IL
        September 2008 - June 2012
        - GPA: 3.7/4.0
        - Relevant Coursework: Financial Accounting, Managerial Accounting, Corporate Finance, Business Law

        Professional Experience:
        Senior Accountant
        ABC Financial Services, Chicago, IL
        July 2016 - Present
        - Prepared and analyzed monthly, quarterly, and annual financial statements and reports.
        - Conducted internal audits to ensure accuracy and compliance with GAAP and company policies.
        - Managed budgeting and forecasting processes, collaborating with department heads to develop financial plans.
        - Provided financial analysis and insights to support strategic decision-making.
        - Supervised and mentored junior accounting staff.

        Accountant
        XYZ Corporation, Chicago, IL
        June 2012 - June 2016
        - Assisted in the preparation of financial statements and reports.
        - Conducted variance analysis and reconciled accounts to ensure accuracy.
        - Supported external audits by providing documentation and explanations of financial transactions.
        - Processed payroll and prepared tax returns in compliance with federal and state regulations.
        - Assisted in the implementation of a new accounting software system, improving efficiency and accuracy.

        Projects:
        Financial Reporting Automation
        - Led a project to automate financial reporting processes using Microsoft Excel and VBA, reducing report generation time by 50%.

        Budgeting and Forecasting Model
        - Developed a comprehensive budgeting and forecasting model using Microsoft Access, improving accuracy and efficiency in financial planning.

        Certifications:
        - Certified Public Accountant (CPA)
        - Certified Management Accountant (CMA)

        Interests:
        - Volunteering at Local Non-Profits, Reading Financial Journals, Traveling, Cooking, Yoga
        """,
        "labels": [
            (1, 21, "NAME"),            # Lisa Marie Johnson
            (51, 65, "PHONE"),          # (312) 555-6789
            (74, 104, "EMAIL"),         # lisa.johnson@example.com
            (665, 674, "SKILL"),        # QuickBooks
            (676, 679, "SKILL"),        # SAP
            (681, 699, "SKILL"),        # Oracle Financials
            (723, 738, "SKILL"),        # Microsoft Excel
            (740, 756, "SKILL"),        # Microsoft Access
            (758, 765, "SKILL"),        # Tableau
            (787, 791, "SKILL"),        # GAAP
            (793, 797, "SKILL"),        # IFRS
            (799, 802, "SKILL"),        # SOX
            (824, 842, "SKILL"),        # Financial Reporting
            (844, 853, "SKILL"),        # Budgeting
            (855, 863, "SKILL"),        # Auditing
            (865, 881, "SKILL"),        # Tax Preparation
            (883, 900, "SKILL"),        # Payroll Processing
        ]
    },
]

In [19]:
# Create a label mapping
label_list = ["O", "B-NAME", "I-NAME", "B-PHONE", "I-PHONE", "B-EMAIL", "I-EMAIL", "B-SKILL", "I-SKILL"]
label2id = {label: i for i, label in enumerate(label_list)}
id2label = {i: label for i, label in enumerate(label_list)}

# Use BertTokenizerFast
tokenizer = BertTokenizerFast.from_pretrained("bert-base-cased")

def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(examples["text"], truncation=True, padding=True, return_offsets_mapping=True)
    labels = []
    for i, label in enumerate(examples["labels"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        label_ids = [-100] * len(tokenized_inputs["input_ids"][i])
        for start, end, entity in label:
            start_idx = tokenized_inputs.char_to_token(i, start)
            end_idx = tokenized_inputs.char_to_token(i, end - 1)
            if start_idx is not None and end_idx is not None:
                label_ids[start_idx:end_idx + 1] = [label2id[f"B-{entity}"]] + [label2id[f"I-{entity}"]] * (end_idx - start_idx)
        labels.append(label_ids)
    tokenized_inputs["labels"] = labels
    return tokenized_inputs


In [20]:
# Convert the data to a Dataset
train_dataset = Dataset.from_dict({
    "text": [example["text"] for example in train_data],
    "labels": [example["labels"] for example in train_data],
})

train_dataset = train_dataset.map(tokenize_and_align_labels, batched=True)


ArrowInvalid: Could not convert 'NAME' with type str: tried to convert to int64

In [None]:

# Define the model
model = BertForTokenClassification.from_pretrained("bert-base-cased", num_labels=len(set(entity for example in train_data for _, _, entity in example["labels"])))

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Create Trainer instance
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)

# Train the model
trainer.train()