In [None]:
# Install the libraries this project needs (run once per fresh environment).
# Upgrade the packaging toolchain first, then install/upgrade spaCy itself.
!pip install -U pip setuptools wheel
!pip install -U spacy
# Transformer support for spaCy pipelines.
!pip install spacy-transformers
# Download the pretrained English transformer pipeline package.
!python -m spacy download en_core_web_trf

In [7]:
import os 
import json
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import spacy 
from spacy.tokens import DocBin
from tqdm import tqdm

In [8]:
# Directory (relative to the notebook's working directory) that holds the annotated files.
tagged_data = 'tagged_data'
# Names (not full paths) of every entry in that directory, in the order os.listdir returns them.
list_tagged_files = os.listdir(tagged_data)

In [9]:
def make_v3_dataset(data, db=None):
    """Convert ``(text, annotations)`` records into a spaCy v3 ``DocBin``.

    Parameters
    ----------
    data : iterable of (str, dict)
        Each record is ``(text, annot)`` where ``annot['entities']`` is an
        iterable of ``(start, end, label)`` character-offset triples.
    db : DocBin, optional
        An existing ``DocBin`` to append to. When omitted (``None``) a new
        one is created. An empty ``DocBin`` passed by the caller is reused
        as-is, so its configuration is preserved.

    Returns
    -------
    tuple (DocBin, list)
        The ``DocBin`` containing one ``Doc`` per input record, and the list
        of ``(text, annot)`` records whose entity spans could not be assigned
        to the ``Doc``. Those docs are still added, but without entities.
    """
    # ``None`` is the default; an empty list is still accepted because it was
    # the historical default value. Checking identity/type instead of
    # truthiness matters: ``DocBin`` defines ``__len__``, so ``not db`` would
    # also discard a legitimately empty DocBin supplied by the caller.
    if db is None or (isinstance(db, list) and not db):
        db = DocBin()

    nlp = spacy.blank('en')
    failed_record = []
    for text, annot in tqdm(data):
        doc = nlp.make_doc(text)
        ents = []
        for start, end, label in annot['entities']:
            # 'contract' shrinks misaligned character offsets to the token
            # boundaries inside them; char_span returns None if nothing is left.
            span = doc.char_span(start, end, label = label, alignment_mode = 'contract')
            if span is None:
                print(f'empty entity, {text}, {annot["entities"]}') #I expect this to never happen
            else:
                ents.append(span)
        try:
            doc.ents = ents
        except Exception:
            # Best effort: remember the record (e.g. overlapping spans) and
            # carry on. ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still stop the run.
            failed_record.append((text, annot))
        db.add(doc)
    return db, failed_record

In [None]:
# Convert every JSON annotation file listed in `list_tagged_files` to spaCy's
# binary .spacy format, writing one output file per input file.
output_dir = 'tagged_spacy_format'
os.makedirs(output_dir, exist_ok=True)  # make sure the target folder exists before writing

for file_name in tqdm(list_tagged_files):
    # The file names were listed from `tagged_data`, so join against that same directory.
    file_path = os.path.join(tagged_data, file_name)
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Swap the extension rather than slicing a fixed number of characters.
    saving_path = os.path.join(output_dir, os.path.splitext(file_name)[0] + '.spacy')

    doc_bin, failed_records = make_v3_dataset(data['annotations'])
    if failed_records:
        # tqdm.write keeps the message from garbling the progress bar.
        tqdm.write(f'{file_name}: {len(failed_records)} record(s) saved without entities')
    doc_bin.to_disk(saving_path)

  0%|          | 0/51 [00:00<?, ?it/s]
100%|██████████| 8/8 [00:00<00:00, 351.18it/s]
  2%|▏         | 1/51 [00:02<02:26,  2.93s/it]
100%|██████████| 9/9 [00:00<00:00, 327.18it/s]
  4%|▍         | 2/51 [00:04<01:42,  2.09s/it]
100%|██████████| 8/8 [00:00<00:00, 299.43it/s]
  6%|▌         | 3/51 [00:05<01:28,  1.84s/it]
100%|██████████| 6/6 [00:00<00:00, 243.50it/s]
  8%|▊         | 4/51 [00:07<01:20,  1.71s/it]
100%|██████████| 7/7 [00:00<00:00, 173.09it/s]
 10%|▉         | 5/51 [00:11<01:55,  2.51s/it]
100%|██████████| 9/9 [00:00<00:00, 456.09it/s]
 12%|█▏        | 6/51 [00:13<01:53,  2.52s/it]
100%|██████████| 7/7 [00:00<00:00, 284.64it/s]
 14%|█▎        | 7/51 [00:16<01:50,  2.52s/it]
100%|██████████| 7/7 [00:00<00:00, 161.76it/s]
 16%|█▌        | 8/51 [00:19<01:56,  2.71s/it]
100%|██████████| 9/9 [00:00<00:00, 414.20it/s]
 18%|█▊        | 9/51 [00:22<01:50,  2.63s/it]
100%|██████████| 8/8 [00:00<00:00, 204.12it/s]
 20%|█▉        | 10/51 [00:25<01:52,  2.75s/it]
100%|██████████| 6/6

Some of the code in the cells below is adapted from https://towardsdatascience.com/how-to-fine-tune-bert-transformer-with-spacy-3-6a90bfe57647

In [None]:
# Initialization: expand the partial base_config.cfg into a complete training
# config (config.cfg) by auto-filling every remaining setting.
!python -m spacy init fill-config base_config.cfg config.cfg

[38;5;2m✔ Auto-filled config with all values[0m
[38;5;2m✔ Saved config[0m
config.cfg
You can now add your data and train your pipeline:
python -m spacy train config.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy


In [10]:
# Train with config.cfg on GPU 0: training data is read from tagged_spacy_format/,
# dev data from cross_val/, and the resulting models are written to generated_model/.
# This took approximately 8 hours of training on Google Colab Pro in GPU mode.
!python -m spacy train -g 0 config.cfg  --output generated_model/ --paths.train tagged_spacy_format/ --paths.dev cross_val/


^C


[2023-02-21 21:41:20,315] [INFO] Set up nlp object from config
[2023-02-21 21:41:20,332] [INFO] Pipeline: ['transformer', 'ner']
[2023-02-21 21:41:20,339] [INFO] Created vocabulary
[2023-02-21 21:41:20,342] [INFO] Finished initializing nlp object

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]
Downloading: 100%|██████████| 481/481 [00:00<00:00, 72.9kB/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]
Downloading:   1%|▏         | 12.0k/878k [00:00<00:16, 54.5kB/s]
Downloading:   4%|▍         | 36.0k/878k [00:00<00:09, 90.2kB/s]
Downloading:  10%|▉         | 84.0k/878k [00:00<00:05, 154kB/s] 
Downloading:  24%|██▎       | 208k/878k [00:00<00:02, 319kB/s] 
Downloading:  49%|████▉     | 432k/878k [00:01<00:00, 588kB/s]
Downloading: 100%|██████████| 878k/878k [00:01<00:00, 785kB/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]
Downloading:   1%|          | 4.00k/446k [00:00<00:29, 15.2kB/s]
Downloading:   9%|▉         | 40.0k/446k [00:00<00:04, 98.9kB/s]
Downlo

✔ Created output directory: generated_model
ℹ Saving to output directory: generated_model
ℹ Using GPU: 0
[1m


Downloading:  83%|████████▎ | 398M/478M [01:54<00:22, 3.66MB/s]
Downloading:  83%|████████▎ | 398M/478M [01:54<00:22, 3.71MB/s]
Downloading:  83%|████████▎ | 399M/478M [01:55<00:22, 3.71MB/s]
Downloading:  84%|████████▎ | 399M/478M [01:55<00:22, 3.71MB/s]
Downloading:  84%|████████▎ | 400M/478M [01:55<00:22, 3.74MB/s]
Downloading:  84%|████████▎ | 400M/478M [01:55<00:21, 3.73MB/s]
Downloading:  84%|████████▎ | 400M/478M [01:55<00:21, 3.72MB/s]
Downloading:  84%|████████▍ | 401M/478M [01:55<00:21, 3.71MB/s]
Downloading:  84%|████████▍ | 401M/478M [01:55<00:21, 3.69MB/s]
Downloading:  84%|████████▍ | 401M/478M [01:55<00:21, 3.68MB/s]
Downloading:  84%|████████▍ | 402M/478M [01:55<00:21, 3.68MB/s]
Downloading:  84%|████████▍ | 402M/478M [01:55<00:22, 3.58MB/s]
Downloading:  84%|████████▍ | 402M/478M [01:56<00:21, 3.63MB/s]
Downloading:  84%|████████▍ | 403M/478M [01:56<00:21, 3.66MB/s]
Downloading:  84%|████████▍ | 403M/478M [01:56<00:21, 3.66MB/s]
Downloading:  84%|████████▍ | 403M/478M 

In [16]:
# Load the `model-best` pipeline from the training output directory (generated_model/).
nlp = spacy.load('generated_model/model-best')


In [18]:
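# Test example: one scraped resume (its text appears twice in the string), wrapped in a list so it can be passed to nlp.pipe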
main_str = ['''Manufacturing Production Manager Resume
Desired Industry Manufacturing
SpiderID 78692
Desired Job Location Windsor Colorado
Date Posted 1 23 2017
Type of Position Full Time Permanent
Availability Date Immediately
Desired Wage 95000
U S Work Authorization Yes
Job Level Management Manager Director 
Willing to Travel 
Highest Degree Attained Other
Willing to Relocate Yes
Objective Dynamic and growth driven professional offering hands on management experience and comprehensive background in manufacturing and engineering operations within highly competitive setting Adept at reengineering unproductive work processes as well as in planning and implementing various sustainable and cost effective work programs to drive continuous improvement of operations Armed with exceptional organizational and critical problem solving aptitudes to formulate effective solutions on complex production and quality issues Equipped with tactical leadership capabilities in supervising and guiding teams toward the successful and timely completion of projects Proficient with Microsoft Office applications Project Management and Microsoft Visio 
Experience Relevant ExperienceVestas Blades America Windsor COProduction Engineer Jan 2014 PresentContribute efforts in achieving production plan budget tooling equipment and bill of material along with quality control and safety Conceptualized new methods to optimize production levels while maintaining production costs yields quality and safety Identified and resolved process problems with effective solutions which decreased downtime and minimized costs Initiated plant trials to measure performance capabilities while ensuring updated documentation of process procedures Leveraged industry expertise in streamlining the manufacturing of turbine blades Production Supervisor Mar 2009 Jan 2014Rendered oversight to more than 70 employees to guarantee accordance of operation with production plan and goals Observed strict compliance with safety and quality guidelines and handled inventory control created schedules delegated work and facilitated training of staff Generated production reports for production and operations managers regarding production areas performance Served as a Shells Lighthouse project member while drafting and modifying all standard operating procedures Functioned as department lead for production quality training and process improvements Closely monitored operations productivity to determine areas for improvement in overall production process Pioneered the development of all shells production process job cards that decreased process times and improved efficiency and quality through changed production flow layout Anheuser Busch Fort Collis COBrew House Control Panel Operator Mar 2007 Mar 2009Efficiently administered beer brewing process from raw material selection and recipe formulation through the use of Siemens software Strictly enforced standard operating procedures and safe working practices Determined and evaluated all critical control points to achieve consistent product quality within allotted schedules Other ExperienceUnited States Airways 
Denver CoCustomer Service Representative Kroger Co King Soopers Smiths Food Drug Fort Collins CoGrocery Manager Head Clerk Night Crew Manager Front End ManagerInventory Control Manager Warehouse Manager 
Education EducationAssociate of Science with emphasis in chemistry and biologyFront Range Community College Fort Collins COPharmacy Pre Requisites for Doctor of Pharmacy Pharm D University of Wyoming Laramie WYPharmacy Pre Requisites for Doctor of Pharmacy Pharm D Western Wyoming Community College Rock Springs WY Deans Honor RollProfessional DevelopmentCertificationsSupervisor Certification Six Sigma Yellow Belt Project ManagementQuality Management Crucial Conversations Fort Lift License Crane License
Affiliations 
Skills Manufacturing Production Manager Project Planning Cost Reduction and Budget Optimization Resource Allocation Six Sigma Quality ControlLean Manufacturing Plant Management Manufacturing Inspection Administration Cross functional Team Building
Additional Information TrainingProduction Instructor Coordinator Planner Coordinator Wrote Training Document
Reference Available upon request 
Candidate Contact Information 
JobSpider com has chosen not to make contact information available on this page Click Contact Candidate to send this candidate a response 
Manufacturing Production Manager Resume
Desired Industry Manufacturing
SpiderID 78692
Desired Job Location Windsor Colorado
Date Posted 1 23 2017
Type of Position Full Time Permanent
Availability Date Immediately
Desired Wage 95000
U S Work Authorization Yes
Job Level Management Manager Director 
Willing to Travel 
Highest Degree Attained Other
Willing to Relocate Yes
Objective Dynamic and growth driven professional offering hands on management experience and comprehensive background in manufacturing and engineering operations within highly competitive setting Adept at reengineering unproductive work processes as well as in planning and implementing various sustainable and cost effective work programs to drive continuous improvement of operations Armed with exceptional organizational and critical problem solving aptitudes to formulate effective solutions on complex production and quality issues Equipped with tactical leadership capabilities in supervising and guiding teams toward the successful and timely completion of projects Proficient with Microsoft Office applications Project Management and Microsoft Visio 
Experience Relevant ExperienceVestas Blades America Windsor COProduction Engineer Jan 2014 PresentContribute efforts in achieving production plan budget tooling equipment and bill of material along with quality control and safety Conceptualized new methods to optimize production levels while maintaining production costs yields quality and safety Identified and resolved process problems with effective solutions which decreased downtime and minimized costs Initiated plant trials to measure performance capabilities while ensuring updated documentation of process procedures Leveraged industry expertise in streamlining the manufacturing of turbine blades Production Supervisor Mar 2009 Jan 2014Rendered oversight to more than 70 employees to guarantee accordance of operation with production plan and goals Observed strict compliance with safety and quality guidelines and handled inventory control created schedules delegated work and facilitated training of staff Generated production reports for production and operations managers regarding production areas performance Served as a Shells Lighthouse project member while drafting and modifying all standard operating procedures Functioned as department lead for production quality training and process improvements Closely monitored operations productivity to determine areas for improvement in overall production process Pioneered the development of all shells production process job cards that decreased process times and improved efficiency and quality through changed production flow layout Anheuser Busch Fort Collis COBrew House Control Panel Operator Mar 2007 Mar 2009Efficiently administered beer brewing process from raw material selection and recipe formulation through the use of Siemens software Strictly enforced standard operating procedures and safe working practices Determined and evaluated all critical control points to achieve consistent product quality within allotted schedules Other ExperienceUnited States Airways 
Denver CoCustomer Service Representative Kroger Co King Soopers Smiths Food Drug Fort Collins CoGrocery Manager Head Clerk Night Crew Manager Front End ManagerInventory Control Manager Warehouse Manager 
Education EducationAssociate of Science with emphasis in chemistry and biologyFront Range Community College Fort Collins COPharmacy Pre Requisites for Doctor of Pharmacy Pharm D University of Wyoming Laramie WYPharmacy Pre Requisites for Doctor of Pharmacy Pharm D Western Wyoming Community College Rock Springs WY Deans Honor RollProfessional DevelopmentCertificationsSupervisor Certification Six Sigma Yellow Belt Project ManagementQuality Management Crucial Conversations Fort Lift License Crane License
Affiliations 
Skills Manufacturing Production Manager Project Planning Cost Reduction and Budget Optimization Resource Allocation Six Sigma Quality ControlLean Manufacturing Plant Management Manufacturing Inspection Administration Cross functional Team Building
Additional Information TrainingProduction Instructor Coordinator Planner Coordinator Wrote Training Document
Reference Available upon request 
Candidate Contact Information 
JobSpider com has chosen not to make contact information available on this page Click Contact Candidate to send this candidate a response''' ] 


In [21]:
# Run the loaded pipeline over the sample text(s) and print each predicted
# entity as a (text, label) tuple, one per line.
for doc in nlp.pipe(main_str, disable=["tagger", "parser"]):
    for ent in doc.ents:
        entity_pair = (ent.text, ent.label_)
        print(entity_pair)


('Manufacturing Production Manager', 'JOB_TITLE')
('Manager Director', 'JOB_TITLE')
('manufacturing and engineering operations', 'SKILL')
('reengineering unproductive work processes as well as in planning and implementing various sustainable and cost effective work programs to drive continuous improvement of operations', 'SKILL')
('Microsoft Office applications Project Management', 'TOOL')
('Microsoft Visio', 'TOOL')
('ExperienceVestas Blades America', 'ORG')
('COProduction Engineer Jan 2014 PresentContribute efforts in achieving production plan budget tooling equipment and bill of material along with quality control and safety', 'EXPERIENCE')
('EducationAssociate of Science with emphasis in chemistry', 'DEGREE')
('biologyFront Range Community College Fort Collins', 'ORG')
('Doctor of Pharmacy Pharm D', 'DEGREE')
('Doctor of Pharmacy Pharm D', 'DEGREE')
('Manufacturing Production ManagerProject', 'SKILL')
('Manufacturing Production Manager', 'JOB_TITLE')
('Manager Director', 'JOB_TITLE