## Import Libraries

In [1]:
import re
import json
import spacy
import random
import pandas as pd
from spacy import util
from spacy.tokens import Doc
from spacy.training import Example
from spacy.language import Language

## Read data
- We created this data specifically for the ``spaCy`` model, which requires a specific format for its training data. See [Data Format](https://stackoverflow.com/questions/47443976/formatting-training-dataset-for-spacy-ner) for details.


In [2]:
# Colab-only step: mount Google Drive so the files under /content/drive/MyDrive
# that the following cells open are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# The three annotation exports that are combined into one training corpus.
annotation_paths = [
    r"/content/drive/MyDrive/Untitled folder/data_cloab/Data_Annotation_1.json",
    r"/content/drive/MyDrive/Untitled folder/data_cloab/Data_Annotation_2.json",
    r"/content/drive/MyDrive/Untitled folder/data_cloab/Data_Annotation_3.json",
]


def load_annotation_file(path):
    """Parse one annotation export and return the decoded JSON object.

    Args:
        path: Location of a JSON file holding an ``'annotations'`` list.

    Returns:
        The object produced by ``json.load`` for that file.
    """
    # The entity spans index into the decoded text, so decode explicitly as
    # UTF-8 instead of relying on the platform's default encoding.
    with open(path, encoding="utf-8") as src:
        return json.load(src)


# Keep the per-file objects available under their original names.
ner_annotated_data0, ner_annotated_data1, ner_annotated_data2 = (
    load_annotation_file(path) for path in annotation_paths
)

# aggregate all data into one list (file order is preserved)
ner_annotated_data = []
for annotated_file in (ner_annotated_data0, ner_annotated_data1, ner_annotated_data2):
    ner_annotated_data.extend(annotated_file['annotations'])

In [4]:
# Example: display one raw annotation record (index 2) to show the data format
ner_annotated_data[2]

["small team experienced engineer determined change modern mobility including entrenched car rental industry emphasize team ownership iterative development moving fast without breaking thing looking someone drive development restful apis building rapidly scaling backend ideal candidate experience web programming comfortable working aws infrastructure know whatever framework library currently use open mastering new framework necessary currently hiring full-stack engineer experience level sde sde ii senior sdes applicant minimum 1+ year professional software development experience fluency least one dynamic language ruby preferred silvercar might building apis client facing apps ruby rail interacting data store like rds elasticache redis others creating maintaining scalable infrastructure leveraging modern aws product building amazing ux client side apps angular bootstrap j framework writing test documentation work feature development scrum team maintaining expertise current emerging tech

* This is the data format: each record holds the job-description text, followed by the start offset, end offset and label of every skill mentioned in it.

### Prepare Data
* Convert the annotated records into the structure the model is trained on.

In [5]:
# Each training item is stored as (text, [(start, end, label), ...]), e.g.
#   ("Proficient in Python and Java programming languages.",
#    [(12, 18, "SKILL"), (23, 27, "SKILL")])
# The {"entities": ...} wrapper is added later, when the Example objects are built.
train_data = []
skipped_records = 0
for record in ner_annotated_data:
    try:
        text = record[0]
        spans = [(span[0], span[1], span[2]) for span in record[1]['entities']]
    except (IndexError, KeyError, TypeError):
        # Record does not have the [text, {"entities": [...]}] structure: skip it
        # as before, but count it so the data loss is visible instead of silent.
        skipped_records += 1
        continue
    train_data.append((text, spans))

if skipped_records:
    print(f"Skipped {skipped_records} malformed annotation record(s)")
train_data[2]

("small team experienced engineer determined change modern mobility including entrenched car rental industry emphasize team ownership iterative development moving fast without breaking thing looking someone drive development restful apis building rapidly scaling backend ideal candidate experience web programming comfortable working aws infrastructure know whatever framework library currently use open mastering new framework necessary currently hiring full-stack engineer experience level sde sde ii senior sdes applicant minimum 1+ year professional software development experience fluency least one dynamic language ruby preferred silvercar might building apis client facing apps ruby rail interacting data store like rds elasticache redis others creating maintaining scalable infrastructure leveraging modern aws product building amazing ux client side apps angular bootstrap j framework writing test documentation work feature development scrum team maintaining expertise current emerging tech

In [11]:
def print_doc_entities(_doc: Doc):
    """Print every entity of ``_doc`` as ``text,'--->' label``, or ``NONE`` if it has none."""
    found = _doc.ents
    if not found:
        print("     NONE")
        return
    for span in found:
        print(f"     {span.text},'--->' {span.label_}")

## Train Spacy Model

In [7]:
def customizing_pipeline_component(nlp: Language, epoch: int, data=None):
    """Fine-tune the ``ner`` component of ``nlp`` on annotated examples.

    Args:
        nlp: Loaded spaCy pipeline. Every enabled component other than ``ner``
            is disabled for the duration of training and restored afterwards.
        epoch: Number of passes over the training examples.
        data: Optional iterable of ``(text, entity_offsets)`` pairs, where
            ``entity_offsets`` is a list of ``(start, end, label)`` tuples.
            Defaults to the notebook-level ``train_data``.

    Returns:
        The same ``nlp`` object, updated in place.
    """
    # Shuffle a private copy so the caller's list (by default the notebook's
    # ``train_data``) keeps the order earlier cells displayed.
    examples = list(train_data if data is None else data)

    other_pipes = [pipe_name for pipe_name in nlp.pipe_names if pipe_name != 'ner']

    print("   Training ...")
    # The context manager re-enables the other components on exit, even if an
    # update raises part-way through training.
    with nlp.select_pipes(disable=other_pipes):
        optimizer = nlp.create_optimizer()
        for _ in range(epoch):
            random.shuffle(examples)
            # One update per example, as before (no mini-batching).
            for raw_text, entity_offsets in examples:
                doc = nlp.make_doc(raw_text)
                example = Example.from_dict(doc, {"entities": entity_offsets})
                nlp.update([example], sgd=optimizer)
    return nlp
# Start from the pre-trained English pipeline and fine-tune its NER component
epoch = 10  # number of passes over the training data
model = spacy.load('en_core_web_sm')
model_spacy = customizing_pipeline_component(nlp=model, epoch=epoch)

   Training ...




In [12]:
x ="software developer integration immediate opening a dynamic akron or cleveland area company is looking for an experienced software developer. the integration developer designs, develops, tests, maintains and enhances multiple systems serving internal users as well as our suppliers and customers. the integration developer interacts with it and other internal business partners to deliver systems that advance company strategy and improve the enterprise capability of it. candidates need to demonstrate experience in the following skillsets proven technical expertise in the design, development, coding, testing, and debugging of enterprise software. strong knowledge of web services . experience working with relational databases. experience authoring and maintaining data access and or or database queries. familiar with standard software design patterns, methodologies, and the mvc architecture. proficient in the following technologies c , .net, .net core, html5, css3, mssql, mysql, reactjs. experience working with and or or administering microsoft iis experience with syspro s erp package a plus but not required candidates must be able to execute the following functions creates and deploys code and configuration associated with support and project work across our application portfolio. assists in creating, evolving and standardizing technical processes such as source code management, continuous integration, etc. submits code and configuration with high levels of quality, performance, security, scalability and extensibility. assists in the creation and evaluation of business requirements and project plans. 
other requirements include takes pride in delivering work on time and with quality good problem solving and decision making abilities ability to learn quickly in a fast paced environment ability to both work independently and with teams effectively juggles a continuous mix of support and project work able to work off hours or provide on call support when needed the ideal candidate must be a self starter, able to work independently, and have experience in the skillsets mentioned above. this position requires a highly responsible and organized individual with a good work ethic and a minimum of two years of experience using the tools and technologies noted above. compensation is commensurate with experience. industries manufacturing other education bachelor s degree in computer science, information systems, or related field, or combination of education and equivalent work experience. job type contract experience software development 2 years work location one location work remotely no"
# Run the fine-tuned pipeline on the sample posting and list the entities it predicts
print_doc_entities(model_spacy(x))

     web services .,'--->' SKILL
     relational databases.,'--->' SKILL
     mvc architecture.,'--->' SKILL
     c,'--->' SKILL
     .net,'--->' SKILL
     .net core,'--->' SKILL
     html5,'--->' SKILL
     css3,'--->' SKILL
     mssql,'--->' SKILL
     mysql,'--->' SKILL
     reactjs.,'--->' SKILL
     syspro s erp,'--->' SKILL
     decision making abilities ability,'--->' SKILL


In [14]:
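# Uncomment to save the fine-tuned pipeline to Drive; the cell that calls
# spacy.load on this same path needs the saved model to exist.
# model_spacy.to_disk('/content/drive/MyDrive/Untitled folder/data_cloab/ner_model')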

In [16]:
# Load a pipeline from the Drive directory that the (commented-out) to_disk call targets
nlp = spacy.load('/content/drive/MyDrive/Untitled folder/data_cloab/ner_model')
x = "software developer integration immediate opening a dynamic akron or cleveland area company is looking for an experienced software developer. the integration developer designs, develops, tests, maintains and enhances multiple systems serving internal users as well as our suppliers and customers. the integration developer interacts with it and other internal business partners to deliver systems that advance company strategy and improve the enterprise capability of it. candidates need to demonstrate experience in the following skillsets proven technical expertise in the design, development, coding, testing, and debugging of enterprise software. strong knowledge of web services . experience working with relational databases. experience authoring and maintaining data access and or or database queries. familiar with standard software design patterns, methodologies, and the mvc architecture. proficient in the following technologies c , .net, .net core, html5, css3, mssql, mysql, reactjs. experience working with and or or administering microsoft iis experience with syspro s erp package a plus but not required candidates must be able to execute the following functions creates and deploys code and configuration associated with support and project work across our application portfolio. assists in creating, evolving and standardizing technical processes such as source code management, continuous integration, etc. submits code and configuration with high levels of quality, performance, security, scalability and extensibility. assists in the creation and evaluation of business requirements and project plans. 
other requirements include takes pride in delivering work on time and with quality good problem solving and decision making abilities ability to learn quickly in a fast paced environment ability to both work independently and with teams effectively juggles a continuous mix of support and project work able to work off hours or provide on call support when needed the ideal candidate must be a self starter, able to work independently, and have experience in the skillsets mentioned above. this position requires a highly responsible and organized individual with a good work ethic and a minimum of two years of experience using the tools and technologies noted above. compensation is commensurate with experience. industries manufacturing other education bachelor s degree in computer science, information systems, or related field, or combination of education and equivalent work experience. job type contract experience software development 2 years work location one location work remotely no"
# Predict with the pipeline reloaded from disk (``nlp``), not the in-memory
# ``model_spacy``, so this cell actually checks the saved model.
print_doc_entities(nlp(x))

     web services .,'--->' SKILL
     relational databases.,'--->' SKILL
     mvc architecture.,'--->' SKILL
     c,'--->' SKILL
     .net,'--->' SKILL
     .net core,'--->' SKILL
     html5,'--->' SKILL
     css3,'--->' SKILL
     mssql,'--->' SKILL
     mysql,'--->' SKILL
     reactjs.,'--->' SKILL
     syspro s erp,'--->' SKILL
     decision making abilities ability,'--->' SKILL


In [17]:
# nlp = spacy.load('en_model_ner_skills')
# x = "software developer integration immediate opening a dynamic akron or cleveland area company is looking for an experienced software developer. the integration developer designs, develops, tests, maintains and enhances multiple systems serving internal users as well as our suppliers and customers. the integration developer interacts with it and other internal business partners to deliver systems that advance company strategy and improve the enterprise capability of it. candidates need to demonstrate experience in the following skillsets proven technical expertise in the design, development, coding, testing, and debugging of enterprise software. strong knowledge of web services . experience working with relational databases. experience authoring and maintaining data access and or or database queries. familiar with standard software design patterns, methodologies, and the mvc architecture. proficient in the following technologies c , .net, .net core, html5, css3, mssql, mysql, reactjs. experience working with and or or administering microsoft iis experience with syspro s erp package a plus but not required candidates must be able to execute the following functions creates and deploys code and configuration associated with support and project work across our application portfolio. assists in creating, evolving and standardizing technical processes such as source code management, continuous integration, etc. submits code and configuration with high levels of quality, performance, security, scalability and extensibility. assists in the creation and evaluation of business requirements and project plans. 
other requirements include takes pride in delivering work on time and with quality good problem solving and decision making abilities ability to learn quickly in a fast paced environment ability to both work independently and with teams effectively juggles a continuous mix of support and project work able to work off hours or provide on call support when needed the ideal candidate must be a self starter, able to work independently, and have experience in the skillsets mentioned above. this position requires a highly responsible and organized individual with a good work ethic and a minimum of two years of experience using the tools and technologies noted above. compensation is commensurate with experience. industries manufacturing other education bachelor s degree in computer science, information systems, or related field, or combination of education and equivalent work experience. job type contract experience software development 2 years work location one location work remotely no"
# nlp(x).ents