In [1]:
import spacy
import json
from spacy.tokens import DocBin
from sklearn.model_selection import train_test_split
# Colab upload location of the annotated resumes; adjust when running elsewhere.
DATA_PATH = '/content/resumeparser.tds.json'
TEST_SIZE = 0.2
SEED = 42


def _unpack_record(item):
    """Return ``(text, entities)`` for one annotated record, or ``None``.

    Dict records are read through their ``'text'`` and ``'entities'`` keys.
    Two-element sequences are treated as ``(text, entities)`` pairs; lists are
    accepted as well as tuples because ``json.load`` never produces tuples.
    Any other shape yields ``None`` so the caller can count and skip it.
    """
    if isinstance(item, dict):
        return item.get('text', ''), item.get('entities', [])
    if isinstance(item, (list, tuple)) and len(item) == 2:
        return item[0], item[1]
    return None


def convert_to_docbin(records, nlp, output_path):
    """Convert annotated records into a spaCy ``DocBin`` and write it to disk.

    Args:
        records: Iterable of annotated records (see ``_unpack_record``). Every
            entity must unpack as ``(start, end, label)``, where ``start`` and
            ``end`` are character offsets into the record's text.
        nlp: Pipeline whose tokenizer is used to build each ``Doc``.
        output_path: Destination of the serialized ``DocBin``.

    Returns:
        ``(doc_bin, stats)``. ``stats`` is a dict of counters:
        ``records_ignored`` (unsupported record shape), ``docs_written``,
        ``docs_failed`` (an exception dropped the whole document),
        ``ents_total`` (annotations examined) and ``ents_skipped``
        (annotations that did not make it into the output).
    """
    doc_bin = DocBin()
    stats = dict.fromkeys(
        ("records_ignored", "docs_written", "docs_failed", "ents_total", "ents_skipped"),
        0,
    )

    for item in records:
        record = _unpack_record(item)
        if record is None:
            stats["records_ignored"] += 1
            continue
        text, entities = record

        seen = 0
        dropped = 0
        try:
            doc = nlp.make_doc(text)
            spans = []
            for start, end, label in entities:
                seen += 1
                # "contract" shrinks the offsets to the token boundaries that lie
                # inside them; None means no whole token fits in the range.
                span = doc.char_span(start, end, label=label, alignment_mode="contract")
                # The strip() comparison also covers leading/trailing spaces.
                if span is None or span.text != span.text.strip():
                    ent_text = text[start:end]
                    print(f"⚠  Skipping Entity : {text[0:30]}... {ent_text}")
                    dropped += 1
                else:
                    spans.append(span)

            # Raises if the accepted spans overlap; the document is then dropped below.
            doc.ents = spans
            doc_bin.add(doc)
            stats["docs_written"] += 1
        except Exception as ex:
            # Deliberately best-effort: one malformed record must not abort the
            # whole conversion. The document is lost, so is every annotation on it.
            print("⚠ ", ex)
            stats["docs_failed"] += 1
            dropped = seen

        stats["ents_total"] += seen
        stats["ents_skipped"] += dropped

    doc_bin.to_disk(output_path)
    return doc_bin, stats


def _report(split_name, n_records, stats):
    """Print record, document and annotation counts for one split."""
    ents_total = stats["ents_total"]
    ents_skipped = stats["ents_skipped"]
    # A split without any annotations must not raise ZeroDivisionError.
    pct = ents_skipped / ents_total * 100 if ents_total else 0.0
    print(f"Total {split_name} Data : {n_records}")
    print(
        f"Documents written : {stats['docs_written']} "
        f"(failed: {stats['docs_failed']}, unsupported records: {stats['records_ignored']})"
    )
    print(f"Failed to Convert : {ents_skipped} / {ents_total} = {pct} %")


with open(DATA_PATH, 'r', encoding='utf-8') as json_file:
    data = json.load(json_file)

# Split the data into training and testing sets.
# NOTE(review): `spacy debug data` reports training examples that also occur in
# the evaluation data, i.e. the source file contains duplicates; consider
# de-duplicating `data` before splitting to avoid leakage.
training_data, testing_data = train_test_split(data, test_size=TEST_SIZE, random_state=SEED)

# A blank pipeline is enough here: only its tokenizer is needed, and one
# instance can serve both splits.
nlp = spacy.blank("en")

train_db, train_stats = convert_to_docbin(training_data, nlp, "training_data.spacy")
test_db, test_stats = convert_to_docbin(testing_data, nlp, "testing_data.spacy")

# Names the original cell left behind, kept for anything that still reads them.
# `skipped` / `total` now hold the *training* annotation counts, which is what
# the "Total Training Data" report was always labelled as showing.
db = test_db
skipped = train_stats["ents_skipped"]
total = train_stats["ents_total"]
training_data_len = len(training_data)

print("✨ Results:")
_report("Training", training_data_len, train_stats)
_report("Testing", len(testing_data), test_stats)


⚠  Skipping Entity : 		Present : Creative Logic Net... musthafampv786@gmail.com
⚠  Skipping Entity : Vaishali Kale
Pune, Maharasht... C#
⚠  Skipping Entity : Vaishali Kale
Pune, Maharasht... .net
⚠  Skipping Entity : Ravi Reddy
SR. JAVA/J2EE DEVE... Servlets
⚠  Skipping Entity : Ravi Reddy
SR. JAVA/J2EE DEVE... AWS
⚠  Skipping Entity : Ravi Reddy
SR. JAVA/J2EE DEVE... Servlets
⚠  Skipping Entity : Ravi Reddy
SR. JAVA/J2EE DEVE... Servlets
⚠  Skipping Entity : Ravi Reddy
SR. JAVA/J2EE DEVE... SPRING3.0
⚠  Skipping Entity : Ravi Reddy
SR. JAVA/J2EE DEVE... STRUTS2.0
⚠  Skipping Entity : Ravi Reddy
SR. JAVA/J2EE DEVE... SpringSecurities
⚠  Skipping Entity : Rao

201-701-3757

OBJECTI... Agile
⚠  Skipping Entity : RANDY ADAMS
Sr. Java Develope... SERVELT
⚠  Skipping Entity : RANDY ADAMS
Sr. Java Develope... JIRA
⚠  Skipping Entity : RANDY ADAMS
Sr. Java Develope... J2EE
⚠  Skipping Entity : Surinder Kumar
Ferozepur City... C++
⚠  Skipping Entity : 		Nitika Sharma
		Email:nitik... nitika9

In [7]:
# Auto-fill every unset value in `base_config.cfg` and save the result as `config.cfg`.
# NOTE(review): `base_config.cfg` is not created by any cell shown in this notebook;
# it has to exist in the working directory before this cell runs.
!python -m spacy init fill-config base_config.cfg config.cfg


[38;5;2m✔ Auto-filled config with all values[0m
[38;5;2m✔ Saved config[0m
config.cfg
You can now add your data and train your pipeline:
python -m spacy train config.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy


In [8]:
# Sanity-check the config against both corpora before training. The two paths are
# the DocBin files saved by the data-conversion cell (`training_data.spacy`,
# `testing_data.spacy`).
!python -m spacy debug data config.cfg  --paths.train ./training_data.spacy --paths.dev ./testing_data.spacy

[1m
[38;5;2m✔ Pipeline can be initialized with data[0m
[38;5;2m✔ Corpus is loadable[0m
[1m
Language: en
Training pipeline: tok2vec, ner
539 training docs
135 evaluation docs
[38;5;3m⚠ 10 training examples also in evaluation data[0m
[38;5;3m⚠ Low number of examples to train a new pipeline (539)[0m
[1m
[38;5;4mℹ 600787 total word(s) in the data (31106 unique)[0m
[38;5;4mℹ No word vectors present in the package[0m
[1m
[38;5;4mℹ 26 label(s)[0m
0 missing value(s) (tokens with '-' label)
[38;5;3m⚠ Low number of examples for label 'GPA/PERCENTAGE' (4)[0m
[2K[38;5;3m⚠ Low number of examples for label 'TECHNOLOGICS' (22)[0m
[2K[38;5;3m⚠ Low number of examples for label 'WORK EXPERIANCE' (5)[0m
[2K[38;5;3m⚠ Low number of examples for label 'LINKEDIN/GITHUB' (3)[0m
[2K[38;5;3m⚠ Low number of examples for label 'INTERNSHIP' (10)[0m
[2K[38;5;3m⚠ Low number of examples for label 'UNIVERSITY/COLLEGE' (47)[0m
[2K[38;5;3m⚠ Low number of examples for label 'TECHNOLOG

In [9]:
# Train the pipeline described by `config.cfg` and save it under ./output_model.
# NOTE(review): `testing_data.spacy` is passed as the dev set, so the scores printed
# during training are computed on that split — it is not an untouched test set.
!python -m spacy train config.cfg  --output ./output_model --paths.train ./training_data.spacy --paths.dev ./testing_data.spacy

[38;5;2m✔ Created output directory: output_model[0m
[38;5;4mℹ Saving to output directory: output_model[0m
[38;5;4mℹ Using CPU[0m
[1m
[38;5;2m✔ Initialized pipeline[0m
[1m
[38;5;4mℹ Pipeline: ['tok2vec', 'ner'][0m
[38;5;4mℹ Initial learn rate: 0.001[0m
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00    127.23    0.00    0.00    0.00    0.00
  0     200       3288.21  32123.79   22.34   34.19   16.60    0.22
  0     400       7388.94  17286.36    9.00   45.04    5.00    0.09
  1     600      12158.21  19072.15   19.80   36.70   13.56    0.20
  1     800      17485.43  15317.48   16.98   46.46   10.39    0.17
  1    1000      48324.28  14442.64   32.18   36.92   28.51    0.32
  2    1200      21694.33  14982.02   17.92   44.98   11.19    0.18
  2    1400      13669.83  13164.21   22.64   26.14   19.97    0.23
  2    1600       9884.78  12496.72   30.47   41.78   

In [10]:
import spacy

# Load the `model-best` pipeline from the ./output_model directory that the
# `spacy train --output ./output_model` cell writes to.
# This rebinds `nlp`, replacing the blank English pipeline used for data conversion.
nlp = spacy.load("./output_model/model-best")

text = """
 SASIDHAR THOTA
22 Giralda Close, E16 3SZ
+44 7471004624 sasi.thota7777@gmail.com LinkedIn
PERSONAL PROFILE
An enthusiastic and confident Software Engineer with a demonstrated history of working and experience in IT for
around 2 years. Skilled in Python, TypeScript, HTML, and CSS languages and Django, Angular and .NET frameworks
with experience in web development. Currently looking for an opportunity where I can grow professionally and aid in
the growth of the organization.
EDUCATION
2021 – 2022 University of Leicester School of Business, Leicester, United Kingdom.
International Management MSc - Merit
2014 – 2018 Vignan's Lara Institute of Technology and Science, Guntur, Andhra Pradesh, India.
Department of Electronics & Communication Engineering - First Class
TECHNICAL SKILLS
• Languages: Python, TypeScript, C#
• Web Technologies: HTML5, CSS3, Material UI, Kendo UI, Bootstrap
• Frameworks and Library: Django, Angular, .NET
• Webservices: Postman
• Version Control: GIT, Team Foundation Server (TFS)
• Database: MSSQL, Oracle
• Methodologies: Agile, Waterfall
ACADEMIC PROJECTS
• Industry 4.0: The Challenges and Benefits of Smart and Lean Manufacturing Adaptation for Manufacturing
SME’s.
RELEVANT WORK EXPERIENCE
September 2018 – October 2020 Tata Consultancy Services, Mumbai, Maharashtra, India.
Role: Full Stack Developer
Responsibilities:
• Analysing stories, writing code, implementing functional testing, contributing to release and iteration
planning.
• Expertise in building web applications ideally using Python in Django MVT Framework, Type Script in Angular
Framework, HTML, CSS, and Bootstrap.
• Experience in building front end UI using material UI and kendo UI frameworks.
• Implemented backend scripts through .NET and Python.
• Proficiency in creating SQL procedures in MsSQL and Oracle.
• Extensive experience in both mobile and desktop web application.
• Version Management using Team Foundation Server (TFS) and GIT.
• Performed system and integration testing with sample and live data.
• Maintain quality and ensure responsiveness of applications.
• Strong troubleshooting skills.
• Participated in project planning and/or provide accurate estimates on projects/tasks and supported during
production deployments.
• A profound experience to work in large, collaborative teams to achieve organizational goals.
• Learning new frameworks and providing knowledge transfer sessions to guide fellow teammates.
CERTIFICATIONS
• Python (Basic) Certified from HackerRank in May 2022.
• Angular (Basic) Certified from HackerRank in May 2022.
• SQL (Basic) Certified from HackerRank in May 2022.
• Problem Solving (Basic) Certified from HackerRank in Sep 2020.
ACHEIVEMENTS & AWARDS
• Tata Consultancy Services, Mumbai Maharashtra, India.
1. September 2020 Star Team Award: Excellence in Customer Delivery for outstanding performance.
2. Aug 2020 Star of The Month Award: Awarded for coherent deliverables in the developing application
with no defects and before time. Additionally creating concept application based on new technology and
integrating the same with application for enhanced features and performance.
3. May 2020 Ally of the Business Award: Awarded for untiring support and unwavering dedication, in
making virtual infrastructure functional as it enabled the smooth transition of systems and processes in a
time of crisis. The achievement is noted one of the highest in corporate ranks as it enabled business
continuity in a critical phase.
4. Dec 2019 Best Team Award: Exhibit Teamwork to achieve a major team goal and provide consistent
quality of services.
5. Jun 2019 Star of The Month Award: Awarded for coherent deliverables in the developing application
with no defects and before time. Accord achievement for a fresh graduate in the shortest possible time.
6. May 2019 On the Spot Award: Team-building activity conducted for own/other teams, displayed
creativity at work, and Active role in supporting organizational initiative
DECLARATION
I hereby declare that the above information provided is true to the best of my knowledge.
Sasidhar Thota
"""

# Run the loaded pipeline over the sample resume, then list each predicted
# entity as "<entity text> <label>", one per line.
doc = nlp(text)

predictions = ((span.text, span.label_) for span in doc.ents)
for surface, tag in predictions:
    print(surface, tag)


sasi.thota7777@gmail.com EMAIL
TypeScript SKILLS
HTML SKILLS
Django SKILLS
Angular SKILLS
Python SKILLS
TypeScript SKILLS
C# SKILLS
HTML5 TECHNOLOGIES
Bootstrap SKILLS
Django TECHNOLOGIES
Angular SKILLS
.NET SKILLS
MSSQL SKILLS
Oracle SKILLS
Agile TECHNOLOGIES
Full Stack Developer
Responsibilities: 
• Analysing stories, writing code JOB ROLE
Type Script SKILLS
HTML SKILLS
CSS SKILLS
Strong troubleshooting skills. SKILLS
Python SKILLS
Angular SKILLS
SQL SKILLS
Problem Solving SKILLS
September 2020 Star Team Award: Excellence in Customer Delivery for outstanding performance. WORK EXPERIENCE
