Extract realistic skills, education, countries, and years of experience from real surgeon biography data.

In [1]:
import ast
import re

import pandas as pd
import spacy
from spacy.matcher import PhraseMatcher


In [2]:
# Load spaCy's small English pipeline. Its vocab backs the phrase matchers
# and its entity recogniser supplies the `doc.ents` read in the extraction loop.
nlp = spacy.load("en_core_web_sm")

In [8]:
# Load the surgeon biographies; the free-text bio is in the 'hard_text' column.
# NOTE(review): the 'Unnamed: 0' column in the output looks like an index that
# was written to the CSV -- consider `index_col=0`; confirm against the file.
df_surgeon = pd.read_csv("Surgeon_balanced_200.csv")
# Bare expression so the notebook renders the dataframe as the cell output.
df_surgeon

Unnamed: 0,hard_text,profession,gender
0,Dr. Chung's patients gave him an average ratin...,surgeon,Male
1,He specializes in the use of arthroscopy in th...,surgeon,Male
2,Dr. Sherwood graduated from New York Medical C...,surgeon,Male
3,Dr. Brown graduated from New York Medical Coll...,surgeon,Male
4,He is a Diplomat of the American Board of Surg...,surgeon,Male
...,...,...,...
195,"She specializes in Brachial Plexus, Complex De...",surgeon,Female
196,Ayesha is interested in the re-design of the h...,surgeon,Female
197,Dr. Munoz graduated from University of Massach...,surgeon,Female
198,Dr McKertich graduated top of her year from Th...,surgeon,Female


In [5]:
# Define keywords
# Skill phrases searched for in each biography. One phrase per line with a
# trailing comma on every entry: a missing comma makes Python silently
# concatenate two adjacent string literals into a single bogus phrase.
skills = [
    'General Surgery',
    'Orthopedic Surgery',
    'Cardiothoracic Surgery',
    'Neurosurgery',
    'Minimally Invasive Surgery',
    'Laparoscopy',
    'Surgical Planning',
    'Patient Diagnosis',
    'Post-Operative Care',
    'Operating Room Management',
    'Sterile Technique',
    'Suturing',
    'Anesthesia Collaboration',
]

# Degree / credential phrases: each abbreviation is followed by its long form.
education = [
    "MD", "Doctor of Medicine",
    "DO", "Doctor of Osteopathic Medicine",
    "MBBS", "Bachelor of Medicine and Bachelor of Surgery",
    # MBChB shared the exact same long form as MBBS; it is listed only once.
    "MBChB",
    "FRCS", "Fellowship of the Royal College of Surgeons",
    "FACS", "Fellow of the American College of Surgeons",
    "Residency in General Surgery",
    "Fellowship in Surgery",
    "Board Certified Surgeon",
    "General Surgery Residency",
]

In [6]:
def extract_experience(text):
    """
    Return the first "<number> year(s)" figure mentioned in ``text``.

    The text is lower-cased before searching, so "Years" and "YEARS" match
    too. The number must be separated from the word by whitespace. Returns
    the number as an ``int``, or ``None`` when no such phrase is present.
    """
    found = re.search(r'(\d+)\s+years?', text.lower())
    if found is None:
        return None
    return int(found.group(1))

In [7]:

# Create phrase matchers
# One matcher per keyword family, both sharing the pipeline's vocabulary.
# NOTE(review): PhraseMatcher compares exact token text by default, so a bio
# that writes "general surgery" will not hit the "General Surgery" pattern.
# Passing attr="LOWER" would relax that for skills -- confirm before changing,
# since short credentials such as "DO" would then also match the verb "do".
skill_matcher, edu_matcher = PhraseMatcher(nlp.vocab), PhraseMatcher(nlp.vocab)

# make_doc only tokenises, which is all a phrase pattern needs.
skill_patterns = list(map(nlp.make_doc, skills))
edu_patterns = list(map(nlp.make_doc, education))

edu_matcher.add("EDU", edu_patterns)
skill_matcher.add("SKILL", skill_patterns)


In [13]:
# ---------- Extraction loop ---------
# Four parallel lists, one entry per biography, in dataframe row order.
all_skills, all_education, all_countries, all_experience = [], [], [], []

for bio_text in df_surgeon['hard_text']:
    parsed = nlp(bio_text)

    # Entities spaCy labels "GPE". The sample output shows these include
    # cities (e.g. 'Toronto'), not only countries.
    gpe_mentions = set(ent.text for ent in parsed.ents if ent.label_ == "GPE")

    # Phrase-matcher hits, taken as the text span found in the bio.
    skill_mentions = set(parsed[lo:hi].text for _, lo, hi in skill_matcher(parsed))
    edu_mentions = set(parsed[lo:hi].text for _, lo, hi in edu_matcher(parsed))

    # Sets remove repeated mentions within a bio; stored back as plain lists.
    all_skills.append(list(skill_mentions))
    all_education.append(list(edu_mentions))
    all_countries.append(list(gpe_mentions))
    all_experience.append(extract_experience(bio_text))

# ---------- Add to dataframe ----------
extracted_columns = {
    'skills_extracted': all_skills,
    'education_extracted': all_education,
    'countries_extracted': all_countries,
    'experience_years': all_experience,
}
for column_name, column_values in extracted_columns.items():
    df_surgeon[column_name] = column_values

# ---------- Save enhanced dataset ----------
df_surgeon.to_csv("surgeon_extracted.csv", index=False)
print("‚úÖ Extraction complete. Saved as surgeon_extracted.csv")

‚úÖ Extraction complete. Saved as surgeon_extracted.csv


In [14]:
# Read the enriched dataset back from disk.
# NOTE(review): the list-valued *_extracted columns were written to CSV as
# text, so they come back as strings (e.g. "['Toronto']") and must be parsed
# before they can be iterated as lists.
df = pd.read_csv("surgeon_extracted.csv")
# Bare expression so the notebook renders the dataframe as the cell output.
df

Unnamed: 0,hard_text,profession,gender,skills_extracted,education_extracted,countries_extracted,experience_years
0,Dr. Chung's patients gave him an average ratin...,surgeon,Male,[],[],[],
1,He specializes in the use of arthroscopy in th...,surgeon,Male,[],[],['Toronto'],
2,Dr. Sherwood graduated from New York Medical C...,surgeon,Male,[],[],['Sidney'],47.0
3,Dr. Brown graduated from New York Medical Coll...,surgeon,Male,[],[],[],13.0
4,He is a Diplomat of the American Board of Surg...,surgeon,Male,[],[],['Dallas'],
...,...,...,...,...,...,...,...
195,"She specializes in Brachial Plexus, Complex De...",surgeon,Female,[],[],[],
196,Ayesha is interested in the re-design of the h...,surgeon,Female,[],[],['the United States'],
197,Dr. Munoz graduated from University of Massach...,surgeon,Female,[],[],"['New York', 'Cigna']",8.0
198,Dr McKertich graduated top of her year from Th...,surgeon,Female,[],[],"['Sydney', 'Melbourne', 'Perth']",


Generation of Job Ad for Surgeon

In [15]:

from collections import Counter         # a special Python class from the collections module that makes it extremely easy to count the frequency of items in a list.
import random


def _parse_list_cell(cell):
    """Turn one *_extracted cell into a list without executing any code."""
    if isinstance(cell, (list, tuple, set)):
        # Dataframe still in memory: the cell already holds the real container.
        return list(cell)
    if isinstance(cell, str):
        text = cell.strip()
        if not text:
            return []
        # literal_eval only accepts Python literals, so a crafted CSV cell
        # cannot run arbitrary code the way eval() would.
        parsed = ast.literal_eval(text)
        if not isinstance(parsed, (list, tuple, set)):
            raise ValueError(f"expected a list literal, got {cell!r}")
        return list(parsed)
    if cell is None or (isinstance(cell, float) and cell != cell):
        # None / NaN: an empty CSV cell, i.e. nothing was extracted.
        return []
    raise TypeError(f"cannot interpret {cell!r} as a list of keywords")


def aggregate_keywords(df, ignore_countries=("Cigna",), default_experience=5):
    """
    Summarise the extracted keyword columns across all biographies.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'skills_extracted', 'education_extracted',
        'countries_extracted' and 'experience_years'. The three list columns
        may hold real lists or their text form as read back from CSV
        (e.g. "['Dallas']"); empty / NaN cells count as empty lists.
    ignore_countries : iterable of str, optional
        Location strings to leave out of the count (known false positives
        from the entity recogniser).
    default_experience : int, optional
        Value returned for the average when no row has an experience figure.

    Returns
    -------
    tuple
        ``(top_skills, top_edu, top_countries, avg_exp)``: the 5 most common
        skills, the 2 most common education entries, a list holding the single
        most common location (empty if none was found), and the mean years of
        experience truncated to an int.

    Raises
    ------
    ValueError, SyntaxError
        If a text cell is not a valid Python list literal.
    TypeError
        If a cell holds a value that is neither text, a container, nor missing.
    """
    all_skills = Counter()
    all_education = Counter()
    all_countries = Counter()
    experience_list = []

    ignored = set(ignore_countries)

    for _, row in df.iterrows():
        all_skills.update(_parse_list_cell(row['skills_extracted']))
        all_education.update(_parse_list_cell(row['education_extracted']))
        all_countries.update(
            c for c in _parse_list_cell(row['countries_extracted']) if c not in ignored
        )

        # Rows without an experience figure are skipped, not counted as zero.
        if not pd.isna(row['experience_years']):
            experience_list.append(int(row['experience_years']))

    top_skills = [s for s, _ in all_skills.most_common(5)]
    top_edu = [e for e, _ in all_education.most_common(2)]
    top_countries = [c for c, _ in all_countries.most_common(1)]
    if experience_list:
        avg_exp = int(sum(experience_list) / len(experience_list))
    else:
        avg_exp = default_experience

    return top_skills, top_edu, top_countries, avg_exp

# NOTE: this rebinds `skills` (until now the keyword list used to build the
# phrase-matcher patterns) and `countries` to the aggregated results.
# Re-running the matcher cell after this one would therefore build patterns
# from the top skills only.
skills, edu, countries, exp = aggregate_keywords(df)
for label, value in (
    ("Skills:", skills),
    ("Education:", edu),
    ("Country:", countries),
    ("Experience:", exp),
):
    print(label, value)


Skills: ['Neurosurgery', 'Minimally Invasive Surgery']
Education: ['MBBS', 'MD']
Country: ['Dallas']
Experience: 25


In [18]:
def generate_surgeon_job_ad(skills, education, country, experience):
    """
    Fill the fixed surgeon job-ad template and return it as one string.

    skills     -- iterable of skill names, comma-joined under "Preferred Skills"
    education  -- iterable of degree names, comma-joined inside "Medical degree (...)"
    country    -- text placed on the "Location" line
    experience -- value shown as "approximately <experience>+ years"

    The returned text starts and ends with a newline.
    """
    degree_list = ', '.join(education)
    skill_list = ', '.join(skills)
    return f"""
üè• Job Title: Surgeon
üìç Location: {country}
üïí Employment Type: Full-time

About Us
Our hospital provides advanced surgical care in a supportive and collaborative environment. We are committed to delivering high-quality patient outcomes while fostering professional growth for our surgical team.

Position Overview
We are seeking a skilled Surgeon with approximately {experience}+ years of experience. The ideal candidate will have strong surgical expertise, excellent decision-making skills, and a commitment to patient safety and care.

Key Responsibilities
- Perform surgical procedures according to established protocols
- Assess patients pre- and post-operatively
- Collaborate with nurses, anesthesiologists, and other specialists
- Maintain accurate surgical and patient records
- Participate in multidisciplinary care meetings and continuous improvement initiatives

Required Qualifications
- Medical degree ({degree_list})
- Completed surgical residency and board certification
- Active state medical license (or eligibility to obtain)
- Strong clinical, technical, and interpersonal skills

Preferred Skills
- {skill_list}

What We Offer
- Advanced surgical facilities and supportive team environment
- Competitive salary and benefits
- Opportunities for professional development and fellowship training
- A hospital culture that values teamwork, excellence, and patient-centered care

How to Apply
Interested applicants may submit their CV and a brief cover letter highlighting surgical experience. We look forward to welcoming a new member to our surgical team.
"""


In [19]:
# `countries` holds at most one entry (the most frequent extracted location)
# and is empty when no location was found in any bio. Fall back to a
# placeholder instead of letting `countries[0]` raise IndexError.
job_location = countries[0] if countries else "Not specified"

final_job_ad = generate_surgeon_job_ad(skills, edu, job_location, exp)
print(final_job_ad)

output_filename = "surgeon_job_ad.txt"

# Explicit UTF-8 so the non-ASCII characters in the ad are written the same on every platform.
with open(output_filename, "w", encoding="utf-8") as f:
    f.write(final_job_ad)


üè• Job Title: Surgeon
üìç Location: Dallas
üïí Employment Type: Full-time

About Us
Our hospital provides advanced surgical care in a supportive and collaborative environment. We are committed to delivering high-quality patient outcomes while fostering professional growth for our surgical team.

Position Overview
We are seeking a skilled Surgeon with approximately 25+ years of experience. The ideal candidate will have strong surgical expertise, excellent decision-making skills, and a commitment to patient safety and care.

Key Responsibilities
- Perform surgical procedures according to established protocols
- Assess patients pre- and post-operatively
- Collaborate with nurses, anesthesiologists, and other specialists
- Maintain accurate surgical and patient records
- Participate in multidisciplinary care meetings and continuous improvement initiatives

Required Qualifications
- Medical degree (MBBS, MD)
- Completed surgical residency and board certification
- Active state medical 