# Tests
## Import libraries

In [13]:
import os
import sys
import openai
import pandas as pd
from dotenv import load_dotenv


# Make the project's ``src`` directory (one level above the current working
# directory) importable. This must run before the ``jobs_tools`` import below.
src_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'src'))
if src_path not in sys.path:
    # Insert at position 0 so this path is searched before other entries.
    sys.path.insert(0, src_path)

# autoreload mode 2: re-import modules before each cell runs, so edits to the
# helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Project-local helpers; resolvable only after the sys.path tweak above.
from jobs_tools import tests_helpers, chat_gpt


# Load variables from a .env file into the process environment.
# NOTE(review): presumably this supplies the OpenAI credentials that the
# client below picks up from the environment — confirm.
load_dotenv()
# Async OpenAI client that is passed to chat_gpt.chatgpt_async in later cells.
client_async = openai.AsyncOpenAI()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
# Translated job postings; the preview below shows the original description
# next to its "Job Description English" counterpart.
df = pd.read_csv(filepath_or_buffer="../data/csv/df_translated.csv")
df.head()

Unnamed: 0,Location,Region,EU Member,Schengen Agreement,Google Domain Type,Google Domain Used,Job Title,Company Name,Job Location,Apply Options,Job Description,Work from home,Salary,Schedule type,Qualifications,Job ID,Search Date,Search Query,Language,Job Description English
0,Austria,Europe,True,True,local,google.at,Sandbox Innovation Sdn. Bhd. | Senior Mobile A...,Tideri Jobbörse,Austria,"Trabajo.org - Stellenangebote, Arbeit",We are a dynamic FinTech company headquartered...,,,Full-time,,eyJqb2JfdGl0bGUiOiJTYW5kYm94IElubm92YXRpb24gU2...,2025-01-13 12:33:21 UTC,iOS developer,en,We are a dynamic FinTech company headquartered...
1,Austria,Europe,True,True,default,google.com,Mobile Application Developer,Pearson Carter,Austria,"Trabajo.org - Stellenangebote, Arbeit",Lead Mobile Developer | Hyper Growth Startup |...,,,Full-time,No degree mentioned,eyJqb2JfdGl0bGUiOiJNb2JpbGUgQXBwbGljYXRpb24gRG...,2025-01-13 12:08:51 UTC,Android developer,en,Lead Mobile Developer | Hyper Growth Startup |...
2,Austria,Europe,True,True,default,google.com,"Android Developer – Kotlin (Austria based, Hyb...",Bitcoin Devs Company,"Vienna, Austria",Jobs3,Overview:\r\nThe Android Developer – Kotlin po...,,,Contractor,No degree mentioned,eyJqb2JfdGl0bGUiOiJBbmRyb2lkIERldmVsb3BlciDigJ...,2025-01-13 12:08:48 UTC,Android developer,en,Overview:\r\nThe Android Developer – Kotlin po...
3,Austria,Europe,True,True,default,google.com,Android & iOS Developer,ventopay gmbh,Austria,StudySmarter - Talents,Was sind deine Aufgaben?\r\n• Du gestaltest at...,,,Full-time,No degree mentioned,eyJqb2JfdGl0bGUiOiJBbmRyb2lkIFx1MDAyNiBpT1MgRG...,2025-01-13 12:20:43 UTC,iOS developer,de,**What are your tasks?** \r\n- You design att...
4,Austria,Europe,True,True,default,google.com,iOS Developer Up3 (f/m/d),Drei Österreich,"Vienna, Austria","MyAbility.jobs, Drei., Jobted.at",Do you want to push the frontier of digital se...,,,Full-time,No degree mentioned,eyJqb2JfdGl0bGUiOiJpT1MgRGV2ZWxvcGVyIFVwMyAoZi...,2025-01-13 12:20:39 UTC,iOS developer,en,Do you want to push the frontier of digital se...


## Split job descriptions into structured sections
### Columns into text

In [15]:
# Fixed-seed sample of 30 postings, so the same rows are drawn on every run.
sample = df.sample(random_state=42, n=30)

# Dump the English descriptions of the sample to a text file.
# NOTE(review): presumably this file is annotated by hand to produce the
# ground-truth file loaded in the next section — confirm.
tests_helpers.column_to_txt(
    df=sample,
    column='Job Description English',
    output="../data/ground_truth/job_descriptions_sample.txt",
)

### Load ground truth into DataFrame

In [17]:
# Attach the annotated text file to the sampled rows; the preview below shows
# the result carrying an extra "Ground Truth" column.
test = tests_helpers.txt_to_column(
    df=sample,
    txt_path="../data/ground_truth/job_descriptions_ground_truth.txt",
)
test.head()

Unnamed: 0,Location,Region,EU Member,Schengen Agreement,Google Domain Type,Google Domain Used,Job Title,Company Name,Job Location,Apply Options,...,Work from home,Salary,Schedule type,Qualifications,Job ID,Search Date,Search Query,Language,Job Description English,Ground Truth
1575,Poland,Europe,True,True,default,google.com,Android Developer,Connectis_,"Kórnik, Poland",HitPraca.pl,...,,,Full-time,No degree mentioned,eyJqb2JfdGl0bGUiOiJBbmRyb2lkIERldmVsb3BlciIsIm...,2025-01-13 12:11:12 UTC,Android developer,pl,**Android Developer** \r\n**Location:** Pozna...,1. Platform: Android\n2. Salary: Not mentioned...
324,Canada,Northern America,False,False,default,google.com,iOS Developer (Remote),McAfee,"Waterloo, ON, Canada",Blind,...,,,Full-time,No degree mentioned,eyJqb2JfdGl0bGUiOiJpT1MgRGV2ZWxvcGVyIChSZW1vdG...,2025-01-13 12:21:02 UTC,iOS developer,en,Role OverviewMcAfee is searching for an interm...,1. Platform: iOS\n2. Salary: Not mentioned\n3....
1491,Netherlands,Europe,True,True,default,google.com,IOS developer,StarApple,"Eindhoven, Netherlands",Werkzoeken.nl,...,,,Full-time,No degree mentioned,eyJqb2JfdGl0bGUiOiJJT1MgZGV2ZWxvcGVyIiwiY29tcG...,2025-01-13 12:22:59 UTC,iOS developer,nl,**Organization & Department**\r\n\r\nAre you i...,"1. Platform: iOS\n2. Salary: €3,200 to €5,000\..."
1815,Romania,Europe,True,True,default,google.com,Android Developer / CTS Expert,Luxoft,Romania,"Indeed, Jooble, LinkedIn",...,,,Full-time,No degree mentioned,eyJqb2JfdGl0bGUiOiJBbmRyb2lkIERldmVsb3BlciAvIE...,2025-01-13 12:11:28 UTC,Android developer,en,"Remote Romania, Romania\r\n\r\nAndroid\r\n\r\n...",1. Platform: Android\n2. Salary: Not mentioned...
2174,United Kingdom,Europe,False,False,default,google.com,Embedded Software Developer (Android),Integrity,"Chesterton, United Kingdom",SitePoint,...,,,Full-time,,eyJqb2JfdGl0bGUiOiJFbWJlZGRlZCBTb2Z0d2FyZSBEZX...,2025-01-13 12:12:03 UTC,Android developer,en,If you have experience developing embedded rea...,1. Platform: Android\n2. Salary: £40-50K plus ...


### Split ground truth into sections

In [18]:
# One output column per numbered heading of the ground-truth text.
# Each pair is (new column name, regex matching the heading that starts the section).
for gt_column, heading_regex in (
    ("Platform_GT", r"1\. Platform"),
    ("Salary_GT", r"2\. Salary"),
    ("Requirements_GT", r"3\. Requirements"),
    ("Nice to have_GT", r"4\. Nice to have"),
    ("Responsibilities_GT", r"5\. Responsibilities"),
    ("Benefits_GT", r"6\. Benefits"),
):
    # Bind the current regex as a default argument so each lambda keeps its own heading.
    test[gt_column] = test["Ground Truth"].apply(
        lambda text, heading=heading_regex: tests_helpers.extract_section(text, heading)
    )

test.head()

Unnamed: 0,Location,Region,EU Member,Schengen Agreement,Google Domain Type,Google Domain Used,Job Title,Company Name,Job Location,Apply Options,...,Search Query,Language,Job Description English,Ground Truth,Platform_GT,Salary_GT,Requirements_GT,Nice to have_GT,Responsibilities_GT,Benefits_GT
1575,Poland,Europe,True,True,default,google.com,Android Developer,Connectis_,"Kórnik, Poland",HitPraca.pl,...,Android developer,pl,**Android Developer** \r\n**Location:** Pozna...,1. Platform: Android\n2. Salary: Not mentioned...,Android,Not mentioned,- Minimum 3 years of commercial experience as ...,- Familiarity with technologies and tools such...,- Designing and implementing new screens in th...,- Opportunity to participate in integration ev...
324,Canada,Northern America,False,False,default,google.com,iOS Developer (Remote),McAfee,"Waterloo, ON, Canada",Blind,...,iOS developer,en,Role OverviewMcAfee is searching for an interm...,1. Platform: iOS\n2. Salary: Not mentioned\n3....,iOS,Not mentioned,- You can develop iOS applications at an inter...,- Diving deep into lower-level libraries like ...,"(list out or say ""Not mentioned"")\n - Your wo...",Not mentioned
1491,Netherlands,Europe,True,True,default,google.com,IOS developer,StarApple,"Eindhoven, Netherlands",Werkzoeken.nl,...,iOS developer,nl,**Organization & Department**\r\n\r\nAre you i...,"1. Platform: iOS\n2. Salary: €3,200 to €5,000\...",iOS,"€3,200 to €5,000",- 3 years of work experience with Swift\n - B...,- Experience in an environment where cybersecu...,- Primarily focus on backend development for t...,- 28 vacation days + the option to purchase 10...
1815,Romania,Europe,True,True,default,google.com,Android Developer / CTS Expert,Luxoft,Romania,"Indeed, Jooble, LinkedIn",...,Android developer,en,"Remote Romania, Romania\r\n\r\nAndroid\r\n\r\n...",1. Platform: Android\n2. Salary: Not mentioned...,Android,Not mentioned,- Deep expertise in Google CTS tests\n - Stro...,- Docker\n - AUTOSAR (AUTomotive Open System ...,- Work inside an intercultural team of many la...,Not mentioned
2174,United Kingdom,Europe,False,False,default,google.com,Embedded Software Developer (Android),Integrity,"Chesterton, United Kingdom",SitePoint,...,Android developer,en,If you have experience developing embedded rea...,1. Platform: Android\n2. Salary: £40-50K plus ...,Android,£40-50K plus benefits,- Degree in relevant subject\n - Embedded rea...,- Experience of Linux kernel and system progra...,Not mentioned,- An early finish on Fridays\n - Bonus Privat...


### Chat GPT's version

In [19]:
# System message for the section-splitting run: frames the model as a strict
# extractor. Passed as `system_prompt` to chat_gpt.chatgpt_async below.
system_prompt = """
You are an AI assistant. Your role is to extract specific information from job descriptions and format them in a strict structure.
"""

# User message prefix for the section-splitting run. The numbered headings
# ("1. Platform" ... "6. Benefits") are the same headings the later
# tests_helpers.extract_section calls match with their regexes, so the two
# must stay in sync.
# NOTE(review): "About You" is routed to Requirements by one guideline and
# "About you" to Responsibilities by another — the model has to guess.
# NOTE(review): typo "describs" (-> "describes") in the guidelines. Left
# untouched here because the prompt text is part of the request; changing it
# may alter model output and any cached responses — confirm before fixing.
# NOTE(review): Platform is constrained to Android/iOS/Cross-platform, yet one
# displayed result contains "iOS/macOS"; consider tightening the instruction.
user_prompt = """
I will provide a job description. Please extract and present the information in **this exact order**:

1. Platform: (Android/iOS/Cross-platform)
2. Salary: (If stated; otherwise "Not mentioned")
3. Requirements: (verbatim from the job description or "Not mentioned")
4. Nice to have: (verbatim or "Not mentioned")
5. Responsibilities: (verbatim or "Not mentioned")
6. Benefits: (verbatim or "Not mentioned")

**Guidelines**:
- **DO NOT reword, paraphrase, or summarize** any part of the job description. Copy the sentences exactly as they appear.
- Combine all mandatory or required skill sections (e.g., "Requirements," "Skills," "Key Technologies," "About You") under **Requirements**.
- If the job description specifically says something is "a plus," "beneficial," or otherwise indicates it’s optional, place it under "Nice to have" even if it appears under a "Requirements" heading in the job description.
- If there is an "About you" or "About Role" section (or similar) that describs duties or tasks, include those under "Responsibilities".
- If the information is not in the job description, write "Not mentioned" for that section.
- For multiple platforms (e.g., Android, iOS), list them all in **Platform** and use headings under Requirements (and other sections, if needed) like "General Requirements:", "For Android Developers:", "For iOS Developers:".
- Present your answer **only** in the format above.

---
Here is the job description:
"""

df_split = await chat_gpt.chatgpt_async(
    input_column_name="Job Description English", 
    output_column_name="Job Description Extracted",
    input_text_length=None,
    output_text_length=None,
    num_rows=None,  
    df=test.copy(), 
    system_prompt=system_prompt,
    user_prompt=user_prompt,
    gpt_model="gpt-4o-2024-11-20",
    client=client_async,
    batch_size=5,
    concurrency_limit=10,
    cache_file="../data/cache/job_description_extracted_cache_gpt-4o-2024-11-20.json"
)

df_split.head()

Processing Batches: 100%|██████████| 7/7 [00:14<00:00,  2.10s/it]


Unnamed: 0,Location,Region,EU Member,Schengen Agreement,Google Domain Type,Google Domain Used,Job Title,Company Name,Job Location,Apply Options,...,Language,Job Description English,Ground Truth,Platform_GT,Salary_GT,Requirements_GT,Nice to have_GT,Responsibilities_GT,Benefits_GT,Job Description Extracted
1575,Poland,Europe,True,True,default,google.com,Android Developer,Connectis_,"Kórnik, Poland",HitPraca.pl,...,pl,**Android Developer** \r\n**Location:** Pozna...,1. Platform: Android\n2. Salary: Not mentioned...,Android,Not mentioned,- Minimum 3 years of commercial experience as ...,- Familiarity with technologies and tools such...,- Designing and implementing new screens in th...,- Opportunity to participate in integration ev...,1. Platform: Android \n2. Salary: Not mention...
324,Canada,Northern America,False,False,default,google.com,iOS Developer (Remote),McAfee,"Waterloo, ON, Canada",Blind,...,en,Role OverviewMcAfee is searching for an interm...,1. Platform: iOS\n2. Salary: Not mentioned\n3....,iOS,Not mentioned,- You can develop iOS applications at an inter...,- Diving deep into lower-level libraries like ...,"(list out or say ""Not mentioned"")\n - Your wo...",Not mentioned,1. Platform: iOS/macOS \n2. Salary: Not menti...
1491,Netherlands,Europe,True,True,default,google.com,IOS developer,StarApple,"Eindhoven, Netherlands",Werkzoeken.nl,...,nl,**Organization & Department**\r\n\r\nAre you i...,"1. Platform: iOS\n2. Salary: €3,200 to €5,000\...",iOS,"€3,200 to €5,000",- 3 years of work experience with Swift\n - B...,- Experience in an environment where cybersecu...,- Primarily focus on backend development for t...,- 28 vacation days + the option to purchase 10...,"1. Platform: iOS \n2. Salary: €3,200 to €5,00..."
1815,Romania,Europe,True,True,default,google.com,Android Developer / CTS Expert,Luxoft,Romania,"Indeed, Jooble, LinkedIn",...,en,"Remote Romania, Romania\r\n\r\nAndroid\r\n\r\n...",1. Platform: Android\n2. Salary: Not mentioned...,Android,Not mentioned,- Deep expertise in Google CTS tests\n - Stro...,- Docker\n - AUTOSAR (AUTomotive Open System ...,- Work inside an intercultural team of many la...,Not mentioned,1. Platform: Android \n2. Salary: Not mention...
2174,United Kingdom,Europe,False,False,default,google.com,Embedded Software Developer (Android),Integrity,"Chesterton, United Kingdom",SitePoint,...,en,If you have experience developing embedded rea...,1. Platform: Android\n2. Salary: £40-50K plus ...,Android,£40-50K plus benefits,- Degree in relevant subject\n - Embedded rea...,- Experience of Linux kernel and system progra...,Not mentioned,- An early finish on Fridays\n - Bonus Privat...,1. Platform: Android \n2. Salary: £40-50K plu...


In [21]:
# Write the model's raw structured answers to a text file next to the ground truth.
tests_helpers.column_to_txt(
    df=df_split,
    column='Job Description Extracted',
    output="../data/ground_truth/job_descriptions_chatgpt.txt",
)

### Split Chat GPT's version into sections

In [22]:
# Same split as for the ground truth, applied to the model output.
# Each pair is (new column name, regex matching the numbered heading).
for gpt_column, heading_regex in (
    ("Platform_GPT", r"1\. Platform"),
    ("Salary_GPT", r"2\. Salary"),
    ("Requirements_GPT", r"3\. Requirements"),
    ("Nice to have_GPT", r"4\. Nice to have"),
    ("Responsibilities_GPT", r"5\. Responsibilities"),
    ("Benefits_GPT", r"6\. Benefits"),
):
    # Bind the current regex as a default argument so each lambda keeps its own heading.
    df_split[gpt_column] = df_split["Job Description Extracted"].apply(
        lambda text, heading=heading_regex: tests_helpers.extract_section(text, heading)
    )

df_split.head()

Unnamed: 0,Location,Region,EU Member,Schengen Agreement,Google Domain Type,Google Domain Used,Job Title,Company Name,Job Location,Apply Options,...,Nice to have_GT,Responsibilities_GT,Benefits_GT,Job Description Extracted,Platform_GPT,Salary_GPT,Requirements_GPT,Nice to have_GPT,Responsibilities_GPT,Benefits_GPT
1575,Poland,Europe,True,True,default,google.com,Android Developer,Connectis_,"Kórnik, Poland",HitPraca.pl,...,- Familiarity with technologies and tools such...,- Designing and implementing new screens in th...,- Opportunity to participate in integration ev...,1. Platform: Android \n2. Salary: Not mention...,Android,Not mentioned,- Minimum 3 years of commercial experience as ...,- Familiarity with technologies and tools such...,- Designing and implementing new screens in th...,- Opportunity to participate in integration ev...
324,Canada,Northern America,False,False,default,google.com,iOS Developer (Remote),McAfee,"Waterloo, ON, Canada",Blind,...,- Diving deep into lower-level libraries like ...,"(list out or say ""Not mentioned"")\n - Your wo...",Not mentioned,1. Platform: iOS/macOS \n2. Salary: Not menti...,iOS/macOS,Not mentioned,- You can develop iOS applications at an inter...,- Diving deep into lower-level libraries like ...,- Dive deep into anti-censorship technologies ...,Not mentioned
1491,Netherlands,Europe,True,True,default,google.com,IOS developer,StarApple,"Eindhoven, Netherlands",Werkzoeken.nl,...,- Experience in an environment where cybersecu...,- Primarily focus on backend development for t...,- 28 vacation days + the option to purchase 10...,"1. Platform: iOS \n2. Salary: €3,200 to €5,00...",iOS,"€3,200 to €5,000",- 3 years of work experience with Swift \n ...,- Experience in an environment where cybersecu...,- You’ll primarily focus on backend developmen...,- 28 vacation days + the option to purchase 10...
1815,Romania,Europe,True,True,default,google.com,Android Developer / CTS Expert,Luxoft,Romania,"Indeed, Jooble, LinkedIn",...,- Docker\n - AUTOSAR (AUTomotive Open System ...,- Work inside an intercultural team of many la...,Not mentioned,1. Platform: Android \n2. Salary: Not mention...,Android,Not mentioned,- Deep expertise in Google CTS tests \n - S...,- Docker \n - AUTOSAR (AUTomotive Open Syst...,- Work inside an intercultural team of many la...,Not mentioned
2174,United Kingdom,Europe,False,False,default,google.com,Embedded Software Developer (Android),Integrity,"Chesterton, United Kingdom",SitePoint,...,- Experience of Linux kernel and system progra...,Not mentioned,- An early finish on Fridays\n - Bonus Privat...,1. Platform: Android \n2. Salary: £40-50K plu...,Android,£40-50K plus benefits,- Degree in relevant subject \n - Embedded ...,- Experience of Linux kernel and system progra...,- Develop and support the software running on ...,- An early finish on Fridays \n - Bonus \n...


### Calculate fuzzy scores

1. Token Sort Ratio (Handling Word Order Differences)<br>
If two sentences contain the same words in a different order, plain Levenshtein distance can give a misleadingly low similarity score. To correct for this, we:
- Split both sentences into words.
- Sort them alphabetically.
- Recalculate Levenshtein Distance.

2. Token Set Ratio (Handling Partial Overlaps)<br>
If one string is a subset of another, Token Set Ratio helps:
- Convert both sentences into sets of unique words.
- Compare only the common words.

In [23]:
# Prints the average Token Set Ratio per section and the overall average
# (see the cell output below).
# NOTE(review): presumably pairs each *_GT column with its *_GPT counterpart
# in df_split — confirm against tests_helpers.compare_fuzzy_sections.
tests_helpers.compare_fuzzy_sections(df_split)

Average Token Set Ratio per section:
Platform: 98.33
Salary: 99.98
Requirements: 98.61
Nice to have: 93.38
Responsibilities: 91.42
Benefits: 91.85

Overall Average Token Set Ratio: 95.60


## Extract technologies and tools

In [24]:
# Filtered postings that already carry the extracted section columns
# (Platform, Salary_E, Requirements, Nice to have, Responsibilities, Benefits).
# read_csv returns a freshly built DataFrame, so the extra .copy() only
# duplicated the data in memory and has been dropped.
df_filtered = pd.read_csv('../data/csv/df_filtered.csv')
df_filtered.head()

Unnamed: 0,Location,Region,EU Member,Schengen Agreement,Google Domain Type,Google Domain Used,Job Title,Company Name,Job Location,Apply Options,...,Search Query,Language,Job Description English,Job Description Extracted,Platform,Salary_E,Requirements,Nice to have,Responsibilities,Benefits
0,Austria,Europe,True,True,default,google.com,"Android Developer – Kotlin (Austria based, Hyb...",Bitcoin Devs Company,"Vienna, Austria",Jobs3,...,Android developer,en,Overview:\r\nThe Android Developer – Kotlin po...,1. Platform: Android \r\n2. Salary: Not menti...,Android,Not mentioned,• Passionate about mobile platforms and transl...,Not mentioned,• Ensure that the app meets our quality standa...,Not mentioned
1,Austria,Europe,True,True,default,google.com,ios entwickler 80–100% w/m/d,CHANCENLAND VORARLBERG,"Dornbirn, Austria","IT-Career.at, STEMJOBS.AT, IT-JOBS.AT",...,iOS developer,de,**iOS Developer 80–100% w/m/d**\r\n\r\n**Job D...,1. Platform: iOS \r\n2. Salary: Not mentioned...,iOS,Not mentioned,"- **Motivation over experience:** Curiosity, i...",Not mentioned,- You will work with us on exciting projects f...,"- A compact, battle-tested team and flat hiera..."
2,Austria,Europe,True,True,default,google.com,Middle iOS developer,Processica,"Vienna, Austria",JOBITT,...,iOS developer,en,Looking for a iOS Developer. Playing well in a...,1. Platform: iOS \r\n2. Salary: Not mentioned...,iOS,Not mentioned,Playing well in a team and has strong analytic...,Not mentioned,Not mentioned,Not mentioned
3,Austria,Europe,True,True,default,google.com,iOS Developer,Raiffeisen Gruppe,"Linz, Austria","Jooble, Trabajo.org - Stellenangebote, Arbeit",...,iOS developer,de,**Your Role in the Team** \r\n- You contribut...,1. Platform: iOS \r\n2. Salary: Not mentioned...,iOS,Not mentioned,- You are familiar with Continuous Integration...,Not mentioned,- You contribute to the further development of...,- Home Office \r\n - Flexible working hours...
4,Austria,Europe,True,True,default,google.com,iOS Software Engineer,Cybermoth,"Vienna, Austria","Expertini, Talent.com",...,iOS developer,en,We are searching for iOS Software Engineers wi...,1. Platform: iOS \r\n2. Salary: Depending on ...,iOS,Depending on qualifications and professional e...,• Worked on at least one native Swift applicat...,Not mentioned,• In this position you will be part of one or ...,• You will be part of a company with an inspir...


### Merge 3 columns into 1

In [25]:
# Fixed-seed sample of 40 filtered postings.
df_sample = df_filtered.sample(random_state=42, n=40)

# Merge the three section columns into one numbered text per posting.
# Each section is prefixed with its heading and sections are separated by a
# blank line, i.e. "3. Requirements:\n...\n\n4. Nice to have:\n...\n\n5. Responsibilities:\n...".
df_sample["Full Requirements"] = (
    "3. Requirements:\n" + df_sample["Requirements"].astype(str)
).str.cat(
    [
        "4. Nice to have:\n" + df_sample["Nice to have"].astype(str),
        "5. Responsibilities:\n" + df_sample["Responsibilities"].astype(str),
    ],
    sep="\n\n",
)
df_sample.head()

Unnamed: 0,Location,Region,EU Member,Schengen Agreement,Google Domain Type,Google Domain Used,Job Title,Company Name,Job Location,Apply Options,...,Language,Job Description English,Job Description Extracted,Platform,Salary_E,Requirements,Nice to have,Responsibilities,Benefits,Full Requirements
282,Canada,Northern America,False,False,default,google.com,Développeur iOS senior -Senior iOS Developer,OneSpan,"Montreal, Quebec, Canada","Greenhouse, Indeed, Kovasys IT Recruitment Age...",...,fr,The English translation for the job descriptio...,1. Platform: iOS \r\n2. Salary: Not mentioned...,iOS,Not mentioned,Not mentioned,Not mentioned,Not mentioned,Not mentioned,3. Requirements:\nNot mentioned\n\n4. Nice to ...
479,France,Europe,True,True,default,google.com,Senior Android Developer - Video Expert (All G...,Dailymotion,"Paris, France",Ivy Exec,...,en,Company Description\r\n\r\nDailymotion is the ...,1. Platform: Android \r\n2. Salary: Not menti...,Android,Not mentioned,• Excellent knowledge of Android SDK \r\n •...,• Proven experience in developing video-based ...,• Design and build advanced applications for t...,Not mentioned,3. Requirements:\n• Excellent knowledge of And...
1307,Romania,Europe,True,True,local,google.ro,iOS Developer,ASSIST SOFTWARE,"Suceava, Romania",IOS Jobs,...,en,Are you an iOS mobile innovator? We need you! ...,1. Platform: iOS \r\n2. Salary: Competitive s...,iOS,Competitive salary,• Ability to design and implement iOS applicat...,"• Experience with Phonegap, HTML5 \r\n • Ad...",Not mentioned,Relocation package for 2 months \r\n Employ...,3. Requirements:\n• Ability to design and impl...
1254,Portugal,Europe,True,True,default,google.com,Principal iOS Software Engineer,Sky,"Aveiro, Portugal",Empregos Trabajo.org,...,en,Role Overview\r\n\r\nSky Portugal is a leading...,"1. Platform: iOS \r\n2. Salary: $120,000 - $1...",iOS,"$120,000 - $180,000 per annum",• 5+ years of experience in native iOS develop...,Not mentioned,"• Design, develop, and deliver high-quality so...",• Above market salary \r\n • Yearly bonus ...,3. Requirements:\n• 5+ years of experience in ...
507,Germany,Europe,True,True,default,google.com,iOS Developer - Health Care App (m/w/d) Trier,HIBA GmbH,"Trier, Germany",XING,...,de,**iOS Developer - Health Care App (m/f/d) Trie...,1. Platform: iOS \r\n2. Salary: Not mentioned...,iOS,Not mentioned,- Completed degree in computer science or a co...,Not mentioned,- Development and maintenance of iOS applicati...,- Continuous training opportunities to ensure ...,3. Requirements:\n- Completed degree in comput...


### Columns into text

In [26]:
# Dump the merged requirements text of the sample to a text file.
tests_helpers.column_to_txt(
    df=df_sample,
    column='Full Requirements',
    output="../data/ground_truth/Full_requirements_sample.txt",
)

### Load ground truth into a DataFrame

In [27]:
# Attach the annotated technologies file to the sample; the preview below
# shows the resulting "Ground Truth" column holding one JSON-like string per row.
df_sample = tests_helpers.txt_to_column(
    df=df_sample,
    txt_path="../data/ground_truth/Full_requirements_ground_truth.txt",
)
df_sample.head()

Unnamed: 0,Location,Region,EU Member,Schengen Agreement,Google Domain Type,Google Domain Used,Job Title,Company Name,Job Location,Apply Options,...,Job Description English,Job Description Extracted,Platform,Salary_E,Requirements,Nice to have,Responsibilities,Benefits,Full Requirements,Ground Truth
282,Canada,Northern America,False,False,default,google.com,Développeur iOS senior -Senior iOS Developer,OneSpan,"Montreal, Quebec, Canada","Greenhouse, Indeed, Kovasys IT Recruitment Age...",...,The English translation for the job descriptio...,1. Platform: iOS \r\n2. Salary: Not mentioned...,iOS,Not mentioned,Not mentioned,Not mentioned,Not mentioned,Not mentioned,3. Requirements:\nNot mentioned\n\n4. Nice to ...,{}
479,France,Europe,True,True,default,google.com,Senior Android Developer - Video Expert (All G...,Dailymotion,"Paris, France",Ivy Exec,...,Company Description\r\n\r\nDailymotion is the ...,1. Platform: Android \r\n2. Salary: Not menti...,Android,Not mentioned,• Excellent knowledge of Android SDK \r\n •...,• Proven experience in developing video-based ...,• Design and build advanced applications for t...,Not mentioned,3. Requirements:\n• Excellent knowledge of And...,"{\n ""languages_and_runtimes"": [""Kotlin"", ""co..."
1307,Romania,Europe,True,True,local,google.ro,iOS Developer,ASSIST SOFTWARE,"Suceava, Romania",IOS Jobs,...,Are you an iOS mobile innovator? We need you! ...,1. Platform: iOS \r\n2. Salary: Competitive s...,iOS,Competitive salary,• Ability to design and implement iOS applicat...,"• Experience with Phonegap, HTML5 \r\n • Ad...",Not mentioned,Relocation package for 2 months \r\n Employ...,3. Requirements:\n• Ability to design and impl...,"{\n ""languages_and_runtimes"": [""Objective-C""..."
1254,Portugal,Europe,True,True,default,google.com,Principal iOS Software Engineer,Sky,"Aveiro, Portugal",Empregos Trabajo.org,...,Role Overview\r\n\r\nSky Portugal is a leading...,"1. Platform: iOS \r\n2. Salary: $120,000 - $1...",iOS,"$120,000 - $180,000 per annum",• 5+ years of experience in native iOS develop...,Not mentioned,"• Design, develop, and deliver high-quality so...",• Above market salary \r\n • Yearly bonus ...,3. Requirements:\n• 5+ years of experience in ...,"{\n ""languages_and_runtimes"":[""Swift""],\n ..."
507,Germany,Europe,True,True,default,google.com,iOS Developer - Health Care App (m/w/d) Trier,HIBA GmbH,"Trier, Germany",XING,...,**iOS Developer - Health Care App (m/f/d) Trie...,1. Platform: iOS \r\n2. Salary: Not mentioned...,iOS,Not mentioned,- Completed degree in computer science or a co...,Not mentioned,- Development and maintenance of iOS applicati...,- Continuous training opportunities to ensure ...,3. Requirements:\n- Completed degree in comput...,"{\n ""languages_and_runtimes"": [""Objective-C""..."


### Chat GPT's version

In [32]:
# System message for the technology-extraction run: asks for a single JSON
# object mapping each technology term to one key of ALLOWED_KEYS.
# The prompt text is sent to the model as-is, so the issues below are flagged
# rather than edited (changing it may alter results and cached responses).
# NOTE(review): GOAL asks for "minified" JSON, but EXAMPLE RESPONSE is
# pretty-printed and the displayed outputs start with "{\n".
# NOTE(review): rule 3 demands alphabetical sorting, but the example lists
# ["Swift","Java"], which is not sorted.
# NOTE(review): several terms appear under more than one key, which conflicts
# with "exactly ONE key" and rule 7: "CircleCI"/"Bamboo" (build_and_dependency_management
# and ci_cd_and_release_automation), "Veracode"/"Checkmarx" (security_and_cryptography
# and code_quality_and_static_analysis). "CocoaPods" is also listed twice
# within build_and_dependency_management.
# NOTE(review): typo "ect." (-> "etc.") in rule 5.
system_prompt_1 = """
You are a senior extraction assistant.

GOAL
Given a natural-language job-vacancy text, return one and only one
valid, minified JSON object that maps every technology term found in
the input to exactly ONE key from ALLOWED_KEYS.

STRICT RULES
1. Output = raw JSON only (no Markdown, no comments, no extra text).
2. Use keys from ALLOWED_KEYS exactly as written. Omit any key whose array would be empty.
3. Each value is an **array of unique strings**, sorted alphabetically and preserving the term’s original spelling/case from the input.
4. Ignore soft skills and vague nouns (e.g. communication, documentation, performance, detail, English, collaboration, problem-solving, university).
5. **IGNORE** overly generic technology words/phrases (e.g. "design patterns", "android ui", "json", "xml", "clean code", "API integration", "unit testing", "Continuous integration", "CI", "CD", "CI/CD", "cryptography", "application testing" ect.).
6. DO NOT hallucinate. Include a term **only** if it appears verbatim in the input text.
7. If a technology term matches **exactly** (case-insensitive) one of the values in ALLOWED_KEYS, then it MUST be assigned to that exact key — even if the name could hypothetically fit other categories.
8. The order of keys in the output JSON MUST strictly follow the order of keys as they appear in the ALLOWED_KEYS section below. Only include keys that have at least one matched term. If a key has no matched terms, it MUST be completely omitted from the output, even if this breaks the visual continuity of the ALLOWED_KEYS order.


ALLOWED_KEYS (with examples)
{
    "languages_and_runtimes": ["Swift", "Kotlin", "Java", "Dart", "Objective-C", "Coroutines", "RxSwift", "Combine", "GCD", "RxJava", "Flows", "JavaScript", "TypeScript", "C", "C++", "Python", "Golang", "PHP"],
    "ui_and_cross_platform_frameworks": ["SwiftUI", "UIKit", "Jetpack Compose", "Flutter", "React Native", "Xamarin", "Ionic", "WatchKit", "Cocoa Touch", "PhoneGap", "Cordova", "Kotlin Multiplatform"],
    "architectural_patterns": ["MVVM", "VIPER", "Clean Architecture", "MVI", "MVC", "MVP", "Redux"],
    "dependency_injection_frameworks": ["Dagger", "Hilt", "Koin", "Swinject"],
    "build_and_dependency_management": ["Gradle", "CocoaPods", "SPM", "Bazel", "Buck", "Xcode", "Android Studio", "CircleCI", "Bamboo", "CocoaPods"],
    "data_and_caching": ["Core Data", "Room", "Realm", "SQLite", "Firestore", "MongoDB", "SAP UltraLite", "MySQL", "NoSQL"],
    "networking_and_api": ["Retrofit", "OkHttp", "URLSession", "Alamofire", "GraphQL", "REST API", "WebSockets", "gRPC", "Protocol Buffers"],
    "backend_or_baas_integration": ["Firebase", "AWS Amplify", "Azure Mobile", "Parse", "AWS Mobile Hub", "AWS Cognito", "AWS S3"],
    "device_and_platform_services_and_third_party_sdks": ["ARKit", "HealthKit", "CoreML", "Core Animation", "Android SDK", "Android NDK", "Push Notifications", "BLE", "NFC", "Camera", "Location", "Sensors", "Stripe SDK", "Facebook SDK", "AdMob", "Google Maps", "ExoPlayer", "Glide", "Stripe", "PayPal SDK", "Binder", "AIDL", "JNI", "CTS", "HAL", "LoRa", "CarPlay", "Android Auto", "CydiaSubstrate", "Frida", "WebViews"],
    "security_and_cryptography": ["Keychain", "TLS pinning", "OAuth2", "Veracode", "Checkmarx", "OWASP"],
    "testing_frameworks": ["XCTest", "JUnit", "Espresso", "Mockito", "Robolectric"],
    "debugging_or_profiling": ["Instruments", "Android Profiler"],
    "version_control": ["Git", "GIT", "git", "SVN", "Mercurial", "Gerrit", "GitFlow", "SourceTree", "Fork", "Bitbucket"],
    "ci_cd_and_release_automation": ["GitHub Actions", "Jenkins", "Bitrise", "fastlane", "CircleCI", "Bamboo", "GitLab CI", "Docker"],
    "monitoring_analytics_and_crash_reporting": ["Crashlytics", "Sentry", "Datadog", "Firebase Analytics", "App Center"],
    "development_methodologies": ["Scrum", "Kanban", "Agile", "SAFe", "TDD", "BDD", "DevOps"],
    "testing_process_and_qa": ["test coverage", "regression testing"],
    "code_quality_and_static_analysis": ["SonarQube", "SwiftLint", "Veracode", "Checkmarx"],
    "documentation_and_knowledge_sharing": ["Swagger", "OpenAPI", "Javadoc", "Confluence", "HIG"],
    "collaboration_pm_and_design_handoff": ["Jira", "Trello", "Figma", "Zeplin", "Rally/AgileCentral"],
    "distribution_and_store_operations": ["TestFlight", "App Store Connect", "Google Play Console"],
    "compliance_and_certifications": ["SOC 2", "GDPR"]
}

EXAMPLE RESPONSE
{
    "languages_and_runtimes":["Swift","Java"],
    "version_control":["Git"]
}
"""


# User message prefix for the technology-extraction run. It ends with
# "INPUT TEXT:" and defers the output format to the system prompt above.
# NOTE(review): presumably chat_gpt.chatgpt_async appends each row's
# "Full Requirements" text after this prefix — confirm.
user_prompt_1 = """
Extract every technology, tool, framework, library, service or formal
methodology mentioned in the text below and output the JSON exactly as
specified in the system prompt.

---
INPUT TEXT:
"""


df_extracted_1 = await chat_gpt.chatgpt_async(
    input_column_name="Full Requirements", 
    output_column_name="Extracted Technologies GPT",
    input_text_length=None,
    output_text_length=None,
    num_rows=None,  
    df=df_sample.copy(), 
    system_prompt=system_prompt_1,
    user_prompt=user_prompt_1,
    gpt_model="gpt-4o-2024-11-20",
    client=client_async,
    batch_size=40,
    concurrency_limit=40,
    cache_file= "../data/cache/Full_requirements_cache.json"
)
df_extracted_1.head()

Processing Batches: 100%|██████████| 1/1 [00:00<00:00, 25.07it/s]


Unnamed: 0,Location,Region,EU Member,Schengen Agreement,Google Domain Type,Google Domain Used,Job Title,Company Name,Job Location,Apply Options,...,Job Description Extracted,Platform,Salary_E,Requirements,Nice to have,Responsibilities,Benefits,Full Requirements,Ground Truth,Extracted Technologies GPT
282,Canada,Northern America,False,False,default,google.com,Développeur iOS senior -Senior iOS Developer,OneSpan,"Montreal, Quebec, Canada","Greenhouse, Indeed, Kovasys IT Recruitment Age...",...,1. Platform: iOS \r\n2. Salary: Not mentioned...,iOS,Not mentioned,Not mentioned,Not mentioned,Not mentioned,Not mentioned,3. Requirements:\nNot mentioned\n\n4. Nice to ...,{},{}
479,France,Europe,True,True,default,google.com,Senior Android Developer - Video Expert (All G...,Dailymotion,"Paris, France",Ivy Exec,...,1. Platform: Android \r\n2. Salary: Not menti...,Android,Not mentioned,• Excellent knowledge of Android SDK \r\n •...,• Proven experience in developing video-based ...,• Design and build advanced applications for t...,Not mentioned,3. Requirements:\n• Excellent knowledge of And...,"{\n ""languages_and_runtimes"": [""Kotlin"", ""co...","{\n ""languages_and_runtimes"":[""Kotlin"",""Cor..."
1307,Romania,Europe,True,True,local,google.ro,iOS Developer,ASSIST SOFTWARE,"Suceava, Romania",IOS Jobs,...,1. Platform: iOS \r\n2. Salary: Competitive s...,iOS,Competitive salary,• Ability to design and implement iOS applicat...,"• Experience with Phonegap, HTML5 \r\n • Ad...",Not mentioned,Relocation package for 2 months \r\n Employ...,3. Requirements:\n• Ability to design and impl...,"{\n ""languages_and_runtimes"": [""Objective-C""...","{\n ""languages_and_runtimes"":[""Objective-C""..."
1254,Portugal,Europe,True,True,default,google.com,Principal iOS Software Engineer,Sky,"Aveiro, Portugal",Empregos Trabajo.org,...,"1. Platform: iOS \r\n2. Salary: $120,000 - $1...",iOS,"$120,000 - $180,000 per annum",• 5+ years of experience in native iOS develop...,Not mentioned,"• Design, develop, and deliver high-quality so...",• Above market salary \r\n • Yearly bonus ...,3. Requirements:\n• 5+ years of experience in ...,"{\n ""languages_and_runtimes"":[""Swift""],\n ...","{\n ""languages_and_runtimes"":[""Swift""],\n ..."
507,Germany,Europe,True,True,default,google.com,iOS Developer - Health Care App (m/w/d) Trier,HIBA GmbH,"Trier, Germany",XING,...,1. Platform: iOS \r\n2. Salary: Not mentioned...,iOS,Not mentioned,- Completed degree in computer science or a co...,Not mentioned,- Development and maintenance of iOS applicati...,- Continuous training opportunities to ensure ...,3. Requirements:\n- Completed degree in comput...,"{\n ""languages_and_runtimes"": [""Objective-C""...","{\n ""languages_and_runtimes"":[""Objective-C""..."


In [33]:
# Write the model's extracted-technology answers to a text file next to the ground truth.
tests_helpers.column_to_txt(
    df=df_extracted_1,
    column='Extracted Technologies GPT',
    output="../data/ground_truth/Full_requirements_ground_chatgpt.txt",
)

### Calculate fuzzy scores


In [34]:
# Score every row by comparing its ground-truth string with the model output,
# rounded to two decimals and stored per row.
df_extracted_1["Token Set Ratio"] = df_extracted_1.apply(
    lambda record: tests_helpers.technologies_token_set_ratio(
        record["Ground Truth"],
        record["Extracted Technologies GPT"],
    ),
    axis="columns",
).round(2)

# Report the mean of the per-row scores.
print("Average Token Set Ratio :", df_extracted_1["Token Set Ratio"].mean().round(2))

Average Token Set Ratio : 98.71
