# AI requirements: yearly summary (v6)

Load all `ai_job_requirements_all_YYYY_v6.json` outputs, build a single DataFrame, and summarize counts per year and class.

In [7]:
from pathlib import Path
import json
import pandas as pd

def find_root(start: Path) -> Path:
    """Return the nearest directory that contains a "Results Datasets" entry.

    The search begins at ``start`` itself and then proceeds through each of
    its ancestors, closest first. If no candidate contains the marker,
    ``start`` is returned unchanged.
    """
    candidates = (start, *start.parents)
    return next(
        (folder for folder in candidates if (folder / "Results Datasets").exists()),
        start,
    )

# Anchor the search at this file when run as a script; `__file__` is not
# defined inside a notebook kernel, so fall back to the working directory.
try:
    ROOT = find_root(Path(__file__).resolve())
except NameError:
    ROOT = find_root(Path.cwd().resolve())

# Folder holding the per-year requirement classification outputs
RESULTS_DIR = ROOT.joinpath("Results Datasets", "ai_mentions", "results", "requirements")

def load_results(results_dir=None):
    """Load every ``ai_job_requirements_all_*_v6.json`` file into one DataFrame.

    Each file is read as a JSON object mapping a year string to an object
    that maps ad ids to per-ad metadata. One row is produced per ad.

    Args:
        results_dir: Directory to scan. Defaults to the module-level
            ``RESULTS_DIR`` when omitted.

    Returns:
        A DataFrame with the columns ``year``, ``ad_id``, ``ai_requirement``
        (normalized to ``"True"``, ``"Maybe"`` or ``"False"``), ``keywords``
        and ``reason``. The columns are present even when nothing was loaded.

    Files that cannot be read or parsed, and entries that do not have the
    expected object shape, are skipped with a warning instead of silently
    (or by crashing), so missing data is visible in the summary counts.
    Top-level keys that are not integers are skipped quietly.
    """
    import warnings  # local import keeps the block self-contained

    columns = ["year", "ad_id", "ai_requirement", "keywords", "reason"]
    base = RESULTS_DIR if results_dir is None else Path(results_dir)

    rows = []
    for path in sorted(base.glob("ai_job_requirements_all_*_v6.json")):
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError
            warnings.warn(f"Skipping unreadable results file {path.name}: {exc}", stacklevel=2)
            continue
        if not isinstance(data, dict):
            warnings.warn(f"Skipping {path.name}: top-level JSON value is not an object", stacklevel=2)
            continue

        malformed = 0  # entries in this file that lacked the expected shape
        for ys, ads in data.items():
            try:
                year = int(ys)
            except ValueError:
                # Non-year keys are not ad collections; ignore them
                continue
            if not isinstance(ads, dict):
                malformed += 1
                continue
            for ad_id, meta in ads.items():
                if not isinstance(meta, dict):
                    malformed += 1
                    continue
                # Missing/empty labels count as "False"; tolerate stray
                # whitespace and any letter case in the stored label.
                label = str(meta.get("ai_requirement") or "False").strip().lower()
                if label in ("true", "t", "yes"):
                    ar = "True"
                elif label == "maybe":
                    ar = "Maybe"
                else:
                    ar = "False"
                rows.append({
                    "year": year,
                    "ad_id": ad_id,
                    "ai_requirement": ar,
                    "keywords": meta.get("keywords", []),
                    "reason": meta.get("reason", ""),
                })
        if malformed:
            warnings.warn(f"Skipped {malformed} malformed entries in {path.name}", stacklevel=2)

    # Passing the column list keeps the schema stable for an empty result
    return pd.DataFrame(rows, columns=columns)

df = load_results()
print("ROOT=", ROOT)
print("Results dir exists:", RESULTS_DIR.exists())
print("Files found:", len(list(RESULTS_DIR.glob('ai_job_requirements_all_*_v6.json'))))
print("Rows loaded:", len(df))
df.head()

ROOT= /Users/miguel/Documents/Master Thesis/Thesis
Results dir exists: True
Files found: 15
Rows loaded: 57329


Unnamed: 0,year,ad_id,ai_requirement,keywords,reason
0,2010,sjmm_suf-1-01-2010-03-01118-0-000000001,False,[],No AI/ML or AI-adjacent skills mentioned; role...
1,2010,sjmm_suf-1-01-2010-03-01118-0-000000002,False,[],No explicit AI/ML mention; only general EDV an...
2,2010,sjmm_suf-1-01-2010-03-01119-0-000000001,False,[],No AI/ML terms or AI-adjacent skills appear in...
3,2010,sjmm_suf-1-01-2010-03-01120-0-000000001,False,[],No AI/ML terms or AI-adjacent requirements; on...
4,2010,sjmm_suf-1-01-2010-03-01120-0-000000002,False,[],No AI/ML terms or AI-adjacent skills appear in...


In [8]:
# Ad counts per year, split by AI-requirement class, with a per-year total
if not df.empty:
    year_class = df.pivot_table(
        index="year",
        columns="ai_requirement",
        values="ad_id",
        aggfunc="count",
        fill_value=0,
    )
    year_totals = df.groupby("year")["ad_id"].count().rename("total")
    summary = year_class.join(year_totals)
    display(summary)
else:
    # Nothing loaded: show an empty frame rather than failing on the pivot
    display(pd.DataFrame())

Unnamed: 0_level_0,False,Maybe,True,total
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010,3771,91,1,3863
2011,3858,90,3,3951
2012,3901,92,3,3996
2013,3885,104,4,3993
2014,3387,62,3,3452
2015,3304,73,8,3385
2016,3607,97,9,3713
2017,3549,105,19,3673
2018,3681,118,26,3825
2019,3830,119,44,3993


In [9]:
# Percentage share of every class per year, shown next to the raw counts
if not df.empty:
    # Divide each year's class counts by that year's total, row by row
    share = year_class.div(year_totals, axis=0).mul(100).round(2)
    share = share.rename(columns={c: f"{c}_pct" for c in share.columns})
    display(pd.concat([summary, share], axis=1))
else:
    display(pd.DataFrame())

Unnamed: 0_level_0,False,Maybe,True,total,False_pct,Maybe_pct,True_pct
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010,3771,91,1,3863,97.62,2.36,0.03
2011,3858,90,3,3951,97.65,2.28,0.08
2012,3901,92,3,3996,97.62,2.3,0.08
2013,3885,104,4,3993,97.3,2.6,0.1
2014,3387,62,3,3452,98.12,1.8,0.09
2015,3304,73,8,3385,97.61,2.16,0.24
2016,3607,97,9,3713,97.15,2.61,0.24
2017,3549,105,19,3673,96.62,2.86,0.52
2018,3681,118,26,3825,96.24,3.08,0.68
2019,3830,119,44,3993,95.92,2.98,1.1
