## NAV - Job Offers (from Difi Datahotell API)

* [NO] Ledige stillinger meldt til NAV (fra 2002 til 2017)
* [EN] Job vacancies reported to NAV (from 2002 to 2017)

Example dataset (2017): https://data.norge.no/data/arbeids-og-velferdsetaten-nav/ledige-stillinger-meldt-til-nav-2017

In [1]:
import requests
from pprint import pprint

In [2]:
# Fetch the first page of the 2017 job-offer dataset.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces an HTTP error here instead of letting an error
# body reach the .json() call in the next cell.
data2017 = requests.get('https://hotell.difi.no/api/json/nav/ledige-stillinger/2017?page=1',
                        timeout=60)
data2017.raise_for_status()

In [3]:
# Decode the JSON body of the first 2017 page and show its top-level keys.
jobOffers = data2017.json()
jobOffers.keys()

dict_keys(['entries', 'page', 'pages', 'posts'])

In [4]:
# Number of pages (value of the 'pages' field of the response)
jobOffers['pages']

1304

In [5]:
# Number of job offers (value of the 'posts' field of the response)
jobOffers['posts']

130377

In [6]:
# First job offer on this page (first element of the 'entries' list)
jobOffers['entries'][0]

{'antall_stillinger': '1',
 'arbeidssted_fylke': 'Østfold',
 'arbeidssted_fylkesnummer': '01',
 'arbeidssted_kommune': 'Halden',
 'arbeidssted_kommunenummer': '0101',
 'arbeidssted_land': 'Norge',
 'arbeidssted_landkode': 'NO',
 'isco_versjon': 'ISCO-08',
 'nav_enhet_kode': '0101',
 'offisiell_statistikk_flagg': '1',
 'registrert_dato': '12.12.2016',
 'sistepubl_dato': '15.01.2017',
 'statistikk_aar_mnd': '201701',
 'stilling_kilde': 'Reg av arb.giver på nav.no',
 'stillingsnummer': '0101201612000013',
 'stillingstittel': 'Pedagogisk leder',
 'virksomhet_navn': 'YNGLINGEN BARNEHAGE',
 'virksomhet_organisasjonsnr': '974122545',
 'yrke': 'Førskolelærere',
 'yrke_grovgruppe': 'Undervisning',
 'yrkesbetegnelse': 'Barnehagelærer',
 'yrkeskode': '2342'}

## Load data into MongoDB

In [7]:
# Open a MongoDB client with the driver's default connection settings and
# select the 'nav' database, which the download cell below writes to.
from pymongo import MongoClient
client = MongoClient()
db = client['nav']
# Optional reset: uncomment the two lines below to empty the 'job-offers'
# collection before a fresh download.
# NOTE(review): `collection_names` was deprecated in PyMongo 3.7 and removed in
# 4.0; on newer drivers use `db.list_collection_names()` instead — confirm
# against the installed PyMongo version.
# if 'job-offers' in db.collection_names(include_system_collections=False):
#     db['job-offers'].delete_many({})

In [None]:
# Download every page of every yearly job-offer dataset (2002-2017) from the
# Difi Datahotell API and store the entries in db['job-offers'].
#
# Progress is checkpointed per year in db['downloads'] (field
# 'last_downloaded_page'), so re-running this cell resumes after the last page
# that was recorded instead of starting over.
# NOTE: the checkpoint is written after the insert; if the run is interrupted
# between the two, that page is inserted a second time on resume.
years = list(range(2002, 2018))
url_base = "https://hotell.difi.no/api/json/nav/ledige-stillinger/"
REQUEST_TIMEOUT_S = 60  # without a timeout a stalled connection blocks forever


def _fetch_page(session, year, page):
    """Return the decoded JSON payload of one page of the dataset for `year`.

    Raises requests.HTTPError on a non-2xx response so that an error body is
    never mistaken for data.
    """
    response = session.get(url_base + str(year), params={'page': page},
                           timeout=REQUEST_TIMEOUT_S)
    response.raise_for_status()
    return response.json()


last_year = years[0]
# One session for the whole run so the HTTPS connection is reused across the
# thousands of page requests.
with requests.Session() as session:
    for year in years:
        # Page 1 tells us how many pages this year has; keep the payload so it
        # does not have to be requested a second time below.
        first_page = _fetch_page(session, year, 1)
        pages = first_page['pages']
        print("year: {} ({} pages)        ".format(year, pages))

        checkpoint = {'owner': 'NAV', 'dataset': 'job-offers', 'year': year}
        lastDownload = db['downloads'].find_one(checkpoint)
        startPage = 1
        if lastDownload is not None:
            if lastDownload['last_downloaded_page'] == pages:
                continue  # this year was already downloaded completely
            startPage = lastDownload['last_downloaded_page'] + 1

        if startPage <= pages:
            # Most recent year for which this run downloads at least one page.
            last_year = year

        for page in range(startPage, pages + 1):
            # '\r' rewrites the same console line to act as a progress counter.
            print("year: {} ({}/{} pages)".format(year, page, pages), end='\r')
            payload = first_page if page == 1 else _fetch_page(session, year, page)
            jobOffers = payload['entries']
            # insert_many rejects an empty list, so skip pages without entries.
            if any(jobOffers):
                db['job-offers'].insert_many(jobOffers)
            # Record the page as done only after its entries were stored.
            db['downloads'].replace_one(
                checkpoint,
                dict(checkpoint, last_downloaded_page=page),
                upsert=True)


year: 2002 (1498 pages)        
year: 2003 (1300 pages)        
year: 2004 (1328 pages)        
year: 2005 (1577 pages)        
year: 2006 (2148 pages)        
year: 2007 (2555 pages)        
year: 2008 (2305 pages)        
year: 2009 (1678 pages)        
year: 2010 (1582 pages)        
year: 2011 (1678 pages)        
year: 2012 (1601 pages)        
year: 2013 (1433 pages)        
year: 2013 (259/1433 pages)

## Visualize distribution of job offers on the map

* per municipality (`arbeidssted_kommune`) or county (`arbeidssted_fylke`)
* per group of profession (`yrke_grovgruppe`)
* per designation of profession (`yrkesbetegnelse`)

## Visualize distribution of job offers over time