### Raw data function

In [4]:
from sqlalchemy import create_engine
from functools import reduce
import pandas as pd

In [5]:
def raw_data():
    sqlitepath = '/Users/Nonis/desktop/ih_datamadpt0420_project_m1/data/raw/raw_data_project_m1.db'
    engine = create_engine(f'sqlite:///{sqlitepath}')
    tables = pd.read_sql_query("""SELECT name FROM sqlite_master WHERE type = 'table'""",engine)
    tables_list = tables['name'].to_list()
    x = [pd.read_sql_query(f'SELECT * FROM {i}', engine) for i in tables_list]
    df_raw = reduce(lambda left,right: pd.merge(left,right, on = 'uuid'), x)
    df_raw_data = pd.DataFrame(df_raw)
    
    return df_raw_data

In [6]:
raw_data()

Unnamed: 0,uuid,age,gender,dem_has_children,age_group,country_code,rural,dem_education_level,dem_full_time_job,normalized_job_code,question_bbi_2016wave4_basicincome_awareness,question_bbi_2016wave4_basicincome_vote,question_bbi_2016wave4_basicincome_effect,question_bbi_2016wave4_basicincome_argumentsfor,question_bbi_2016wave4_basicincome_argumentsagainst
0,f6e7ee00-deac-0133-4de8-0a81e8b09a82,61 years old,male,NO,40_65,AT,countryside,no,no,,I know something about it,I would not vote,None of the above,None of the above,None of the above
1,54f0f1c0-dda1-0133-a559-0a81e8b09a82,57 years old,male,yES,40_65,AT,urban,high,yes,861a9b9151e11362eb3c77ca914172d0,I understand it fully,I would probably vote for it,A basic income would not affect my work choices,It increases appreciation for household work a...,It might encourage people to stop working
2,83127080-da3d-0133-c74f-0a81e8b09a82,32 years old,male,nO,26_39,AT,city,,no,,I have heard just a little about it,I would not vote,‰Û_ gain additional skills,It creates more equality of opportunity,Foreigners might come to my country and take a...
3,15626d40-db13-0133-ea5c-0a81e8b09a82,45 years old,Male,YES,40_65,AT,Country,high,yes,049a3f3a2b5f85cb2971ba77ad66e10c,I have heard just a little about it,I would probably vote for it,‰Û_ work less,It reduces anxiety about financing basic needs,None of the above
4,24954a70-db98-0133-4a64-0a81e8b09a82,41 years old,Fem,yES,40_65,AT,city,high,yes,f4b2fb1aa40f661488e2782b6d57ad2f,I have heard just a little about it,I would probably vote for it,None of the above,It reduces anxiety about financing basic needs,It is impossible to finance | It might encoura...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9644,7d1ac020-dcb4-0133-817a-0a81e8b09a82,37 years old,FeMale,nO,26_39,SK,urban,high,yes,847165cfda6b1dc82ae22b967da8af2f,I understand it fully,I would probably vote for it,‰Û_ spend more time with my family,It reduces bureaucracy and administrative expe...,It is impossible to finance
9645,39f989f0-db52-0133-8482-0a81e8b09a82,53 years old,Male,yES,40_65,SK,urban,high,yes,a4d5b8b38f9513825d0d94a981ebe962,I have never heard of it,I would probably vote against it,A basic income would not affect my work choices,It reduces bureaucracy and administrative expe...,It might encourage people to stop working | On...
9646,70ce4a90-d965-0133-f5e4-0a81e8b09a82,1992,male,NO,juvenile,SK,Non-Rural,low,no,,I have heard just a little about it,I would not vote,‰Û_ spend more time with my family,It reduces anxiety about financing basic needs,None of the above
9647,2896e440-db3c-0133-5b67-0a81e8b09a82,47 years old,male,yES,40_65,SK,city,low,yes,775190277a849cba701b306a7b374c0a,I understand it fully,I would vote for it,A basic income would not affect my work choices,It reduces bureaucracy and administrative expe...,Foreigners might come to my country and take a...


### Web scraping function

In [7]:
import requests
from bs4 import BeautifulSoup
import lxml.html as lh
import pandas as pd
import re


In [16]:
def get_info():
    url = 'https://ec.europa.eu/eurostat/statistics-explained/index.php/Glossary:Country_codes'
    html = requests.get(url).content
    soup = BeautifulSoup(html, 'lxml')
    table = soup.find('table')
    list_a = []
    
    rows = table.find_all('tr')
    
    for tr in rows:
        columns = tr.find_all('td')
        for td in columns:
            list_a.append(td.text)
    
    list_cleaned = [i.replace('\n', '').replace('(', '').replace(')', '') for i in list_a]
    row_split = 2
    rows_refactored = [list_cleaned[x:x+row_split] for x in range(0, len(list_cleaned), row_split)]
    df_countries =pd.DataFrame(rows_refactored,columns = {'country_name','country_code'})
    df_countries['country_code'].replace({'EL':'GR'}, inplace=True)
    return df_countries

In [17]:
get_info()

Unnamed: 0,country_name,country_code
0,Belgium,BE
1,Greece,GR
2,Lithuania,LT
3,Portugal,PT
4,Bulgaria,BG
5,Spain,ES
6,Luxembourg,LU
7,Romania,RO
8,Czechia,CZ
9,France,FR


### API function

In [None]:
def get_jobs(job_id):
    response = requests.get(f'http://api.dataatwork.org/v1/jobs/{job_id}')
    jobs_json = response.json()
    return jobs_json

In [None]:
def jobs(job_ids):
    jobs_title = []
    job_ids = raw_data()

    for ids in job_ids:
        job_title = get_jobs(ids)
        jobs_title.append(job_title)
    
    df_jobs = pd.DataFrame(jobs_title)
    df_jobs = jobs.rename(columns = {'uuid':'normalized_job_code'})
    data_jobs_api = df_jobs[['normalized_job_code', 'title', 'normalized_job_title']]
    
    return df_jobs

In [None]:
jobs(job_ids)

In [24]:
def jobs_api():
    jobs_api = []
    job_codes = set(raw_data['normalized_job_code'])

    for i in job_codes:
        response = requests.get(f'http://api.dataatwork.org/v1/jobs/{i}')
        jobs_json = response.json()
        jobs_api.append(jobs_json)
    df_jobs = pd.DataFrame(jobs_title)
    df_jobs = jobs.rename(columns = {'uuid':'normalized_job_code'})
    data_jobs_api = df_jobs[['normalized_job_code', 'title', 'normalized_job_title']]

In [25]:
jobs_api()

TypeError: 'function' object is not subscriptable

In [20]:
jobs(job_ids)

NameError: name 'jobs' is not defined