In [283]:
import pandas as pd
import json
import requests

In [284]:
# Load the raw dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='../data/raw/full_data.csv')

In [285]:
# Keep only the three columns used in this analysis and give them
# presentation-friendly names (the i-th header labels the i-th source column).
headers = ['Country', 'Job Title', 'Rural']
data = [df[source_column] for source_column in ('country_code', 'normalized_job_code', 'rural')]
df_processed = pd.concat(data, keys=headers, axis=1)

## Processing Data

In [286]:
# Normalise casing (first letter upper, rest lower) so that the label
# replacements performed afterwards can match on a single spelling.
df_processed['Rural'] = (
    df_processed['Rural']
    .str
    .capitalize()
)

In [287]:
# Collapse the free-form labels into the two categories used by the analysis.
# The replacement is limited to the 'Rural' column: a frame-wide replace would
# also rewrite any 'Country' or 'Job Title' value that happens to equal one of
# these strings.
df_processed['Rural'] = df_processed['Rural'].replace({
    'Countryside': 'Rural',
    'Country': 'Rural',
    'City': 'Non-rural',
    'Urban': 'Non-rural',
})

In [288]:
# Sanity check: list the distinct labels left in the 'Rural' column.
# (Uses df_processed; the previously referenced `df3` is not defined anywhere
# in this notebook and fails on a fresh kernel.)
df_processed['Rural'].unique()

array(['Rural', 'Non-rural'], dtype=object)

## Obtaining API data

In [289]:
def obtain_API(job):
    """
    Request a job from the Data at Work API and return the decoded JSON body.

    Parameters
    ----------
    job : str or missing value
        Value interpolated into the ``/v1/jobs/{job}`` endpoint path.

    Returns
    -------
    object
        The string ``'Not job found'`` when ``job`` is missing
        (``pd.isna(job)`` is true); otherwise whatever ``response.json()``
        decodes from the API response.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails or exceeds the timeout.
    ValueError
        If the response body is not valid JSON.
    """

    # Check for a missing job *before* touching the network: there is nothing
    # to look up, and a failed request or non-JSON error body for ".../jobs/nan"
    # must not prevent the fallback value from being returned.
    if pd.isna(job):
        return 'Not job found'

    # An explicit timeout keeps one stalled connection from hanging the notebook.
    response = requests.get(f'http://api.dataatwork.org/v1/jobs/{job}', timeout=30)

    return response.json()

In [290]:
# Query the API for the first 20 rows only; rows beyond that receive NaN in
# 'job_extracted' through index alignment. Mapping over the single column
# avoids building a full row object per record just to read one field.
df_processed['job_extracted'] = df_processed['Job Title'].head(20).map(obtain_API)

In [291]:
def acces_job(job):
    """
    Get the job title stored in the data obtained from the API.

    Parameters
    ----------
    job : object
        Value produced by ``obtain_API``: a decoded JSON object, or a
        placeholder such as ``'Not job found'``.

    Returns
    -------
    object
        ``job['title']`` when ``job`` is a dict containing a ``'title'`` key;
        otherwise ``job`` itself, unchanged.
    """

    # Only dicts are looked up by key. On a plain string, `'title' in job` is a
    # substring test and `job['title']` would raise TypeError; on non-iterables
    # (e.g. NaN) the membership test itself raises.
    if isinstance(job, dict) and 'title' in job:
        return job['title']

    return job

In [292]:
# Replace the job codes with the titles extracted from the API payloads.
# Only the first 20 rows are processed; the remaining rows become NaN in
# 'Job Title' through index alignment.
df_processed['Job Title'] = df_processed['job_extracted'].head(20).map(acces_job)

In [293]:
# The raw API payloads are no longer needed once the titles are extracted.
df_API_collected = df_processed.drop('job_extracted', axis=1)

## Data Analysis

In [295]:
# Count the rows in each (Country, Rural, Job Title) combination and expose
# the count as a regular 'Quantity' column.
df_quantity_country = (
    df_API_collected
    .groupby(['Country', 'Rural', 'Job Title'])['Job Title']
    .count()
    .reset_index(name="Quantity")
)

In [296]:
# Display the per-group counts.
df_quantity_country

Unnamed: 0,Country,Rural,Job Title,Quantity
0,AT,Non-rural,Automatic Data Processing Planner,1
1,AT,Non-rural,Crime Data Specialist,1
2,AT,Non-rural,Data Communications Software Consultant,1
3,AT,Non-rural,Data Security Analyst,1
4,AT,Non-rural,Database Architect,1
5,AT,Non-rural,Database Developer,1
6,AT,Non-rural,Geographic Information Systems Database Admini...,1
7,AT,Non-rural,Not job found,6
8,AT,Rural,Data Coordinator,1
9,AT,Rural,Data Entry Specialist,1


In [297]:
def percentage(quantity, total=None):
    """
    Express ``quantity`` as a whole-number percentage of a total.

    Parameters
    ----------
    quantity : number
        Count to convert, e.g. one value of the ``Quantity`` column.
    total : number, optional
        Denominator of the ratio. When omitted, the sum of the ``Quantity``
        column of the notebook-level ``df_quantity_country`` frame is used.

    Returns
    -------
    str
        The percentage rounded to the nearest whole number, followed by ``%``.
    """

    if total is None:
        total = df_quantity_country['Quantity'].sum()

    share = quantity / total * 100

    # Round instead of truncating: floating-point error can leave an exact
    # percentage just below the integer (29 / 100 * 100 == 28.999999999999996),
    # which truncation would report as "28%".
    return f'{round(float(share))}%'

In [298]:
# Format each group's count as a percentage string. Mapping over the single
# 'Quantity' column avoids building a full row object per record.
df_quantity_country['Percentage'] = df_quantity_country['Quantity'].map(percentage)

In [299]:
# Display the counts together with the new 'Percentage' column.
df_quantity_country

Unnamed: 0,Country,Rural,Job Title,Quantity,Percentage
0,AT,Non-rural,Automatic Data Processing Planner,1,5%
1,AT,Non-rural,Crime Data Specialist,1,5%
2,AT,Non-rural,Data Communications Software Consultant,1,5%
3,AT,Non-rural,Data Security Analyst,1,5%
4,AT,Non-rural,Database Architect,1,5%
5,AT,Non-rural,Database Developer,1,5%
6,AT,Non-rural,Geographic Information Systems Database Admini...,1,5%
7,AT,Non-rural,Not job found,6,30%
8,AT,Rural,Data Coordinator,1,5%
9,AT,Rural,Data Entry Specialist,1,5%
