In [1]:
from data_collector import JobScraper
from data_transformer import DataTransformer, load_extracted_organizations
from connect_mongo import MongoDBConnector

from config import countries, pages, export_file_name, extracted_organizations_json, uri, db_name, collection_name

import json

In [2]:
def load_extracted_organizations(json_file):
    with open(f"{json_file}") as file:
        data = json.loads(file.read()) 
        return data

In [3]:
class DataTransformer:
    def __init__(self, extracted_data):
        self.extracted_data = extracted_data
        self.organization_centric_data = {}

    def transform_data(self):
        for country, organizations in self.extracted_data.items():
            for organization, job_list in organizations.items():
                if organization not in self.organization_centric_data:
                    self.organization_centric_data[organization] = {
                        "organization_name": organization, 
                        "countries": []
                    }
                
                    formatted_job_list = []
                    for job in job_list:
                        formatted_job_list.append({
                            "job_title": job[0],
                            "closing_date": job[1],
                            "job_description": job[2]
                        })

                country_job_data = {"country_name": country, "jobs": formatted_job_list}
                self.organization_centric_data[organization]["countries"].append(country_job_data)
                

    def get_transformed_data(self):
        return self.organization_centric_data

In [4]:
transformer = DataTransformer(load_extracted_organizations(extracted_organizations_json))
transformer.transform_data()
transformer.get_transformed_data().keys()

dict_keys(['IOM - International Organization for Migration', 'UNHCR - United Nations High Commissioner for Refugees', 'UNICEF - United Nations Children’s Fund', 'United Nations Mission for the Referendum in Western Sahara', 'Resident Coordinator System', 'UNDGC - United Nations Department of Global Communications', 'UNDP - United Nations Development Programme', 'FAO - Food and Agriculture Organization of the United Nations', 'ILO - International Labour Organization', 'MFO - Multinational Force & Observers', 'UNDSS - United Nations Department of Safety and Security', 'UNESCO - United Nations Educational, Scientific and Cultural Organization', 'UNHABITAT - United Nations Human Settlements Programme', 'UNIDO - United Nations Industrial Development Organization', 'UNV - United Nations Volunteers', 'UNWOMEN - United Nations Entity for Gender Equality and the Empowerment of Women', 'WFP - World Food Programme', 'WHO - World Health Organization', 'DRC - Danish Refugee Council', 'IRC - Interna

In [5]:
testdata = transformer.get_transformed_data()

In [15]:
testdata['DRC - Danish Refugee Council']['countries'][1]['jobs'][0]['job_description']

"\n\n\nProgramme Officer Social Protection\n\n\nJordan\n\npublished till: 2023-11-13\n\n\n\n\n\n\n\nAbout CBM\nCBM (registered as CBM Christoffel-Blindenmission Christian Blind Mission e.V.) is an international development organisation committed to improving the quality of life of persons with disabilities in the persons communities of the world. Based on our Christian values and more than 100 years of professional expertise, we address poverty as a cause and a consequence of disability. We work in partnership to create an inclusive society for all.\nTo expand our Eastern Mediterranean Team we are looking for a\n\n\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0 Programme Officer Social Protection\n\nThe position is a fulltime position with\xa040\xa0h/week.\n\nThis position is only filled locally. With contractual conditions of the respective country. An expatriation is not intended.\n\n\n\nThes