## **Dependencies**

In [73]:
import pandas as pd
from tqdm.notebook import tqdm
import numpy as np

## **Load JSON**

Choose respective json file to transform to data frame
Json be found in /src/response_[ORG]

### Define Params

In [74]:
##########
# PARAMS #
##########

abbreviation = "bmz" # abbreviation of development bank
iati_orga_id = "DE-1"
orga_full_name = "Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung"
response_folder = "../../../src/responses/"
transformed_folder = "../../../src/transformed/"
response_file = response_folder + f"response_{abbreviation}.json" # json format -> fetched in iati.ipynb
output_file = transformed_folder + f"transformed_{abbreviation}.csv"

In [75]:
#Read in data into df from json
df = pd.read_json(response_file)
df.head(1)

Unnamed: 0,sector_code,iati_identifier,title_narrative,document_link_url,reporting_org_ref,sector_vocabulary,activity_date_type,activity_status_code,description_narrative,last_updated_datetime,participating_org_ref,activity_date_iso_date,recipient_country_code,location_name_narrative,reporting_org_narrative,title_narrative_xml_lang,participating_org_narrative,description_narrative_xml_lang,contact_info_organisation_narrative,recipient_region_code
0,[41010],DE-1-201420207,"[Umwelt- und Klimagovernance, Environmental an...",[https://www.giz.de/projektdaten/projects.acti...,DE-1,[1],"[1, 2, 3, 4]",3,"[Öffentliche, private und zivile Akteure setze...",2024-02-29T00:00:00Z,"[XM-DAC-5-52, XM-DAC-5-52, DE-1]","[2015-09-08T00:00:00Z, 2015-09-08T00:00:00Z, 2...",[MA],[Rabat],[Bundesministerium für wirtschaftliche Zusamme...,"[de, en]",[Deutsche Gesellschaft für Internationale Zusa...,"[de, en, de, en]",[Bundesministerium für wirtschaftliche Zusamme...,


In [76]:
print(f"Entries: {len(df)}")
df.reporting_org_ref.value_counts()

Entries: 31628


DE-1    31628
Name: reporting_org_ref, dtype: int64

In [77]:
# create new empty df to fill with transformed data

trans_df = pd.DataFrame()

## **Feature Transformation & Engineering**

### IATI ID

In [78]:
trans_df["iati_id"] = df["iati_identifier"].values

trans_df.head(1)

Unnamed: 0,iati_id
0,DE-1-201420207


### IATI Organization Identifyer

In [79]:
trans_df["iati_orga_id"] = iati_orga_id

### Organization Abbeviation

In [80]:
trans_df["orga_abbreviation"] = abbreviation

### Orga full Name

In [81]:
trans_df["orga_full_name"] = orga_full_name

### Orga Project Number

In [82]:
"""
try:
    project_number = iati_orga_id.split("-")[2]

    trans_df["project_number"] = project_number
except:
    trans_df["project_number"] = "NaN"
"""

'\ntry:\n    project_number = iati_orga_id.split("-")[2]\n\n    trans_df["project_number"] = project_number\nexcept:\n    trans_df["project_number"] = "NaN"\n'

### Secondary Organization

#### Auswärtiges Amt

In [83]:
trans_df["client"] = abbreviation.upper()

# For Auswärtiges Amt
if iati_orga_id == "XM-DAC-5-7":
    for index, row in df.iterrows():
        part_orga_row = row['participating_org_narrative'][0]
        try:
            if part_orga_row == "Deutsche Gesellschaft für Internationale Zusammenarbeit (GIZ) GmbH":
                trans_df["orga_abbreviation"][index] = "giz"
                trans_df["orga_full_name"][index] = "Deutsche Gesellschaft für Internationale Zusammenarbeit GmbH"
            elif part_orga_row == "Kreditanstalt für Wiederaufbau":
                trans_df["orga_abbreviation"][index] = "kfw"
                trans_df["orga_full_name"][index] = "Kreditanstalt für Wiederaufbau"
            # AA Other
            else:
                trans_df["orga_abbreviation"][index] = "aa-other"
                trans_df["orga_full_name"][index] = "Auswärtiges Amt - Other"

        except:
            pass

#### BMZ

- GIZ: XM-DAC-5-52
- KfW: XM-DAC-5-2

In [84]:
# For BMZ
if iati_orga_id == "DE-1":
    for index, row in df.iterrows():
        part_orga_row = row['participating_org_ref']
        try:
            # GIZ
            if "XM-DAC-5-52" in part_orga_row:
                trans_df["orga_abbreviation"][index] = "giz"
                trans_df["orga_full_name"][index] = "Deutsche Gesellschaft für Internationale Zusammenarbeit GmbH"
            # KfW
            elif "XM-DAC-5-2" in part_orga_row:
                trans_df["orga_abbreviation"][index] = "kfw"
                trans_df["orga_full_name"][index] = "Kreditanstalt für Wiederaufbau"

        except Exception as e:
            print(f"error: {e}")

#### NON-BMZ-GIZ

In [85]:
if iati_orga_id == "XM-DAC-5-52":
    trans_df["orga_abbreviation"] = "giz"
    trans_df["orga_full_name"] = "Deutsche Gesellschaft für Internationale Zusammenarbeit GmbH"

### Organization

In [86]:
trans_df['organization'] = df['reporting_org_narrative'].apply(lambda x: x[0])

print(trans_df.organization.value_counts())
trans_df.head(2)

Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)    31588
Federal Ministry for Economic Cooperation and Development (BMZ)                  40
Name: organization, dtype: int64


Unnamed: 0,iati_id,iati_orga_id,orga_abbreviation,orga_full_name,client,organization
0,DE-1-201420207,DE-1,giz,Deutsche Gesellschaft für Internationale Zusam...,BMZ,Bundesministerium für wirtschaftliche Zusammen...
1,DE-1-201516970-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...


### Title

#### Title EN

In [87]:
############
# EN title #
############

pbar = tqdm(total=len(df))
trans_df["title_en"] = "NaN"

for index, row in df.iterrows():
    title_row = row['title_narrative']
    try:
        if 'title_narrative_xml_lang' in df.columns:
            lang_list = row['title_narrative_xml_lang']

            # nan in pandas is type float
            # check if nan and if yes take first entry in lang
            if isinstance(lang_list, float):
                if isinstance(title_row, float):
                    trans_df["title_en"][index] = "NaN"
                else:
                    trans_df["title_en"][index] = title_row[0]
            elif len(lang_list) == len(title_row):
                for j in range(0, len(lang_list)):
                    if "en" or "EN" in lang_list:
                        if lang_list[j].lower() == "en":
                            title = title_row[j]
                            trans_df["title_en"][index] = title
                    else:
                        trans_df["title_en"][index] = "NaN"
            else:
                pass
        else:
            trans_df["title_en"][index] = row['title_narrative'][0]
    except:
        print(f"Error: Index: {index}, Row: {lang_list}, {title_row}")
    
    pbar.update(1)

pbar.close()

trans_df.head(1)

  0%|          | 0/31628 [00:00<?, ?it/s]

Unnamed: 0,iati_id,iati_orga_id,orga_abbreviation,orga_full_name,client,organization,title_en
0,DE-1-201420207,DE-1,giz,Deutsche Gesellschaft für Internationale Zusam...,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Environmental and climate governance


#### Title Other

In [88]:
###################
# Add other title #
###################

pbar = tqdm(total=len(df))

trans_df["title_other"] = "NaN"

for index, row in df.iterrows():
    title_row = row['title_narrative']
    try:
        if 'title_narrative_xml_lang' in df.columns:
            lang_list = row['title_narrative_xml_lang']

            # every title which is has no lang attribute is classified as English and therefore not in other
            if isinstance(lang_list, float):
                trans_df["title_other"][index] = "NaN"
            elif len(lang_list) == len(title_row):
                for j in range(0, len(lang_list)):
                    if lang_list[j].lower() != "en":
                        title = row['title_narrative'][j]
                        if trans_df["title_other"][index] == "NaN":
                            trans_df["title_other"][index] = title
                        else:
                            trans_df["title_other"][index] = f"{trans_df['title_other'][index]}; {title}"
            else:
                trans_df["title_other"][index] = title
            
            pbar.update(1)

        else:
            trans_df["title_other"][index] = "NaN"
        
    except Exception as e:
        print(f"Error: Index: {index} \n Row: {row} \n Exception: {e}")

pbar.close()

trans_df.head(5)

  0%|          | 0/31628 [00:00<?, ?it/s]

Unnamed: 0,iati_id,iati_orga_id,orga_abbreviation,orga_full_name,client,organization,title_en,title_other
0,DE-1-201420207,DE-1,giz,Deutsche Gesellschaft für Internationale Zusam...,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Environmental and climate governance,Umwelt- und Klimagovernance
1,DE-1-201516970-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Acquisition of a chateau d´eau for water stock,Anschaffung eines Wasserturms zur Wasserspeich...
2,DE-1-201601228-1705,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Multisectoral food and nutrition security for ...,Multisektorale Ernährungssicherung für junge K...
3,DE-1-201674324-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Enviromental education and participative devel...,Umwelterziehung in Bezug auf die Ressource Was...
4,DE-1-201515741-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Construction of a Lycée (secondary II) at Nado...,Bau eines Lycée (Gymnasium) in Nadoba in Togo


#### Main Title

In [89]:
trans_df['title_main'] = trans_df["title_en"]
trans_df.loc[trans_df['title_main'] == "NaN", 'title_main'] = trans_df['title_other']

trans_df.head(5)

Unnamed: 0,iati_id,iati_orga_id,orga_abbreviation,orga_full_name,client,organization,title_en,title_other,title_main
0,DE-1-201420207,DE-1,giz,Deutsche Gesellschaft für Internationale Zusam...,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Environmental and climate governance,Umwelt- und Klimagovernance,Environmental and climate governance
1,DE-1-201516970-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Acquisition of a chateau d´eau for water stock,Anschaffung eines Wasserturms zur Wasserspeich...,Acquisition of a chateau d´eau for water stock
2,DE-1-201601228-1705,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Multisectoral food and nutrition security for ...,Multisektorale Ernährungssicherung für junge K...,Multisectoral food and nutrition security for ...
3,DE-1-201674324-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Enviromental education and participative devel...,Umwelterziehung in Bezug auf die Ressource Was...,Enviromental education and participative devel...
4,DE-1-201515741-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Construction of a Lycée (secondary II) at Nado...,Bau eines Lycée (Gymnasium) in Nadoba in Togo,Construction of a Lycée (secondary II) at Nado...


### Country

In [90]:
country_codes_df = pd.read_csv("../../../src/codelists/country_codes_ISO3166-1alpha-2.csv")
country_codes_df.head(2)

Unnamed: 0,Country,Alpha-2 code,Alpha-3 code,Numeric code,Latitude (average),Longitude (average)
0,Afghanistan,"""AF""","""AFG""","""4""","""33""","""65"""
1,Åland Islands,"""AX""","""ALA""","""248""","""60.116667""","""19.9"""


#### Read in country code csv

In [91]:
trans_df["country_code"] = df["recipient_country_code"]
trans_df["country"] = "NaN"

for index, row in df.iterrows():
    country_list = row["recipient_country_code"]

    if isinstance(country_list, float):
        trans_df["country"][index] = "NaN"
    else:
        country_str = ""
        for i in country_list:
            country_str += f"{i}; "
        
        trans_df["country"][index] = country_str

trans_df.head(5)

Unnamed: 0,iati_id,iati_orga_id,orga_abbreviation,orga_full_name,client,organization,title_en,title_other,title_main,country_code,country
0,DE-1-201420207,DE-1,giz,Deutsche Gesellschaft für Internationale Zusam...,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Environmental and climate governance,Umwelt- und Klimagovernance,Environmental and climate governance,[MA],MA;
1,DE-1-201516970-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Acquisition of a chateau d´eau for water stock,Anschaffung eines Wasserturms zur Wasserspeich...,Acquisition of a chateau d´eau for water stock,[SN],SN;
2,DE-1-201601228-1705,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Multisectoral food and nutrition security for ...,Multisektorale Ernährungssicherung für junge K...,Multisectoral food and nutrition security for ...,[MW],MW;
3,DE-1-201674324-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Enviromental education and participative devel...,Umwelterziehung in Bezug auf die Ressource Was...,Enviromental education and participative devel...,[EC],EC;
4,DE-1-201515741-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Construction of a Lycée (secondary II) at Nado...,Bau eines Lycée (Gymnasium) in Nadoba in Togo,Construction of a Lycée (secondary II) at Nado...,[TG],TG;


### Region

In [92]:
trans_df['region'] = df['recipient_region_code']
trans_df.head(5)

Unnamed: 0,iati_id,iati_orga_id,orga_abbreviation,orga_full_name,client,organization,title_en,title_other,title_main,country_code,country,region
0,DE-1-201420207,DE-1,giz,Deutsche Gesellschaft für Internationale Zusam...,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Environmental and climate governance,Umwelt- und Klimagovernance,Environmental and climate governance,[MA],MA;,
1,DE-1-201516970-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Acquisition of a chateau d´eau for water stock,Anschaffung eines Wasserturms zur Wasserspeich...,Acquisition of a chateau d´eau for water stock,[SN],SN;,
2,DE-1-201601228-1705,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Multisectoral food and nutrition security for ...,Multisektorale Ernährungssicherung für junge K...,Multisectoral food and nutrition security for ...,[MW],MW;,
3,DE-1-201674324-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Enviromental education and participative devel...,Umwelterziehung in Bezug auf die Ressource Was...,Enviromental education and participative devel...,[EC],EC;,
4,DE-1-201515741-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Construction of a Lycée (secondary II) at Nado...,Bau eines Lycée (Gymnasium) in Nadoba in Togo,Construction of a Lycée (secondary II) at Nado...,[TG],TG;,


### Location

In [93]:
try: 
    if 'title_narrative_xml_lang' in df.columns:
        trans_df['location'] = df['location_name_narrative']
    else:
        trans_df['location'] = "NaN"
except:
        trans_df['location'] = "NaN"    
trans_df.head(5)

Unnamed: 0,iati_id,iati_orga_id,orga_abbreviation,orga_full_name,client,organization,title_en,title_other,title_main,country_code,country,region,location
0,DE-1-201420207,DE-1,giz,Deutsche Gesellschaft für Internationale Zusam...,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Environmental and climate governance,Umwelt- und Klimagovernance,Environmental and climate governance,[MA],MA;,,[Rabat]
1,DE-1-201516970-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Acquisition of a chateau d´eau for water stock,Anschaffung eines Wasserturms zur Wasserspeich...,Acquisition of a chateau d´eau for water stock,[SN],SN;,,[Dakar]
2,DE-1-201601228-1705,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Multisectoral food and nutrition security for ...,Multisektorale Ernährungssicherung für junge K...,Multisectoral food and nutrition security for ...,[MW],MW;,,[Lilongwe]
3,DE-1-201674324-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Enviromental education and participative devel...,Umwelterziehung in Bezug auf die Ressource Was...,Enviromental education and participative devel...,[EC],EC;,,[Quito]
4,DE-1-201515741-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Construction of a Lycée (secondary II) at Nado...,Bau eines Lycée (Gymnasium) in Nadoba in Togo,Construction of a Lycée (secondary II) at Nado...,[TG],TG;,,[Lome]


### Descriptions

descr1, descri2, ... can have overlap. Doppelte Beschriebungen entfernen! Es gibt lib um overlap heruaszufinden!

In [94]:
#################################
# Description english and other #
#################################

trans_df["description_en"] = "NaN"
trans_df["description_other"] = "NaN"

for index, row in df.iterrows():

    try:
        if 'description_narrative_xml_lang' in df.columns:
            descr_list = row['description_narrative_xml_lang'] # list with languages provided 
            descr_row = row['description_narrative'] # list with the despription narrative of all languages provided

            # nan in pandas is type float
            # check if nan and if yes take first entry in descr
            if isinstance(descr_list, float):
                if isinstance(descr_row, float):
                    trans_df["description_en"][index] = "NaN"
                else:
                    trans_df["description_en"][index] = descr_row[0]
            else:
                if len(descr_list) == len(descr_row):
                    descr_len = len(descr_list)
                else:
                    descr_len = len(descr_row)
                # iterate throug description list
                for j in range(0, descr_len):
                    # if description english
                    if descr_list[j].lower() == "en":
                        if type(descr_row) == float:
                            descr = "NaN"
                        else:
                            descr = descr_row[j]
                        if trans_df["description_en"][index] == "NaN":
                            trans_df["description_en"][index] = descr
                        else:
                            trans_df["description_en"][index] = f"{trans_df['description_en'][index]}; {descr}"
                    else:
                        if type(descr_row) == float:
                            descr = "NaN"
                        else:
                            descr = descr_row[j]
                        if trans_df["description_other"][index] == "NaN":
                            trans_df["description_other"][index] = descr
                        else:
                            trans_df["description_other"][index] = f"{trans_df['description_other'][index]}; {descr}"
        else:
            descr_str = ""
            for d in row['description_narrative']:
                descr_str += f"{d}; "
            trans_df["description_en"][index] = descr_str

    except Exception as e:
        print(f"⚠︎ Error {e}: Index: {index}")
        print(row)
        
trans_df.head(5)

Unnamed: 0,iati_id,iati_orga_id,orga_abbreviation,orga_full_name,client,organization,title_en,title_other,title_main,country_code,country,region,location,description_en,description_other
0,DE-1-201420207,DE-1,giz,Deutsche Gesellschaft für Internationale Zusam...,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Environmental and climate governance,Umwelt- und Klimagovernance,Environmental and climate governance,[MA],MA;,,[Rabat],"Public, private and civil actors implement ele...","Öffentliche, private und zivile Akteure setzen..."
1,DE-1-201516970-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Acquisition of a chateau d´eau for water stock,Anschaffung eines Wasserturms zur Wasserspeich...,Acquisition of a chateau d´eau for water stock,[SN],SN;,,[Dakar],Acquisition of a chateau d´eau for water stock...,Anschaffung eines Wasserturms zur Wasserspeich...
2,DE-1-201601228-1705,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Multisectoral food and nutrition security for ...,Multisektorale Ernährungssicherung für junge K...,Multisectoral food and nutrition security for ...,[MW],MW;,,[Lilongwe],Multisectoral food and nutrition security for ...,Multisektorale Ernährungssicherung für junge K...
3,DE-1-201674324-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Enviromental education and participative devel...,Umwelterziehung in Bezug auf die Ressource Was...,Enviromental education and participative devel...,[EC],EC;,,[Quito],Enviromental education and participative devel...,Umwelterziehung in Bezug auf die Ressource Was...
4,DE-1-201515741-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Construction of a Lycée (secondary II) at Nado...,Bau eines Lycée (Gymnasium) in Nadoba in Togo,Construction of a Lycée (secondary II) at Nado...,[TG],TG;,,[Lome],Construction of a Lycée (secondary II) at Nado...,Bau eines Lycée (Gymnasium) in Nadoba in Togo;...


#### Main desription

In [95]:
trans_df['description_main'] = trans_df.description_en
trans_df.loc[trans_df['description_main'] == "NaN", 'description_main'] = trans_df['description_other']

trans_df.head(5)

Unnamed: 0,iati_id,iati_orga_id,orga_abbreviation,orga_full_name,client,organization,title_en,title_other,title_main,country_code,country,region,location,description_en,description_other,description_main
0,DE-1-201420207,DE-1,giz,Deutsche Gesellschaft für Internationale Zusam...,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Environmental and climate governance,Umwelt- und Klimagovernance,Environmental and climate governance,[MA],MA;,,[Rabat],"Public, private and civil actors implement ele...","Öffentliche, private und zivile Akteure setzen...","Public, private and civil actors implement ele..."
1,DE-1-201516970-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Acquisition of a chateau d´eau for water stock,Anschaffung eines Wasserturms zur Wasserspeich...,Acquisition of a chateau d´eau for water stock,[SN],SN;,,[Dakar],Acquisition of a chateau d´eau for water stock...,Anschaffung eines Wasserturms zur Wasserspeich...,Acquisition of a chateau d´eau for water stock...
2,DE-1-201601228-1705,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Multisectoral food and nutrition security for ...,Multisektorale Ernährungssicherung für junge K...,Multisectoral food and nutrition security for ...,[MW],MW;,,[Lilongwe],Multisectoral food and nutrition security for ...,Multisektorale Ernährungssicherung für junge K...,Multisectoral food and nutrition security for ...
3,DE-1-201674324-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Enviromental education and participative devel...,Umwelterziehung in Bezug auf die Ressource Was...,Enviromental education and participative devel...,[EC],EC;,,[Quito],Enviromental education and participative devel...,Umwelterziehung in Bezug auf die Ressource Was...,Enviromental education and participative devel...
4,DE-1-201515741-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Construction of a Lycée (secondary II) at Nado...,Bau eines Lycée (Gymnasium) in Nadoba in Togo,Construction of a Lycée (secondary II) at Nado...,[TG],TG;,,[Lome],Construction of a Lycée (secondary II) at Nado...,Bau eines Lycée (Gymnasium) in Nadoba in Togo;...,Construction of a Lycée (secondary II) at Nado...


### Status

In [96]:
# https://iatistandard.org/en/iati-standard/203/codelists/activitystatus/
activity_status = {
    1: "Pipeline/identification",
    2: "Implementation",
    3: "Finalisation",
    4: "Closed",
    5: "Cancelled",
    6: "Suspended"
}

trans_df["status"] = df.activity_status_code
trans_df['status'] = trans_df['status'].replace(activity_status)

trans_df.head(2)
    

Unnamed: 0,iati_id,iati_orga_id,orga_abbreviation,orga_full_name,client,organization,title_en,title_other,title_main,country_code,country,region,location,description_en,description_other,description_main,status
0,DE-1-201420207,DE-1,giz,Deutsche Gesellschaft für Internationale Zusam...,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Environmental and climate governance,Umwelt- und Klimagovernance,Environmental and climate governance,[MA],MA;,,[Rabat],"Public, private and civil actors implement ele...","Öffentliche, private und zivile Akteure setzen...","Public, private and civil actors implement ele...",Finalisation
1,DE-1-201516970-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Acquisition of a chateau d´eau for water stock,Anschaffung eines Wasserturms zur Wasserspeich...,Acquisition of a chateau d´eau for water stock,[SN],SN;,,[Dakar],Acquisition of a chateau d´eau for water stock...,Anschaffung eines Wasserturms zur Wasserspeich...,Acquisition of a chateau d´eau for water stock...,Closed


### Date

In [97]:
#############
# Date Type #
#############

# One Hot
# 1 -> Yes
# 0 -> No

# Codes:
# 1 Planned start
# 2 Actual start
# 3 Planned end
# 4 Actual end

trans_df["planned_start"] = "NaN"
trans_df["actual_start"] = "NaN"
trans_df["planned_end"] = "NaN"
trans_df["actual_end"] = "NaN"

date_types = {
    1: "planned_start",
    2: "actual_start",
    3: "planned_end",
    4: "actual_end"
}

for index, row in df.iterrows():
    dtype_list = row["activity_date_type"]
    iso_date_list = row["activity_date_iso_date"]

    combined_list = list(zip(dtype_list, iso_date_list))

    # replace nums with column names from date_types
    combined_list = [(date_types[int(t[0])], t[1]) for t in combined_list]

    for i in combined_list:
        trans_df[i[0]] = i[1]

trans_df.head(1800)


Unnamed: 0,iati_id,iati_orga_id,orga_abbreviation,orga_full_name,client,organization,title_en,title_other,title_main,country_code,...,region,location,description_en,description_other,description_main,status,planned_start,actual_start,planned_end,actual_end
0,DE-1-201420207,DE-1,giz,Deutsche Gesellschaft für Internationale Zusam...,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Environmental and climate governance,Umwelt- und Klimagovernance,Environmental and climate governance,[MA],...,,[Rabat],"Public, private and civil actors implement ele...","Öffentliche, private und zivile Akteure setzen...","Public, private and civil actors implement ele...",Finalisation,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z
1,DE-1-201516970-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Acquisition of a chateau d´eau for water stock,Anschaffung eines Wasserturms zur Wasserspeich...,Acquisition of a chateau d´eau for water stock,[SN],...,,[Dakar],Acquisition of a chateau d´eau for water stock...,Anschaffung eines Wasserturms zur Wasserspeich...,Acquisition of a chateau d´eau for water stock...,Closed,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z
2,DE-1-201601228-1705,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Multisectoral food and nutrition security for ...,Multisektorale Ernährungssicherung für junge K...,Multisectoral food and nutrition security for ...,[MW],...,,[Lilongwe],Multisectoral food and nutrition security for ...,Multisektorale Ernährungssicherung für junge K...,Multisectoral food and nutrition security for ...,Closed,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z
3,DE-1-201674324-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Enviromental education and participative devel...,Umwelterziehung in Bezug auf die Ressource Was...,Enviromental education and participative devel...,[EC],...,,[Quito],Enviromental education and participative devel...,Umwelterziehung in Bezug auf die Ressource Was...,Enviromental education and participative devel...,Finalisation,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z
4,DE-1-201515741-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Construction of a Lycée (secondary II) at Nado...,Bau eines Lycée (Gymnasium) in Nadoba in Togo,Construction of a Lycée (secondary II) at Nado...,[TG],...,,[Lome],Construction of a Lycée (secondary II) at Nado...,Bau eines Lycée (Gymnasium) in Nadoba in Togo;...,Construction of a Lycée (secondary II) at Nado...,Closed,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1795,DE-1-201622489,DE-1,giz,Deutsche Gesellschaft für Internationale Zusam...,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Stärkung der ökologischen Konnektivität im Geb...,Stärkung der ökologischen Konnektivität im Geb...,Stärkung der ökologischen Konnektivität im Geb...,,...,[289],,State institutions and local actors in Cote d'...,Staatliche Institutionen und lokale Akteure in...,State institutions and local actors in Cote d'...,Implementation,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z
1796,DE-1-201628254-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Socio-Political Advice in Sub-Saharan Africa,Gesellschaftliche Beratung in Subsahara-Afrika...,Socio-Political Advice in Sub-Saharan Africa,,...,[289],,Socio-Political Advice in Sub-Saharan Africa; ...,Gesellschaftliche Beratung in Subsahara-Afrika...,Socio-Political Advice in Sub-Saharan Africa; ...,Finalisation,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z
1797,DE-1-201628270-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Regional Programme Sub-Saharan Afrika,Regionalprogramm Afrika südlich der Sahara,Regional Programme Sub-Saharan Afrika,,...,[289],,Regional Programme Sub-Saharan Afrika; The pro...,Regionalprogramm Afrika südlich der Sahara; Di...,Regional Programme Sub-Saharan Afrika; The pro...,Finalisation,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z
1798,DE-1-201635044,DE-1,giz,Deutsche Gesellschaft für Internationale Zusam...,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Studies and Experts Fund COMIFAC,Studien- und Fachkräftefonds COMIFAC,Studies and Experts Fund COMIFAC,,...,[289],,Preparation and appraisal of technical coopera...,Vorbereitung und Prüfung von Vorhaben der Tech...,Preparation and appraisal of technical coopera...,Finalisation,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z


### Last Update

In [98]:
trans_df['last_update'] = df['last_updated_datetime']
trans_df.head(2)

Unnamed: 0,iati_id,iati_orga_id,orga_abbreviation,orga_full_name,client,organization,title_en,title_other,title_main,country_code,...,location,description_en,description_other,description_main,status,planned_start,actual_start,planned_end,actual_end,last_update
0,DE-1-201420207,DE-1,giz,Deutsche Gesellschaft für Internationale Zusam...,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Environmental and climate governance,Umwelt- und Klimagovernance,Environmental and climate governance,[MA],...,[Rabat],"Public, private and civil actors implement ele...","Öffentliche, private und zivile Akteure setzen...","Public, private and civil actors implement ele...",Finalisation,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z,2024-02-29T00:00:00Z
1,DE-1-201516970-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Acquisition of a chateau d´eau for water stock,Anschaffung eines Wasserturms zur Wasserspeich...,Acquisition of a chateau d´eau for water stock,[SN],...,[Dakar],Acquisition of a chateau d´eau for water stock...,Anschaffung eines Wasserturms zur Wasserspeich...,Acquisition of a chateau d´eau for water stock...,Closed,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z,2024-03-06T00:00:00Z


### Sector Codes (CRS & SGDs)

In [99]:
sector_codes = {
    1: "OECD DAC CRS Purpose Codes (5 digit)",
    2: "OECD DAC CRS Purpose Codes (3 digit)",
    3: "Classification of the Functions of Government (UN)",
    4: "Statistical classification of economic activities in the European Community",
    5: "National Taxonomy for Exempt Entities (USA)",
    6: "AidData",
    7: "SDG Goal",
    8: "SDG Target",
    9: "SDG Indicator",
    10: "Humanitarian Global Clusters (Inter-Agency Standing Committee)",
    11: "North American Industry Classification System (NAICS)",
    12: "UN System Function",
    99: "Reporting Organisation", # The sector reported corresponds to a sector vocabulary maintained by the reporting organisation for this activity
    98: "Reporting Organisation 2" # The sector reported corresponds to a sector vocabulary maintained by the reporting organisation for this activity (if they are referencing more than one)
}

In [100]:
crs5_df = pd.read_csv("../../../src/codelists/crs5_codes.csv")
crs5_df.head(2)

Unnamed: 0,code,name,description,language,category,category-name,category-description
0,11110,Education policy and administrative management,"Education sector policy, planning and programm...",en,111,"Education, Level Unspecified",The codes in this category are to be used only...
1,11120,Education facilities and training,"Educational buildings, equipment, materials; s...",en,111,"Education, Level Unspecified",The codes in this category are to be used only...


In [101]:
crs3_df = pd.read_csv("../../../src/codelists/crs3_codes.csv")
crs3_df.head(2)

Unnamed: 0,code,name,description,language,category,category-name,category-description
0,111,"Education, Level Unspecified",The codes in this category are to be used only...,en,,,
1,112,Basic Education,,en,,,


#### Get CRS5 & derive crs3 tags from crs5 tags

- other tags are not given by organizations in IATI data; need other ways to find out these tags


- It is possible to derive CRS3 tags from CRS5 tags by looking at the first 3 characters.

In [102]:
# to process different variants of codes (crs3, crs5 etc.)

def process_codes(combined_list, translation_df, code_index):
    code_text = ""
    codes_nums = ""
    if any(item[0] == code_index for item in combined_list):
        for i in combined_list:
            if i[0] == code_index:
                translation = translation_df.loc[translation_df['code'] == int(i[1]), 'name'].values[0]
                code_text += f"{translation}; "
                codes_nums += f"{i[1]}; "
        return code_text, codes_nums
    else:
        return "NaN", "NaN"
    
def derive_crs3(combined_list, translation_df, code_index="1"):
    code_text = ""
    codes_nums = ""
    if any(item[0] == code_index for item in combined_list):
        for i in combined_list:
            if i[0] == code_index:
                translation = translation_df.loc[translation_df['code'] == int(i[1][:3]), 'name'].values[0]
                code_text += f"{translation}; "
                codes_nums += f"{i[1][:3]}; "
        return code_text, codes_nums
    else:
        return "NaN", "NaN"

In [103]:
###############
# ECTRACT CRS #
###############
trans_df["crs_5_code"] = "NaN"
trans_df["crs_5_name"] = "NaN"

# Most Project dont have information on crs3 -> crs3 derived from crs5 tags
trans_df["crs_3_code"] = "NaN"
trans_df["crs_3_name"] = "NaN"


for index, row in df.iterrows():
    crs_voc_list = row['sector_vocabulary']
    crs_code_list = row['sector_code']

    if type(crs_voc_list) == float:
        pass
    else:
        try:
            combined_list = list(zip(crs_voc_list, crs_code_list))

            # CRS 5
            crs5_str, crs5_codes = process_codes(combined_list, crs5_df, "1")
            trans_df["crs_5_code"][index] = crs5_codes
            trans_df["crs_5_name"][index] = crs5_str
            
            # CRS 3
            crs3_str, crs3_codes = derive_crs3(combined_list, crs3_df)
            trans_df["crs_3_code"][index] = crs3_codes
            trans_df["crs_3_name"][index] = crs3_str

        except:
            print(f"Error on Index {index}, {crs_code_list}")
            pass

trans_df.head(5)

Unnamed: 0,iati_id,iati_orga_id,orga_abbreviation,orga_full_name,client,organization,title_en,title_other,title_main,country_code,...,status,planned_start,actual_start,planned_end,actual_end,last_update,crs_5_code,crs_5_name,crs_3_code,crs_3_name
0,DE-1-201420207,DE-1,giz,Deutsche Gesellschaft für Internationale Zusam...,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Environmental and climate governance,Umwelt- und Klimagovernance,Environmental and climate governance,[MA],...,Finalisation,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z,2024-02-29T00:00:00Z,41010;,Environmental policy and administrative manage...,410;,General Environment Protection;
1,DE-1-201516970-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Acquisition of a chateau d´eau for water stock,Anschaffung eines Wasserturms zur Wasserspeich...,Acquisition of a chateau d´eau for water stock,[SN],...,Closed,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z,2024-03-06T00:00:00Z,14030;,Basic drinking water supply and basic sanitati...,140;,Water Supply & Sanitation;
2,DE-1-201601228-1705,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Multisectoral food and nutrition security for ...,Multisektorale Ernährungssicherung für junge K...,Multisectoral food and nutrition security for ...,[MW],...,Closed,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z,2024-03-06T00:00:00Z,31120;,Agricultural development;,311;,Agriculture;
3,DE-1-201674324-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Enviromental education and participative devel...,Umwelterziehung in Bezug auf die Ressource Was...,Enviromental education and participative devel...,[EC],...,Finalisation,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z,2024-03-06T00:00:00Z,14015;,Water resources conservation (including data c...,140;,Water Supply & Sanitation;
4,DE-1-201515741-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Construction of a Lycée (secondary II) at Nado...,Bau eines Lycée (Gymnasium) in Nadoba in Togo,Construction of a Lycée (secondary II) at Nado...,[TG],...,Closed,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z,2024-03-06T00:00:00Z,11320;,Upper Secondary Education (modified and includ...,113;,Secondary Education;


### Documents

In [104]:
#df.document_link_url.value_counts()

try:
    trans_df['docs'] = df['document_link_url']
except:
    trans_df["docs"] = "NaN"
trans_df.head(2)

Unnamed: 0,iati_id,iati_orga_id,orga_abbreviation,orga_full_name,client,organization,title_en,title_other,title_main,country_code,...,planned_start,actual_start,planned_end,actual_end,last_update,crs_5_code,crs_5_name,crs_3_code,crs_3_name,docs
0,DE-1-201420207,DE-1,giz,Deutsche Gesellschaft für Internationale Zusam...,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Environmental and climate governance,Umwelt- und Klimagovernance,Environmental and climate governance,[MA],...,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z,2024-02-29T00:00:00Z,41010;,Environmental policy and administrative manage...,410;,General Environment Protection;,[https://www.giz.de/projektdaten/projects.acti...
1,DE-1-201516970-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Acquisition of a chateau d´eau for water stock,Anschaffung eines Wasserturms zur Wasserspeich...,Acquisition of a chateau d´eau for water stock,[SN],...,2012-05-01T00:00:00Z,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z,2024-03-06T00:00:00Z,14030;,Basic drinking water supply and basic sanitati...,140;,Water Supply & Sanitation;,"[https://www.bmz.de/de/laender/senegal, https:..."


### Added title and description

In [105]:
trans_df["title_and_description"] = ""

for index, row in trans_df.iterrows():
    try:
        title_row = row['title_main']
        descr_row = row['description_main']

        trans_df["title_and_description"][index] = str(title_row + ". " + descr_row).replace(";", " ").replace("..", ".")
    except:
        trans_df["title_and_description"][index] = ""



## **Save final DF as CSV**

In [107]:
trans_df.head()

Unnamed: 0,iati_id,iati_orga_id,orga_abbreviation,orga_full_name,client,organization,title_en,title_other,title_main,country_code,...,actual_start,planned_end,actual_end,last_update,crs_5_code,crs_5_name,crs_3_code,crs_3_name,docs,title_and_description
0,DE-1-201420207,DE-1,giz,Deutsche Gesellschaft für Internationale Zusam...,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Environmental and climate governance,Umwelt- und Klimagovernance,Environmental and climate governance,[MA],...,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z,2024-02-29T00:00:00Z,41010;,Environmental policy and administrative manage...,410;,General Environment Protection;,[https://www.giz.de/projektdaten/projects.acti...,"Environmental and climate governance. Public, ..."
1,DE-1-201516970-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Acquisition of a chateau d´eau for water stock,Anschaffung eines Wasserturms zur Wasserspeich...,Acquisition of a chateau d´eau for water stock,[SN],...,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z,2024-03-06T00:00:00Z,14030;,Basic drinking water supply and basic sanitati...,140;,Water Supply & Sanitation;,"[https://www.bmz.de/de/laender/senegal, https:...",Acquisition of a chateau d´eau for water stock...
2,DE-1-201601228-1705,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Multisectoral food and nutrition security for ...,Multisektorale Ernährungssicherung für junge K...,Multisectoral food and nutrition security for ...,[MW],...,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z,2024-03-06T00:00:00Z,31120;,Agricultural development;,311;,Agriculture;,"[https://www.bmz.de/de/laender/malawi, https:/...",Multisectoral food and nutrition security for ...
3,DE-1-201674324-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Enviromental education and participative devel...,Umwelterziehung in Bezug auf die Ressource Was...,Enviromental education and participative devel...,[EC],...,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z,2024-03-06T00:00:00Z,14015;,Water resources conservation (including data c...,140;,Water Supply & Sanitation;,"[https://www.bmz.de/de/laender/ecuador, https:...",Enviromental education and participative devel...
4,DE-1-201515741-0,DE-1,aa-other,Auswärtiges Amt - Other,BMZ,Bundesministerium für wirtschaftliche Zusammen...,Construction of a Lycée (secondary II) at Nado...,Bau eines Lycée (Gymnasium) in Nadoba in Togo,Construction of a Lycée (secondary II) at Nado...,[TG],...,2012-05-01T00:00:00Z,2015-04-30T00:00:00Z,2015-04-30T00:00:00Z,2024-03-06T00:00:00Z,11320;,Upper Secondary Education (modified and includ...,113;,Secondary Education;,"[https://www.bmz.de/de/laender/togo, https://w...",Construction of a Lycée (secondary II) at Nado...


In [108]:
trans_df.to_csv(output_file, index=False) 