In this file, we:

- Use the API to extract metadata for every project

**Result: project_data.csv**

In [8]:
import requests
import numpy as np

# Candidate project IDs to query: 1 through 1499 inclusive.
ids = np.arange(1, 1500)

# Endpoint prefix; a project ID is appended to it to form the request URL.
base_url = "https://www.kfw-entwicklungsbank.de/kfw-ideal-service/api/projects/detail/DE/highRes/"

# Function to get project details for a given ID
def get_project_details(project_id, timeout=30):
    """Fetch the detail record for one project from the API.

    Args:
        project_id: Identifier appended to ``base_url`` to form the request URL.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the whole crawl.

    Returns:
        The decoded JSON payload, or ``None`` when the request itself fails,
        the status code is not 200, or the body is not valid JSON. Every
        failure is reported with ``print``.
    """
    url = f"{base_url}{project_id}"

    # Network-level failures (DNS, connection reset, timeout) are treated like
    # any other failed lookup: report and skip, rather than abort the crawl.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to retrieve data for ID {project_id}. Request error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve data for ID {project_id}. Status code: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError:
        print(f"Failed to decode JSON for ID {project_id}. Response content: {response.content}")
        return None

# Function to calculate the percentages of each indicator/status
def calculate_status_percentages(indicators):
    """Return the share of indicators per fulfilment status, in percent.

    Args:
        indicators: Sequence of indicator dicts, each expected to carry a
            ``"status"`` string. ``None`` is treated like an empty sequence.

    Returns:
        A 3-tuple ``(percent_fully, percent_partial, percent_not)`` for the
        statuses "erfüllt", "teils erfüllt" and "nicht erfüllt". The
        denominator is the total number of indicators, so entries with a
        missing or unrecognised status lower all three shares. Returns
        ``(0, 0, 0)`` when there are no indicators.
    """
    if indicators is None or len(indicators) == 0:
        return (0, 0, 0)  # Handle case with no goals

    total_goals = len(indicators)
    status_counts = {"erfüllt": 0, "teils erfüllt": 0, "nicht erfüllt": 0}
    for indicator in indicators:
        # Tolerate malformed entries (missing key, null status, non-dict item)
        # instead of aborting the whole project on one bad indicator.
        status = indicator.get("status") if isinstance(indicator, dict) else None
        if not isinstance(status, str):
            continue
        status = status.strip().lower()
        if status in status_counts:
            status_counts[status] += 1

    # Calculate percentages
    percent_fully = status_counts["erfüllt"] / total_goals * 100
    percent_partial = status_counts["teils erfüllt"] / total_goals * 100
    percent_not = status_counts["nicht erfüllt"] / total_goals * 100

    return percent_fully, percent_partial, percent_not


# Column accumulators: one entry is appended to every list for each project
# whose details could be fetched, so all lists stay the same length.
project_name_list = []
country_list = []
year_list = []
sector_list = []
subsector_list = []
investment_list = []
fund_source_list = []
carrier_type_list = []
overall_list = []
relevance_list = []
coherence_list = []
effectivity_list = []
efficiency_list = []
impact_list = []
sustainability_list = []
url_list = []
outcome_fully = []
outcome_partially = []
outcome_not = []

for project_id in ids:
    # Report progress against the number of IDs, not the ID array itself.
    print(project_id, "out of", len(ids))
    project_details = get_project_details(project_id)
    if not project_details:
        # Failed or empty lookups contribute no row.
        continue

    project_name = project_details.get('projectName', 'N/A')
    country = project_details.get('country', 'N/A')
    year = project_details.get('year', 'N/A')
    sector = project_details.get('sector', 'N/A')
    subsector = project_details.get('sector2', 'N/A')
    investment = project_details.get('investment', 'N/A')
    # 'sourceOfFounds' is the key exactly as this code has always requested it.
    fund_source = project_details.get('sourceOfFounds', 'N/A')
    carrier_type = project_details.get('carrierType', 'N/A')

    # `or {}` also covers a key that is present but null, which a plain
    # .get(key, {}) default would pass through as None.
    rating = project_details.get('rating') or {}
    overall = rating.get('overall', 'N/A')
    relevance = rating.get('relevance', 'N/A')
    coherence = rating.get('coherence', 'N/A')
    effectivity = rating.get('effectivity', 'N/A')
    efficiency = rating.get('efficiency', 'N/A')
    impact = rating.get('impact', 'N/A')
    sustainability = rating.get('sustainability', 'N/A')
    url = project_details.get('reportUrl', 'N/A')

    outcome = project_details.get('outcome') or {}
    indicators = outcome.get('indicators') or []
    percent_fully, percent_partial, percent_not = calculate_status_percentages(indicators)

    project_name_list.append(project_name)
    country_list.append(country)
    year_list.append(year)
    sector_list.append(sector)
    subsector_list.append(subsector)
    investment_list.append(investment)
    fund_source_list.append(fund_source)
    carrier_type_list.append(carrier_type)
    overall_list.append(overall)
    relevance_list.append(relevance)
    coherence_list.append(coherence)
    effectivity_list.append(effectivity)
    efficiency_list.append(efficiency)
    impact_list.append(impact)
    sustainability_list.append(sustainability)
    url_list.append(url)

    outcome_fully.append(percent_fully)
    outcome_partially.append(percent_partial)
    outcome_not.append(percent_not)


1 out of [   1    2    3 ... 1497 1498 1499]
2 out of [   1    2    3 ... 1497 1498 1499]
3 out of [   1    2    3 ... 1497 1498 1499]
4 out of [   1    2    3 ... 1497 1498 1499]
5 out of [   1    2    3 ... 1497 1498 1499]
6 out of [   1    2    3 ... 1497 1498 1499]
7 out of [   1    2    3 ... 1497 1498 1499]
8 out of [   1    2    3 ... 1497 1498 1499]
9 out of [   1    2    3 ... 1497 1498 1499]
10 out of [   1    2    3 ... 1497 1498 1499]
11 out of [   1    2    3 ... 1497 1498 1499]
12 out of [   1    2    3 ... 1497 1498 1499]
13 out of [   1    2    3 ... 1497 1498 1499]
14 out of [   1    2    3 ... 1497 1498 1499]
15 out of [   1    2    3 ... 1497 1498 1499]
16 out of [   1    2    3 ... 1497 1498 1499]
17 out of [   1    2    3 ... 1497 1498 1499]
18 out of [   1    2    3 ... 1497 1498 1499]
19 out of [   1    2    3 ... 1497 1498 1499]
20 out of [   1    2    3 ... 1497 1498 1499]
21 out of [   1    2    3 ... 1497 1498 1499]
22 out of [   1    2    3 ... 1497 1498 149

In [9]:
import pandas as pd

# Map each output column name to the list collected for it; keyword order
# fixes the column order of the resulting table.
data = dict(
    project_name=project_name_list,
    country=country_list,
    year=year_list,
    sector=sector_list,
    subsector=subsector_list,
    investment=investment_list,
    fund_source=fund_source_list,
    carrier_type=carrier_type_list,
    overall_rating=overall_list,
    relevance=relevance_list,
    coherence=coherence_list,
    effectivity=effectivity_list,
    efficiency=efficiency_list,
    impact=impact_list,
    sustainability=sustainability_list,
    url=url_list,
    outcome_fully=outcome_fully,
    outcome_partially=outcome_partially,
    outcome_not=outcome_not,
)

# One row per collected project.
project_data = pd.DataFrame(data=data)

In [10]:
# Derive the PDF file name (the last path segment ending in ".pdf") from the
# report URL; rows whose URL does not match get a missing value.
project_data['pdf'] = project_data['url'].str.extract(r'/([^/]+\.pdf)$', expand=False)

In [11]:
# Preview the first rows to sanity-check the assembled table.
project_data.head()

Unnamed: 0,project_name,country,year,sector,subsector,investment,fund_source,carrier_type,overall_rating,relevance,coherence,effectivity,efficiency,impact,sustainability,url,outcome_fully,outcome_partially,outcome_not,pdf
0,Renaturierung und nachhaltiges Management von ...,Ukraine,2019,Umweltschutz,Ressourcenschutz inkl. Wasser,2678000.0,"Bundesministerium für Umwelt, Naturschutz, nuk...",NRO,5,4.0,4.0,5.0,5.0,5,4.0,https://www.kfw-entwicklungsbank.de/PDF/Evalui...,0.0,0.0,100.0,IKI_Ukraine_Mooren_2019_D.pdf
1,Sektorreformprogramm Umwelt (PGF),Peru,2018,Umweltschutz,Ressourcenschutz inkl. Wasser,87000000.0,Bundesministerium für wirtschaftliche Zusammen...,Staatlich,2,2.0,,2.0,3.0,2,2.0,https://www.kfw-entwicklungsbank.de/PDF/Evalui...,80.0,20.0,0.0,Peru_SBHUmwelt_2018_D.pdf
2,Schutzgebiete <span lang='en'>Guiana Shield</s...,Lateinamerika (regional),2017,Umweltschutz,Ressourcenschutz inkl. Wasser,2970000.0,"Bundesministerium für Umwelt, Naturschutz, nuk...",NRO,3,3.0,3.0,3.0,3.0,3,3.0,https://www.kfw-entwicklungsbank.de/PDF/Evalui...,16.666667,83.333333,0.0,IKI_Guyana_Shield_Initiative_2017_D.pdf
3,Tropenwaldschutz,Guyana,2015,Umweltschutz,Ressourcenschutz inkl. Wasser,2750000.0,Bundesministerium für wirtschaftliche Zusammen...,Staatlich,3,2.0,,3.0,2.0,3,3.0,https://www.kfw-entwicklungsbank.de/PDF/Evalui...,33.333333,50.0,16.666667,Guyana_Tropenwaldschutz_2015_D.pdf
4,Bwabwata Mudumu Mamili National Parks (BMMP) II,Namibia,2018,Umweltschutz,Ressourcenschutz inkl. Wasser,4410000.0,Bundesministerium für wirtschaftliche Zusammen...,Staatlich,2,2.0,,1.0,2.0,1,2.0,https://www.kfw-entwicklungsbank.de/PDF/Evalui...,75.0,25.0,0.0,Namibia_BMMP_2018_D.pdf


In [12]:
# Write the table to project_data.csv without the DataFrame index column.
project_data.to_csv("project_data.csv", index=False)