In [1]:
import requests
import pandas as pd

def collect_world_bank_data(indicator, country='SN', start_year=1960, end_year=2024):
    url = f'http://api.worldbank.org/v2/country/{country}/indicator/{indicator}?format=json&date={start_year}:{end_year}'
    response = requests.get(url)
    data = response.json()[1]  # The data is in the second element of the response JSON
    
    records = [{'Year': entry['date'], indicator: entry['value']} for entry in data]
    df = pd.DataFrame(records)
    return df

gdp_per_capita_df = collect_world_bank_data('NY.GDP.PCAP.CD')
gdp_growth_df = collect_world_bank_data('NY.GDP.MKTP.KD.ZG')
inflation_df = collect_world_bank_data('FP.CPI.TOTL.ZG')

# Saving the collected data
gdp_per_capita_df.to_csv('../data/processed/senegal_gdp_per_capita.csv', index=False)
gdp_growth_df.to_csv('../data/processed/senegal_gdp_growth.csv', index=False)
inflation_df.to_csv('../data/processed/senegal_inflation.csv', index=False)




In [26]:
import requests
import pandas as pd

def collect_hdi_data():
    url = 'https://hdrdata.org/api/CompositeIndices/query?apikey=HDR-PAZVKzfcEOhHxPHMFkkzi4WyAbtTasB6&countryOrAggregation=SEN&year=2020'
    response = requests.get(url)
    
    # Check if the request was successful
    if response.status_code == 200:
        print("Raw Response:", response.text)  # Print the raw response text for inspection
        try:
            data = response.json()
            # Check if the response contains the expected data format
            if isinstance(data, list) and len(data) > 0:
                # Filter for HDI indicator data only
                records = [{'Year': entry.get('year'), 'HDI - Human Development Index': entry.get('value')}
                           for entry in data if 'hdi' in entry.get('indicator').lower()]
                
                if records:  # Ensure records are not empty
                    df = pd.DataFrame(records)
                    df.to_csv('../data/processed/senegal_hdi.csv', index=False)
                    print("HDI data saved to ../data/processed/senegal_hdi.csv")
                    return df
                else:
                    print("No HDI data found in the response.")
                    return None
            else:
                print("Unexpected data format or empty data received.")
                return None
        except ValueError as e:
            print("Failed to parse JSON response:", e)
            print("Response text:", response.text)
            return None
    else:
        print(f"Failed to retrieve data: HTTP {response.status_code}")
        print("Response text:", response.text)
        return None

# Example usage
hdi_df = collect_hdi_data()

Raw Response: [{"country":"SEN - Senegal","dimension":"","index":"GII - Gender Inequality Index","indicator":"abr - Adolescent Birth Rate (births per 1,000 women ages 15-19)","year":"2020","value":"67.617"},{"country":"SEN - Senegal","dimension":"","index":"PHDI - Planetary pressures–adjusted Human Development Index","indicator":"co2_prod - Carbon dioxide emissions per capita (production) (tonnes)","year":"2020","value":"0.658"},{"country":"SEN - Senegal","dimension":"","index":"IHDI - Inequality-adjusted Human Development Index","indicator":"coef_ineq - Coefficient of human inequality","year":"2020","value":"34.974"},{"country":"SEN - Senegal","dimension":"","index":"PHDI - Planetary pressures–adjusted Human Development Index","indicator":"diff_hdi_phdi - Difference from HDI value (%)","year":"2020","value":"2.724"},{"country":"SEN - Senegal","dimension":"","index":"HDI - Human Development Index","indicator":"eys - Expected Years of Schooling (years)","year":"2020","value":"9.053"},{"

In [27]:
import requests
import pandas as pd

def collect_world_bank_data(indicator, country='SN'):
    url = f'http://api.worldbank.org/v2/country/{country}/indicator/{indicator}?format=json&date=2000:2024&per_page=1000'
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        if len(data) > 1 and isinstance(data[1], list):
            records = [{'Year': entry['date'], indicator: entry['value']} for entry in data[1]]
            df = pd.DataFrame(records)
            return df
        else:
            print("No data available in the response.")
            return None
    else:
        print(f"Failed to retrieve data: HTTP {response.status_code}")
        return None

# Example usage
literacy_df = collect_world_bank_data('SE.ADT.LITR.ZS')
poverty_df = collect_world_bank_data('SI.POV.DDAY')

# Save data to CSV
if literacy_df is not None:
    literacy_df.to_csv('../data/processed/senegal_literacy.csv', index=False)
if poverty_df is not None:
    poverty_df.to_csv('../data/processed/senegal_poverty.csv', index=False)


In [28]:
import pandas as pd

# Population using at least basic sanitation services (%)
sanitation_data = {
    "Year": [2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009, 2008],
    "Sanitation (%)": [60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 47, 46, 45]
}
sanitation_df = pd.DataFrame(sanitation_data)

# GDP Growth Rate (NY.GDP.MKTP.KD.ZG)
gdp_growth_data = {
    "Year": [2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009, 2008],
    "GDP Growth Rate (%)": [3.7, 3.8, 6.5, 1.3, 4.6, 6.2, 7.4, 6.4, 6.3, 6.2, 2.4, 4.0, 1.3, 3.4, 2.8, 3.7]
}
gdp_growth_df = pd.DataFrame(gdp_growth_data)

# GDP per Capita (NY.GDP.PCAP.CD)
gdp_per_capita_data = {
    "Year": [2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009, 2008],
    "GDP per Capita (USD)": [1745.97, 1594.99, 1630.70, 1492.48, 1462.68, 1484.23, 1385.20, 1290.75, 1238.13, 1417.09, 1391.53, 1334.73, 1383.54, 1286.60, 1323.97, 1419.53]
}
gdp_per_capita_df = pd.DataFrame(gdp_per_capita_data)

# HDI Data
hdi_data = {
    "Year": [2020],
    "HDI - Human Development Index": [0.514]
}
hdi_df = pd.DataFrame(hdi_data)

# Inflation Rate (FP.CPI.TOTL.ZG)
inflation_rate_data = {
    "Year": [2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009, 2008],
    "Inflation Rate (%)": [None, 9.7, 2.2, 2.5, 1.8, 0.5, 1.3, 0.8, 0.1, -1.1, 0.7, 1.4, 3.4, 1.2, -2.2, 7.3]
}
inflation_rate_df = pd.DataFrame(inflation_rate_data)

# Life Expectancy Data
life_expectancy_data = {
    "Year": [2019, 2015, 2010, 2000],
    "Life Expectancy (years)": [68.6, 67.2, 65.5, 58.6]
}
life_expectancy_df = pd.DataFrame(life_expectancy_data)

# Literacy Rate (SE.ADT.LITR.ZS)
literacy_rate_data = {
    "Year": [2023, 2022, 2017, 2013, 2011, 2009, 2006, 2002],
    "Literacy Rate (%)": [None, 57.67, 51.90, 43.53, 51.81, 48.04, 41.89, 39.28]
}
literacy_rate_df = pd.DataFrame(literacy_rate_data)

# Poverty Rate (SI.POV.DDAY)
poverty_rate_data = {
    "Year": [2023, 2021, 2018, 2011, 2005, 2001],
    "Poverty Rate (%)": [None, 9.9, 9.2, 41.0, 41.1, 52.4]
}
poverty_rate_df = pd.DataFrame(poverty_rate_data)


In [29]:
# Merge all dataframes on 'Year'
merged_df = sanitation_df.merge(gdp_growth_df, on='Year', how='outer') \
                         .merge(gdp_per_capita_df, on='Year', how='outer') \
                         .merge(hdi_df, on='Year', how='outer') \
                         .merge(inflation_rate_df, on='Year', how='outer') \
                         .merge(life_expectancy_df, on='Year', how='outer') \
                         .merge(literacy_rate_df, on='Year', how='outer') \
                         .merge(poverty_rate_df, on='Year', how='outer')

# Sort the merged dataframe by Year
merged_df = merged_df.sort_values('Year')

# Save to a CSV file
merged_df.to_csv('../data/processed/senegal_combined_data.csv', index=False)
