In [None]:
import requests
from pathlib import Path
from typing import Dict

In [16]:
def download_data(file_map: Dict[str, str], target_dir: str = "downloads") -> None:
    """
    Downloads required datasets into the specified directory.
    
    Args:
        file_map: A dictionary mapping filenames to their download URLs.
        target_dir: The local directory where files will be saved.
    """
    # Create the downloads directory
    path = Path(target_dir)
    path.mkdir(parents=True, exist_ok=True)

    # Browser headers to prevent 406/500 errors
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/91.0.4472.124 Safari/537.36"
    }

    for filename, url in file_map.items():
        print(f"Fetching: {filename}...")
        try:
            # Execute the request
            response = requests.get(url, timeout=30, headers=headers)
            response.raise_for_status()
            
            # Define file path and save binary content
            file_path = path / filename
            with open(file_path, "wb") as f:
                f.write(response.content)
            print(f"Saved to {file_path}")
            
        except Exception as e:
            print(f"Failed to download {filename}: {e}")

In [None]:
# --- Testing ---
links = {
    "annual_forest_change.csv": "https://ourworldindata.org/grapher/annual-change-forest-area.csv?v=1&csvType=full&useColumnShortNames=true",
    "annual_deforestation.csv": "https://ourworldindata.org/grapher/annual-deforestation.csv?v=1&csvType=full&useColumnShortNames=true",
    "protected_land.csv": "https://ourworldindata.org/grapher/terrestrial-protected-areas.csv?v=1&csvType=full&useColumnShortNames=true",
    "degraded_land.csv": "https://ourworldindata.org/grapher/share-degraded-land.csv?v=1&csvType=full&useColumnShortNames=true",
    "red_list_index.csv": "https://ourworldindata.org/grapher/red-list-index.csv?v=1&csvType=full&useColumnShortNames=true",
    "countries_map.zip": "https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/110m/cultural/ne_110m_admin_0_countries.zip"
}
download_data(links)

Fetching: annual_forest_change.csv...
Saved to downloads\annual_forest_change.csv
Fetching: annual_deforestation.csv...
Saved to downloads\annual_deforestation.csv
Fetching: protected_land.csv...
Saved to downloads\protected_land.csv
Fetching: degraded_land.csv...
Saved to downloads\degraded_land.csv
Fetching: red_list_index.csv...
Saved to downloads\red_list_index.csv
Fetching: countries_map.zip...
Failed to download countries_map.zip: 500 Server Error: Internal Server Error for url: https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/110m/cultural/ne_110m_admin_0_countries.zip


In [18]:
import pandas as pd
import geopandas as gpd
from pathlib import Path
from typing import Optional

In [None]:
def merge_geodata(map_path: str, csv_path: str) -> gpd.GeoDataFrame:
    """
    Merges the map with an environmental dataset.
    Requirement: The geopandas dataframe must be the left dataframe.
    """
    # Load the map (the left dataframe)
    # Natural Earth ZIP files contain the shapefile logic geopandas needs
    world = gpd.read_file(map_path)
    
    # Load the environmental CSV
    data = pd.read_csv(csv_path)
    
    # Perform the merge
    # Natural Earth map uses 'ISO_A3' for country codes.
    # Our World in Data (OWID) CSVs typically use 'code'.
    merged = world.merge(data, left_on="ISO_A3", right_on="code", how="left")
    
    return merged

In [None]:
# Define the paths
downloads_dir = Path("downloads")
map_file = downloads_dir / "ne_110m_admin_0_countries.zip"
forest_csv = downloads_dir / "annual_forest_change.csv"

# Run the merge
test_gdf = merge_geodata(str(map_file), str(forest_csv))

#    Verification Checks 

# Check the type (Must be a GeoDataFrame)
print(f"Object type: {type(test_gdf)}")

# Check for the 'geometry' column (Required for Streamlit plotting)
if 'geometry' in test_gdf.columns:
    print("Success: Geometry column preserved.")
else:
    print("Error: Geometry lost. Ensure the map is on the left.")

# Check for the data column
# Usually OWID data columns have names like 'Annual change in forest area'
print(f"Columns in merged data: {test_gdf.columns.tolist()[:10]}...")

# 4. Preview the top 5 rows
test_gdf.head()

Object type: <class 'geopandas.geodataframe.GeoDataFrame'>
Success: Geometry column preserved.
Columns in merged data: ['featurecla', 'scalerank', 'LABELRANK', 'SOVEREIGNT', 'SOV_A3', 'ADM0_DIF', 'LEVEL', 'TYPE', 'TLC', 'ADMIN']...


Unnamed: 0,featurecla,scalerank,LABELRANK,SOVEREIGNT,SOV_A3,ADM0_DIF,LEVEL,TYPE,TLC,ADMIN,...,FCLASS_IT,FCLASS_NL,FCLASS_SE,FCLASS_BD,FCLASS_UA,geometry,entity,code,year,net_change_forest_area
0,Admin-0 country,1,6,Fiji,FJI,0,2,Sovereign country,1,Fiji,...,,,,,,"MULTIPOLYGON (((180 -16.06713, 180 -16.55522, ...",Fiji,FJI,1991.0,6677.997
1,Admin-0 country,1,6,Fiji,FJI,0,2,Sovereign country,1,Fiji,...,,,,,,"MULTIPOLYGON (((180 -16.06713, 180 -16.55522, ...",Fiji,FJI,1992.0,6677.997
2,Admin-0 country,1,6,Fiji,FJI,0,2,Sovereign country,1,Fiji,...,,,,,,"MULTIPOLYGON (((180 -16.06713, 180 -16.55522, ...",Fiji,FJI,1993.0,6677.997
3,Admin-0 country,1,6,Fiji,FJI,0,2,Sovereign country,1,Fiji,...,,,,,,"MULTIPOLYGON (((180 -16.06713, 180 -16.55522, ...",Fiji,FJI,1994.0,6677.997
4,Admin-0 country,1,6,Fiji,FJI,0,2,Sovereign country,1,Fiji,...,,,,,,"MULTIPOLYGON (((180 -16.06713, 180 -16.55522, ...",Fiji,FJI,1995.0,6677.997
