<a href="https://colab.research.google.com/github/BarhaPritha/Data-Engineering-of-Rest-Countries-API/blob/main/Rest_Countries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Author: Barha Meherun Pritha

"""
Dataset: REST Countries API with selected fields

https://restcountries.com/v3.1/all?fields=cca3,name,capital,continents,languages,area,borders,population,startOfWeek,timezones

Fields extracted:
  cca3, name.common, capital, continents, languages, area, borders, population, startOfWeek, timezones
"""


import os
import sys
import logging
import sqlite3
import requests
import pandas as pd
from typing import List, Dict, Any


# Root logging setup: timestamped, level-tagged records at INFO and above.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


# Both settings may be overridden through environment variables; the literals
# below are the fallbacks used when the variable is unset.
# The default endpoint asks the API for only the fields listed after "fields=".
API_URL = os.environ.get(
    "REST_COUNTRIES_URL",
    "https://restcountries.com/v3.1/all"
    "?fields=cca3,name,capital,continents,languages,"
    "area,borders,population,startOfWeek,timezones",
)
# NOTE(review): presumably the SQLite database file written later in the
# pipeline (sqlite3 is imported above) -- confirm against the load step.
DB_PATH = os.environ.get("DB_PATH", "data.db")



def extract() -> List[Dict[str, Any]]:
    """Download the raw country records from the REST Countries API.

    Sends a GET request to ``API_URL`` with a 15-second timeout and decodes
    the JSON response body.

    Returns:
        The decoded list of country records. If the request fails, the server
        answers with an error status, the body is not valid JSON, or the
        decoded payload is not a list, a warning is logged and an empty list
        is returned. The function therefore always returns a list (never
        ``None``); callers should treat an empty result as a failed extract.
    """
    logger.info("Extract: requesting data from API")
    try:
        resp = requests.get(API_URL, timeout=15)
        resp.raise_for_status()
        payload = resp.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection, timeout and HTTP-status errors;
        # ValueError covers JSON decoding failures on older requests releases.
        logger.warning("API request failed: %s.", e)
        return []

    if not isinstance(payload, list):
        # Anything other than a JSON array cannot be a list of countries.
        logger.warning(
            "API returned %s instead of a list of countries.",
            type(payload).__name__,
        )
        return []

    return payload


def transform(data: List[Dict[str, Any]]):
    """Reshape raw country records into three flat collections.

    Records without a truthy ``cca3`` code are skipped.

    Args:
        data: Iterable of country dicts as decoded from the API response.

    Returns:
        A 3-tuple ``(countries, languages, country_languages)``:
          * countries -- list of dicts, one per kept record, with the keys
            Code, Name, Capital, Continent, Population, Area, Density,
            Borders, Week Start and Timezones. List-valued fields are joined
            with ", "; Name, Capital and Continent are upper-cased; Density
            is population / area, or None when area is zero or missing.
          * languages -- dict mapping language code to language name, merged
            across all kept records (later records overwrite earlier ones).
          * country_languages -- list of (country code, language code) pairs.
    """

    def _joined(record: Dict[str, Any], field: str) -> str:
        # A missing or null field is treated as an empty list.
        values = record.get(field) or []
        return ", ".join(values)

    country_rows = []
    language_names = {}
    country_language_pairs = []

    for record in data:
        iso_code = record.get("cca3")
        if not iso_code:
            continue

        row = {
            "Code": iso_code,
            "Name": record.get("name", {}).get("common", "").upper(),
            "Capital": _joined(record, "capital").upper(),
            "Continent": _joined(record, "continents").upper(),
            "Population": int(record.get("population") or 0),
            "Area": float(record.get("area") or 0.0),
        }
        people, surface = row["Population"], row["Area"]
        row.update({
            # Guard against division by zero when the area is absent.
            "Density": people / surface if surface else None,
            "Borders": _joined(record, "borders"),
            "Week Start": record.get("startOfWeek"),
            "Timezones": _joined(record, "timezones"),
        })
        country_rows.append(row)

        spoken = record.get("languages") or {}
        language_names.update(spoken.items())
        country_language_pairs.extend(
            (iso_code, lang_code) for lang_code in spoken
        )

    logger.info(f"Transform: {len(country_rows)} countries, {len(language_names)} languages total")
    return country_rows, language_names, country_language_pairs