In [1]:
import polars as pl


In [2]:
vdem = pl.read_parquet("../vdemData/V-Dem-CY-Full+Others-v15.parquet")
vdem.shape

(27913, 4607)

In [None]:
# Define the list of EU member states plus USA and UK.
subset_members = [
    "Austria", "Belgium", "Bulgaria", "Croatia", "Cyprus",
    "Czech Republic", "Denmark", "Estonia", "Finland", "France",
    "Germany", "Greece", "Hungary", "Ireland", "Italy",
    "Latvia", "Lithuania", "Luxembourg", "Malta", "Netherlands",
    "Poland", "Portugal", "Romania", "Slovakia", "Slovenia",
    "Spain", "Sweden", "United States of America", "United Kingdom"
]
target_countries = eu_members + ["United States", "United Kingdom"]

# Filter the dataset for only those countries.
# (Assuming the country column is named "country_name". If it’s different in your data, update accordingly.)
subset = vdem.filter(pl.col("country_name").is_in(target_countries))

# Select the columns for country, year, and GDP.
# (Assuming your GDP column is labeled "gdp". Adjust this column name if needed.)
gdp_over_time = subset.select(["country_name", "year", "gdp"]).sort(["country_name", "year"])

# Display the resulting dataset.
print(gdp_over_time)


In [8]:
import requests
import polars as pl

# Mapping of target countries (EU member states, USA, UK) to their World Bank country codes.
countries = {
    "Austria": "AUT",
    "Belgium": "BEL",
    "Bulgaria": "BGR",
    "Croatia": "HRV",
    "Cyprus": "CYP",
    "Czech Republic": "CZE",
    "Denmark": "DNK",
    "Estonia": "EST",
    "Finland": "FIN",
    "France": "FRA",
    "Germany": "DEU",
    "Greece": "GRC",
    "Hungary": "HUN",
    "Ireland": "IRL",
    "Italy": "ITA",
    "Latvia": "LVA",
    "Lithuania": "LTU",
    "Luxembourg": "LUX",
    "Malta": "MLT",
    "Netherlands": "NLD",
    "Poland": "POL",
    "Portugal": "PRT",
    "Romania": "ROU",
    "Slovakia": "SVK",
    "Slovenia": "SVN",
    "Spain": "ESP",
    "Sweden": "SWE",
    "United States": "USA",
    "United Kingdom": "GBR"
}

# Combine all country codes into one string (separated by semicolons as required by the API)
country_codes = ";".join(countries.values())

# Base URL for the World Bank API GDP indicator (current US dollars)
base_url = f"https://api.worldbank.org/v2/country/{country_codes}/indicator/NY.GDP.MKTP.CD"
params = {
    "format": "json",
    "per_page": 1000,  # Maximum number of records per page.
    "date": "1960:2021"  # You can adjust this range as needed.
}

# First call to determine the total number of pages.
response = requests.get(base_url, params=params)
data = response.json()

if not data or len(data) < 2:
    raise ValueError("Unexpected API response structure.")

meta = data[0]
total_pages = meta.get("pages", 1)

# Collect records from all pages.
records = []
for page in range(1, total_pages + 1):
    params["page"] = page
    response = requests.get(base_url, params=params)
    page_data = response.json()
    
    if page_data and len(page_data) > 1:
        for entry in page_data[1]:
            # Only include records that have a GDP value.
            if entry.get("value") is not None:
                records.append({
                    "country": entry["country"]["value"],  # e.g., "United States"
                    "year": int(entry["date"]),            # Year as an integer
                    "gdp": entry["value"]                    # GDP value in current US dollars
                })

# Create a Polars DataFrame from the records.
gdp_df = pl.DataFrame(records)

# Sort the data for clarity (first by country, then by year).
gdp_df = gdp_df.sort(["country", "year"])

# Display the resulting GDP DataFrame.
print(gdp_df)


shape: (1_465, 3)
┌───────────────┬──────┬───────────┐
│ country       ┆ year ┆ gdp       │
│ ---           ┆ ---  ┆ ---       │
│ str           ┆ i64  ┆ f64       │
╞═══════════════╪══════╪═══════════╡
│ Austria       ┆ 1960 ┆ 6.6241e9  │
│ Austria       ┆ 1961 ┆ 7.3466e9  │
│ Austria       ┆ 1962 ┆ 7.7930e9  │
│ Austria       ┆ 1963 ┆ 8.4141e9  │
│ Austria       ┆ 1964 ┆ 9.2136e9  │
│ …             ┆ …    ┆ …         │
│ United States ┆ 2017 ┆ 1.9612e13 │
│ United States ┆ 2018 ┆ 2.0657e13 │
│ United States ┆ 2019 ┆ 2.1540e13 │
│ United States ┆ 2020 ┆ 2.1354e13 │
│ United States ┆ 2021 ┆ 2.3681e13 │
└───────────────┴──────┴───────────┘


In [6]:
gdp_df.rows

<bound method DataFrame.rows of shape: (1_465, 3)
┌───────────────┬──────┬───────────┐
│ country       ┆ year ┆ gdp       │
│ ---           ┆ ---  ┆ ---       │
│ str           ┆ i64  ┆ f64       │
╞═══════════════╪══════╪═══════════╡
│ Austria       ┆ 1960 ┆ 6.6241e9  │
│ Austria       ┆ 1961 ┆ 7.3466e9  │
│ Austria       ┆ 1962 ┆ 7.7930e9  │
│ Austria       ┆ 1963 ┆ 8.4141e9  │
│ Austria       ┆ 1964 ┆ 9.2136e9  │
│ …             ┆ …    ┆ …         │
│ United States ┆ 2017 ┆ 1.9612e13 │
│ United States ┆ 2018 ┆ 2.0657e13 │
│ United States ┆ 2019 ┆ 2.1540e13 │
│ United States ┆ 2020 ┆ 2.1354e13 │
│ United States ┆ 2021 ┆ 2.3681e13 │
└───────────────┴──────┴───────────┘>

In [9]:
unique_countries = gdp_df.select(pl.col("country")).unique()
print(unique_countries)


shape: (29, 1)
┌────────────────┐
│ country        │
│ ---            │
│ str            │
╞════════════════╡
│ Austria        │
│ Belgium        │
│ Bulgaria       │
│ Croatia        │
│ Cyprus         │
│ …              │
│ Slovenia       │
│ Spain          │
│ Sweden         │
│ United Kingdom │
│ United States  │
└────────────────┘
