In [1]:
%load_ext autoreload
%autoreload 2

import os
import shutil
from datetime import date
from io import BytesIO
from pathlib import Path
from urllib.request import urlopen
from zipfile import ZipFile, ZIP_DEFLATED

import geopandas as gpd
import lxml.html as lh
import numpy as np
import pandas as pd
import pyproj
import requests
from bs4 import BeautifulSoup
from census import Census
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

from codes import utils as cutil

# Shorthand for building MultiIndex slicers, e.g. ``df.loc[idx["ITA", :], col]``.
idx = pd.IndexSlice

# Date tag embedded in the file names of the downloaded raw archives.
datestamp = "20200320"
# NOTE(review): absolute, user-specific path; it is not referenced by any other
# cell visible in this notebook -- confirm whether it is still needed.
sacredentials_fpath = "/Users/ianbolliger/service-accounts/bolliger32.json"

# Local destination of the Natural Earth admin-1 shapefile archive.
adm1_shp_path = (
    cutil.DATA_RAW
    / "multi_country"
    / f"ne_10m_admin_1_states_provinces_{datestamp}.zip"
)
# URL template for per-country GADM 3.6 archives; ``ftype`` is "gpkg" or "shp".
adm_url_fmt = (
    "https://biogeo.ucdavis.edu/data/gadm3.6/{ftype}/gadm36_{iso3}_{ftype}.zip"
)
# Name of the GeoPackage inside a GADM archive.
# NOTE(review): not referenced by the visible cells, which rebuild this name inline.
adm3_gpkg_fmt = "gadm36_{iso3}.gpkg"

In [2]:
def download_zip(url, out_path, overwrite=False):
    """Download ``url`` to ``out_path`` unless the file is already present.

    Parameters
    ----------
    url : str
        Address of the file to fetch (redirects are followed).
    out_path : str or pathlib.Path
        Destination file. Missing parent directories are created.
    overwrite : bool, optional
        If True, download even when ``out_path`` already exists.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status. Nothing is written in that
        case, so an error page is never cached as if it were the archive.
    """
    out_path = Path(out_path)
    if out_path.exists() and not overwrite:
        return

    r = requests.get(url, allow_redirects=True)
    # Fail before touching the disk: a saved error body would make the
    # ``exists()`` check above skip every later retry.
    r.raise_for_status()

    out_path.parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "wb") as f:
        f.write(r.content)


def process_gadm(in_gdf):
    """Reduce a raw GADM layer to named admin units indexed by their hierarchy.

    Keeps the country code, the admin-level names and the geometry, renames the
    GADM columns to ``adm0_name`` .. ``adm3_name`` (``adm3_name`` only when the
    layer has a ``NAME_3`` column), adds ``latitude``/``longitude`` taken from
    the geometry centroids and returns the frame indexed by the admin names.
    """
    renames = {"GID_0": "adm0_name", "NAME_1": "adm1_name", "NAME_2": "adm2_name"}
    keep = ["GID_0", "NAME_1", "NAME_2", "geometry"]
    index_levels = ["adm0_name", "adm1_name", "adm2_name"]

    # The third admin level is optional in GADM layers.
    if "NAME_3" in in_gdf.columns:
        keep.append("NAME_3")
        renames["NAME_3"] = "adm3_name"
        index_levels.append("adm3_name")

    out = in_gdf[keep].rename(columns=renames)

    centroids = out["geometry"].centroid
    out["latitude"] = centroids.y
    out["longitude"] = centroids.x

    return out.set_index(index_levels)

## Global adm1

In [3]:
# get file
# Natural Earth 10m admin-1 (states/provinces) archive; with overwrite=False the
# download is skipped when adm1_shp_path already exists.
adm1_url = "https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_1_states_provinces.zip"
download_zip(adm1_url, adm1_shp_path, overwrite=False)

In [4]:
# process
# Read the shapefile straight out of the downloaded archive and keep only the
# columns used downstream.
in_gdf = gpd.read_file(cutil.zipify_path(adm1_shp_path))
adm_gdf = in_gdf[
    ["adm0_a3", "name", "geometry", "latitude", "longitude", "gadm_level", "name_alt"]
]
# gadm_level is part of the index only so that duplicates are detected per level.
adm_gdf = adm_gdf.rename(
    columns={"adm0_a3": "adm0_name", "name": "adm1_name"}
).set_index(["adm0_name", "adm1_name", "gadm_level"])

# for now, when there are duplicates, just drop the second one without any better information
# could not find a data dictionary for the shapefile
adm_gdf = adm_gdf[~adm_gdf.index.duplicated(keep="first")].reset_index(
    drop=False, level="gadm_level"
)

# we know france and italy are actually admin 2
adm_gdf.loc[idx[["FRA", "ITA"], :], "gadm_level"] = 2

# separate into levels
adm1_gdf = adm_gdf[adm_gdf.gadm_level == 1].drop(columns="gadm_level")
adm2_gdf = adm_gdf[adm_gdf.gadm_level == 2].drop(columns="gadm_level")
# The level-2 features carry their own name in what was called "adm1_name", so
# relabel that index level; adm2_gdf has no adm1_name level at this point.
adm2_gdf.index = adm2_gdf.index.set_names("adm2_name", level="adm1_name")

# Set up an adm3 dataset that is currently empty
# It reuses adm2_gdf's columns and CRS, gains empty adm3_name/adm1_name columns
# and is indexed by all four admin levels so later appends line up.
adm3_gdf = gpd.GeoDataFrame(
    columns=adm2_gdf.reset_index(drop=False).columns, crs=adm_gdf.crs
)
adm3_gdf["adm3_name"] = []
adm3_gdf["adm1_name"] = []
adm3_gdf = adm3_gdf.set_index(["adm0_name", "adm1_name", "adm2_name", "adm3_name"])

## adm2+

### FRA

In [5]:
# Department-level attributes for France (region, capital, density, area,
# population), read from an interim Stata file and indexed by department name.
adm2_fr_fpath = cutil.DATA / "interim" / "france" / "departement_info.dta"
adm2_fr = pd.read_stata(
    adm2_fr_fpath,
    index_col="departement_name",
    columns=[
        "departement_name",
        "adm1_name",
        "cheflieu",
        "densitehabitantskm2",
        "superficiekmâ",
        "population",
    ],
)
# Re-encode as ISO-8859-1 and decode as UTF-8 to repair text that was decoded
# with the wrong codec when the file was read.
adm2_fr.index = adm2_fr.index.str.encode("ISO-8859-1").str.decode("utf-8")
adm2_fr.cheflieu = adm2_fr.cheflieu.str.encode("ISO-8859-1").str.decode("utf-8")
adm2_fr.index.name = "adm2_name"
# Translate the French column names to the names used in the adm datasets.
adm2_fr = adm2_fr.rename(
    columns={
        "cheflieu": "capital",
        "densitehabitantskm2": "pop_density_km2",
        "superficiekmâ": "area_km2",
    }
)

# manually correct some differences in naming btwn 2 datasets
# Keys are spellings in adm2_gdf; values are presumably the spellings used in
# adm2_fr (some of them misspelled on purpose to match) -- verify against the file.
name_map = {
    "Guyane française": "Guyane",
    "Haute-Rhin": "Haut-Rhin",
    "Vendée": "Vandée",
    "Côtes-d'Armor": "Côtes d'Armor",
    "Seine-Saint-Denis": "Seine-St-Denis",
    "Val-d'Oise": "Val-D'Oise",
    "Seien-et-Marne": "Seine-et-Marne",
}
adm2_gdf = adm2_gdf.rename(index=name_map, level="adm2_name")

# merge back in
# The outer join keeps departments found in only one of the two datasets; the
# join brings in adm1_name, which then becomes part of the index.
adm2_gdf = (
    adm2_gdf.join(adm2_fr, on="adm2_name", how="outer")
    .reset_index(drop=False)
    .set_index(["adm0_name", "adm1_name", "adm2_name"])
)

# collapse to adm1 level and add that onto list
# Geometries are unioned per region while area and population are summed.
adm1_fr = adm2_gdf.loc[["FRA"], ["geometry", "area_km2", "population"]].dissolve(
    by=["adm0_name", "adm1_name"], aggfunc="sum"
)
adm1_fr["latitude"] = adm1_fr.geometry.centroid.y
adm1_fr["longitude"] = adm1_fr.geometry.centroid.x
# NOTE(review): DataFrame.append was removed in pandas 2.0; this notebook relies
# on an older pandas here and in later cells.
adm1_gdf = adm1_gdf.append(adm1_fr)

### Others

All of these come from the same source (GADM 3.6), but:
- some are read from the GeoPackage (`gpkg`) file, others from the shapefile
- some go down to adm3, others only to adm2

In [6]:
# Countries whose boundaries are taken from the per-country GADM archives.
isos = ["ITA", "USA", "CHN", "KOR", "IRN"]
# Per-country renames applied to the adm2_name level of adm2_gdf before it is
# merged with the GADM names (existing spelling -> spelling to merge on).
adm2_name_maps = {
    "ITA": {
        "Firenze": "Florence",
        "Reggio Emilia": "Reggio Nell'Emilia",
        "Reggio Calabria": "Reggio Di Calabria",
        "Pesaro e Urbino": "Pesaro E Urbino",
        "Barletta-Andria Trani": "Barletta-Andria-Trani",
        "Crotene": "Crotone",
        "Aoste": "Aosta",
        "Bozen": "Bolzano",
        "Turin": "Torino",
        "Padova": "Padua",
        "Forlì-Cesena": "Forli' - Cesena",
        "Siracusa": "Syracuse",
        "Oristrano": "Oristano",
        "Mantova": "Mantua",
        "Monza e Brianza": "Monza and Brianza",
        "Massa-Carrara": "Massa Carrara",
    }
}
for iso3 in isos:
    # download if needed
    # China is read from the shapefile archive, every other country from the
    # GeoPackage archive.
    if iso3 == "CHN":
        ftype = "shp"
    else:
        ftype = "gpkg"
    zip_path = cutil.get_adm_zip_path(iso3, datestamp)
    if not zip_path.exists():
        download_zip(adm_url_fmt.format(iso3=iso3, ftype=ftype), zip_path)
    # Path of the layer to read inside the archive.
    if ftype == "gpkg":
        to_open = zip_path / f"gadm36_{iso3}.gpkg"
    else:
        to_open = zip_path / f"gadm36_{iso3}_3.shp"

    # load gdf
    in_gdf = process_gadm(gpd.read_file(cutil.zipify_path(to_open)))

    # Layers that go down to adm3 are stored at that level first.
    if "adm3_name" in in_gdf.index.names:
        adm3_gdf = adm3_gdf.append(in_gdf)

        # now aggregate to level 2 to insert into that level
        in_gdf = in_gdf.dissolve(by=["adm0_name", "adm1_name", "adm2_name"])
        in_gdf["latitude"] = in_gdf.geometry.centroid.y
        in_gdf["longitude"] = in_gdf.geometry.centroid.x

    # insert into level 2 dataset
    # Countries already present in adm2_gdf are merged by adm2 name so their
    # existing attributes are kept; all others are simply appended.
    # NOTE(review): adm2_name_maps[iso3] raises KeyError for any such country
    # other than ITA, because only ITA has an entry.
    if iso3 in adm2_gdf.index.get_level_values("adm0_name").unique():
        adm2_gdf = adm2_gdf.rename(index=adm2_name_maps[iso3], level="adm2_name")
        res = pd.merge(
            adm2_gdf.loc[idx[iso3, :, :]],
            in_gdf.reset_index(drop=False),
            on="adm2_name",
            how="outer",
            indicator=True,
        ).set_index(["adm0_name", "adm1_name", "adm2_name"])
        # Every adm2 unit must be matched on both sides, otherwise a name is off.
        assert (res._merge == "both").all()
        del res["_merge"]
        # Prefer the GADM value (suffix _y) and fall back to the existing one (_x).
        for i in ["geometry", "latitude", "longitude"]:
            res[i] = res[i + "_y"].fillna(res[i + "_x"])
            res = res.drop(columns=[i + "_x", i + "_y"])
        # Swap this country's old rows for the merged ones.
        adm2_gdf = adm2_gdf.loc[
            adm2_gdf.index.get_level_values("adm0_name") != iso3
        ].append(res)
    else:
        adm2_gdf = adm2_gdf.append(in_gdf)

    # now aggregate to level 1 to replace that level with better/more consistent data
    in_gdf = in_gdf.dissolve(by=["adm0_name", "adm1_name"])
    in_gdf["latitude"] = in_gdf.geometry.centroid.y
    in_gdf["longitude"] = in_gdf.geometry.centroid.x
    adm1_gdf = adm1_gdf[adm1_gdf.index.get_level_values("adm0_name") != iso3]
    adm1_gdf = adm1_gdf.append(in_gdf)

## Manual name adjustments

Some manual adjustments to make this match with the naming of the data produced by country teams

### ITA

In [7]:
## get new regions/provinces
# Region (adm1) renames: existing spelling -> spelling to use.
region_dict = {
    "Emilia-Romagna": "Emilia Romagna",
    "Friuli-Venezia Giulia": "Friuli Venezia Giulia",
    "Apulia": "Puglia",
    "Sicily": "Sicilia",
}
# Regions added to / removed from the adm1 dataset further down.
add_regions = ["P.A. Bolzano", "P.A. Trento"]
drop_regions = ["Trentino-Alto Adige"]
# Province (adm2) renames: existing spelling -> spelling to use.
province_dict = {
    "Forli' - Cesena": "Forlì-Cesena",
    "Reggio Nell'Emilia": "Reggio nell'Emilia",
    "Padua": "Padova",
    "Reggio Di Calabria": "Reggio di Calabria",
    "Pesaro E Urbino": "Pesaro e Urbino",
    "Syracuse": "Siracusa",
    "Florence": "Firenze",
    "Mantua": "Mantova",
    "Monza and Brianza": "Monza e della Brianza",
}
# Provinces to add, with the region each belongs to (parallel lists).
add_provinces = ["Sud Sardegna"]
add_provinces_reg = ["Sardegna"]
# Index-only frames for the new units: they have no columns, so the appended
# rows carry no geometry or attributes.
n_reg = len(add_regions)
new_reg = pd.DataFrame(
    dict(adm0_name=["ITA"] * n_reg, adm1_name=add_regions)
).set_index(["adm0_name", "adm1_name"])
n_prov = len(add_provinces)
new_prov = pd.DataFrame(
    dict(
        adm0_name=["ITA"] * n_prov, adm2_name=add_provinces, adm1_name=add_provinces_reg
    )
).set_index(["adm0_name", "adm1_name", "adm2_name"])


## update regions for 2 provinces that are treated as
## autonomous regions in the italy repo used for ITA_processed
# adm1_name is moved out of the index so it can be overwritten, then restored.
tmp = adm2_gdf.reset_index(level="adm1_name", drop=False)
for i in ["Bolzano", "Trento"]:
    tmp.loc[idx["ITA", i], "adm1_name"] = f"P.A. {i}"
adm2_gdf = tmp.reset_index(drop=False).set_index(
    ["adm0_name", "adm1_name", "adm2_name"]
)


## fix names
# The renames match on the label alone, in every country present in the frames.
adm1_gdf = adm1_gdf.rename(index=region_dict, level="adm1_name")
adm2_gdf = adm2_gdf.rename(index=region_dict, level="adm1_name")
adm3_gdf = adm3_gdf.rename(index=region_dict, level="adm1_name")
adm2_gdf = adm2_gdf.rename(index=province_dict, level="adm2_name")
adm3_gdf = adm3_gdf.rename(index=province_dict, level="adm2_name")


## replace Trentino-Alto Adige with the two autonomous provinces as regions
adm1_gdf = adm1_gdf.append(new_reg)
adm1_gdf = adm1_gdf.drop(index=drop_regions, level="adm1_name")


## add additional province of sardegna
adm2_gdf = adm2_gdf.append(new_prov)

  result = self._run_cell(
  coro.send(None)


## Pop

### US

In [8]:
# The Census API key is a credential, so it is read from the environment instead
# of being committed with the notebook. Export CENSUS_API_KEY before running;
# when the variable is unset the key is an empty string.
# NOTE(review): the key that used to be hard-coded here has been published with
# the notebook and should be revoked.
census_apikey = os.environ.get("CENSUS_API_KEY", "")

In [9]:
# Query the ACS 5-year endpoint for NAME and variable B01003_001E for every
# place and every county in every state. B01003_001E is renamed to "pop" in
# the next cell.
c = Census(census_apikey)
pop_city = pd.DataFrame(
    c.acs5.state_place(("NAME", "B01003_001E"), Census.ALL, Census.ALL)
)
# NOTE(review): pop_cty is not used by any later cell visible in this notebook.
pop_cty = pd.DataFrame(
    c.acs5.state_county(("NAME", "B01003_001E"), Census.ALL, Census.ALL)
)

#### Place-level

In [10]:
# save the place-level populations
# NAME is "<place>, <state>". Split on the last ", " only, so a place name that
# itself contains a comma still yields exactly two parts instead of breaking the
# two-column assignment.
name_parts = pop_city.NAME.str.rsplit(", ", n=1, expand=True)
pop_city["adm3_name"] = name_parts[0]
# NOTE(review): "adm_1_name" differs from the "adm1_name" spelling used in the
# rest of the notebook; it is kept because it is the header of the written CSV.
pop_city["adm_1_name"] = name_parts[1]
pop_city = pop_city.rename(columns={"B01003_001E": "pop"}).drop(columns="NAME")
pop_city = pop_city.set_index(["adm3_name", "adm_1_name"])
pop_city.to_csv(cutil.DATA / "interim" / "usa" / "adm3_pop.csv", index=True)

#### County-level

In [11]:
## get county-level populations
# The page holds a fixed-width text table inside a <pre> element.
hasc_fips_url = "http://www.statoids.com/yus.html"
# Create a handle, page, to handle the contents of the website
# NOTE(review): the HTTP status is not checked before parsing.
page = requests.get(hasc_fips_url)

# Store the contents of the website under doc
doc = lh.fromstring(page.content)

# Parse data
# Take the first text node of the <pre> block and drop its first and last lines.
tr_elements = doc.xpath('//*[@id="yui-main"]/div/div/pre/text()[1]')
row_list = tr_elements[0].split("\r\n")[1:-1]
headers = row_list[0].split()
# Skip blank lines, header lines and the dashed separator lines.
valid_rows = [r for r in row_list if r != "" and r[:4] not in ["Name", "----"]]
# Each field is sliced at fixed character offsets; the numeric ones have their
# thousands separators removed before conversion.
name = [r[:23].rstrip() for r in valid_rows]
t = [r[23] for r in valid_rows]
hasc = [r[25:33] for r in valid_rows]
fips = [r[34:39] for r in valid_rows]
pop = [int(r[40:49].lstrip().replace(",", "")) for r in valid_rows]
area_km2 = [int(r[50:57].lstrip().replace(",", "")) for r in valid_rows]
area_mi2 = [int(r[58:65].lstrip().replace(",", "")) for r in valid_rows]
z = [r[66] for r in valid_rows]
capital = [r[68:] for r in valid_rows]

# turn into dataframe
# headers, area_mi2 and z are parsed above but not included here.
# Indexed by HASC code so it can be joined onto the GADM HASC_2 column.
us_county_df = pd.DataFrame(
    {
        "name": name,
        "type": t,
        "hasc": hasc,
        "fips": fips,
        "population": pop,
        "area_km2": area_km2,
        "capital": capital,
    }
).set_index("hasc")

##### Merge in us adm2 dataset

In [12]:
# Re-read the raw USA GADM layer (not passed through process_gadm) to get HASC_2.
# NOTE(review): the chained assignment also rebinds in_gdf, which an earlier cell
# uses as its working variable.
us_gdf = in_gdf = gpd.read_file(
    cutil.zipify_path(cutil.get_adm_zip_path("USA", datestamp) / "gadm36_USA.gpkg")
)
# Only rows with a HASC_2 code can be joined to the county table.
us_gdf = us_gdf[us_gdf.HASC_2.notnull()]

In [13]:
# Attach the scraped county attributes via the HASC code (us_county_df is
# indexed by "hasc"); counties without a match keep missing values.
us_pops = us_gdf.join(us_county_df, on="HASC_2", how="left")
us_pops = us_pops[["NAME_1", "NAME_2", "fips", "population", "area_km2", "capital"]]
us_pops = us_pops.rename(columns={"NAME_1": "adm1_name", "NAME_2": "adm2_name"})
us_pops["pop_density_km2"] = us_pops["population"] / us_pops["area_km2"]
# Give it the same three-level index as adm2_gdf so the two frames align.
us_pops["adm0_name"] = "USA"
us_pops = us_pops.set_index(["adm0_name", "adm1_name", "adm2_name"])

##### Merge back into global adm datasets

We do this for France as well, because the adm2 populations have not yet been aggregated up to adm1 for France either.

In [14]:
# Fill missing adm2 values from us_pops, aligned on index and column names.
adm2_gdf = adm2_gdf.fillna(us_pops)
# Sum adm2 populations up to adm1; min_count=1 leaves a region missing when
# none of its adm2 units has a population.
st_pops = (
    adm2_gdf.loc[:, "population"].groupby(["adm0_name", "adm1_name"]).sum(min_count=1)
)
# Only fill adm1 populations that are still missing.
adm1_gdf["population"] = adm1_gdf.population.fillna(st_pops)

### ITA

In [15]:
def download_and_extract(url, out_dir, overwrite=False):
    """Download the zip archive at ``url`` and unpack it into ``out_dir``.

    This helper is called below but was not defined anywhere in the notebook,
    so the cell failed with a NameError on a fresh kernel.

    Parameters
    ----------
    url : str
        Address of a zip archive.
    out_dir : str or pathlib.Path
        Directory to extract into; created if missing.
    overwrite : bool, optional
        If False, archive members that already exist in ``out_dir`` are kept
        as they are.
    """
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    # The archive is held in memory because ZipFile needs a seekable file object.
    with urlopen(url) as response:
        payload = BytesIO(response.read())
    with ZipFile(payload) as archive:
        for member in archive.namelist():
            if overwrite or not (out_dir / member).exists():
                archive.extract(member, out_dir)


# ISTAT 2019 population archives, one per administrative level.
url_fmt = "http://demo.istat.it/pop2019/dati/{lvl}.zip"
ita_pop_dir = cutil.DATA_RAW / "italy" / "population"
for u in ["province", "regioni", "comuni"]:
    # Fetch an archive only when its CSV is not on disk yet.
    if not (ita_pop_dir / f"{u}.csv").exists():
        download_and_extract(url_fmt.format(lvl=u), ita_pop_dir, overwrite=True)

In [16]:
# Renames applied to the names in the ISTAT population tables
# (ISTAT spelling -> spelling to use), one mapping per administrative level.
replace_provinces = {
    "Bolzano/Bozen": "Bolzano",
    "Massa-Carrara": "Massa Carrara",
    "Valle d'Aosta/Vallée d'Aoste": "Aosta",
}
# Also relabels Bolzano and Trento as the "P.A." regions.
replace_regions = {
    "Emilia-Romagna": "Emilia Romagna",
    "Friuli-Venezia Giulia": "Friuli Venezia Giulia",
    "Valle d'Aosta/Vallée d'Aoste": "Valle d'Aosta",
    "Bolzano": "P.A. Bolzano",
    "Trento": "P.A. Trento",
}
replace_munis = {"Vo'": "Vò"}

#### adm1 and 2

In [17]:
# Province-level population: the first line of the file is skipped and only the
# name, the male/female totals and the age column are read.
df = pd.read_csv(
    ita_pop_dir / "province.csv",
    skiprows=1,
    usecols=["Provincia", "Totale Maschi", "Totale Femmine", "Età"],
)
df["adm0_name"] = "ITA"
df = df.rename(columns={"Provincia": "adm2_name"}).set_index(["adm0_name", "adm2_name"])
# Keep the rows whose age is "Totale" and add males and females together.
pop2 = df.loc[df["Età"] == "Totale", ["Totale Maschi", "Totale Femmine"]].sum(axis=1)
pop2 = pop2.rename(index=replace_provinces)
pop2.name = "population"

# Bolzano and Trento are taken out of the province series and relabelled as
# regions; they are appended to the regional populations in the next cell.
provinces_as_regions = pop2.loc[idx[:, ["Bolzano", "Trento"]]]
pop2 = pop2.drop(index=["Bolzano", "Trento"], level="adm2_name")
provinces_as_regions.index = provinces_as_regions.index.set_names(
    "adm1_name", level="adm2_name"
)
provinces_as_regions = provinces_as_regions.rename(
    index=replace_regions, level="adm1_name"
)
# Drop adm1_name from the index so it aligns with pop2 (adm0_name, adm2_name),
# fill only the missing populations, then write back positionally via .values.
adm2_gdf.population = (
    adm2_gdf.reset_index(level="adm1_name").population.fillna(pop2).values
)

In [18]:
# Region-level population, read the same way as the province file.
df = pd.read_csv(
    ita_pop_dir / "regioni.csv",
    skiprows=1,
    usecols=["Regione", "Totale Maschi", "Totale Femmine", "Età"],
)
df["adm0_name"] = "ITA"
df = df.rename(columns={"Regione": "adm1_name"}).set_index(["adm0_name", "adm1_name"])
# Keep the rows whose age is "Totale" and add males and females together.
pop1 = df.loc[df["Età"] == "Totale", ["Totale Maschi", "Totale Femmine"]].sum(axis=1)
pop1 = pop1.rename(index=replace_regions)
pop1.name = "population"

# Add the two provinces that are treated as regions, then fill only the adm1
# populations that are still missing.
pop1 = pop1.append(provinces_as_regions)
adm1_gdf.population = adm1_gdf.population.fillna(pop1)

#### adm3

In [19]:
# Municipality-level population.
df = pd.read_csv(
    ita_pop_dir / "comuni.csv",
    skiprows=1,
    usecols=["Denominazione", "Totale Maschi", "Totale Femmine", "Età"],
)
df["adm0_name"] = "ITA"
df = df.rename(columns={"Denominazione": "adm3_name"}).set_index(
    ["adm0_name", "adm3_name"]
)
# Age value 999 is presumably the all-ages total row in this file (the province
# and region files use "Totale" instead) -- TODO confirm against the ISTAT docs.
pop3 = df.loc[df["Età"] == 999, ["Totale Maschi", "Totale Femmine"]].sum(axis=1)
pop3.name = "population"

# don't know what to do with same-named cities so we'll just keep those pops as missing
# except Vo'Eugane which is used for pop weighting
# NOTE(review): this_city is not used by any later cell visible in this notebook.
this_city = pop3.loc[idx[:, ["Vo'"]]].rename(index={"Vo'": "Vo'Eugane"})

In [20]:
# NOTE(review): this re-reads comuni.csv and rebuilds pop3 exactly as the
# previous cell does.
df = pd.read_csv(
    ita_pop_dir / "comuni.csv",
    skiprows=1,
    usecols=["Denominazione", "Totale Maschi", "Totale Femmine", "Età"],
)
df["adm0_name"] = "ITA"
df = df.rename(columns={"Denominazione": "adm3_name"}).set_index(
    ["adm0_name", "adm3_name"]
)
pop3 = df.loc[df["Età"] == 999, ["Totale Maschi", "Totale Femmine"]].sum(axis=1)
pop3.name = "population"
pop3 = pop3.rename(index=replace_munis)


## making sure we match the important cities (ones that are used in pop weighting)
# Substring fixes applied to every adm3 name.
adm3_gdf = adm3_gdf.rename(
    lambda x: x.replace("d' Adda", "d'Adda").replace(
        "Terranova Dei Passerini", "Terranova dei Passerini"
    ),
    level="adm3_name",
)

# these two municipalities merged
# Union their geometries into one "Castelgerundo" row, recompute its centroid
# and put it in place of the two original rows.
castel = gpd.GeoDataFrame(
    adm3_gdf.loc[idx[:, :, :, ["Cavacurta", "Camairago"]], ["geometry"]]
).dissolve(by=["adm0_name", "adm1_name", "adm2_name"])
castel["adm3_name"] = ["Castelgerundo"]
castel["latitude"] = castel.geometry.centroid.y
castel["longitude"] = castel.geometry.centroid.x
castel = castel.set_index("adm3_name", append=True)
adm3_gdf = adm3_gdf[
    ~adm3_gdf.index.get_level_values("adm3_name").isin(["Cavacurta", "Camairago"])
].append(castel)


## don't know what to do with same-named cities so we'll just keep those pops as missing
# keep=False drops every occurrence of a duplicated name, not only the repeats.
pop3 = pop3[~pop3.index.duplicated(keep=False)]


## merge
# Join on (adm0_name, adm3_name) only, since pop3 carries no adm1/adm2 names,
# then restore the four-level index.
adm3_gdf = (
    adm3_gdf.reset_index(level=["adm1_name", "adm2_name"], drop=False)
    .join(pop3, how="left")
    .reset_index(drop=False)
    .set_index(["adm0_name", "adm1_name", "adm2_name", "adm3_name"])
)

### IRN

In [21]:
# Scrape the Iran population table: the first <table> on the page, whose
# <tbody class="admin1"> / <tbody class="admin2"> sections hold the rows.
# NOTE(review): the HTTP status is not checked before parsing.
irn_url = r"https://www.citypopulation.de/en/iran/admin/"
r = requests.get(irn_url)
data = r.text
soup = BeautifulSoup(data, "lxml")
table = soup.table

adm1s = table.find_all("tbody", {"class": "admin1"})
adm2s = table.find_all("tbody", {"class": "admin2"})

# just want name and latest census pop
# The name is the first cell; the population is the second-to-last cell with
# its thousands separators removed.
# NOTE(review): the loop variable r rebinds the response object r from above.
adm1_rows = []
for a in adm1s:
    rows = a.find_all("tr")
    for r in rows:
        td = r.find_all("td")
        row = [i.text for i in td]
        adm1_rows.append([row[0], int(row[-2].replace(",", ""))])
adm1_irn = pd.DataFrame(adm1_rows, columns=["adm1_name", "population"]).set_index(
    "adm1_name"
)

adm2_rows = []
for a in adm2s:
    # complicated way to get province from previous admin1 level
    # Steps back two siblings and joins that element's strings without the first
    # one and the last six; this depends on the page's exact markup.
    prov = "".join(list(a.previous_sibling.previous_sibling.strings)[1:-6])
    rows = a.find_all("tr")
    for r in rows:
        td = r.find_all("td")
        row = [i.text for i in td]
        adm2_rows.append([prov, row[0], int(row[-2].replace(",", ""))])
adm2_irn = pd.DataFrame(
    adm2_rows, columns=["adm1_name", "adm2_name", "population"]
).set_index(["adm1_name", "adm2_name"])

In [22]:
def checker(wrong_options, correct_options):
    """Map each name to its best match among ``correct_options``.

    Names that already appear in ``correct_options`` are kept as they are with
    a score of 100; every other name is replaced by the candidate with the
    highest ``fuzz.token_set_ratio`` score.

    Parameters
    ----------
    wrong_options : iterable of str
        Names to clean.
    correct_options : collection of str
        Accepted spellings.

    Returns
    -------
    names_array : list of str
        The matched name for each input, in input order.
    ratio_array : list of int
        The match score for each input (100 for exact matches).
    """
    names_array = []
    ratio_array = []
    for wrong_option in wrong_options:
        if wrong_option in correct_options:
            names_array.append(wrong_option)
            # Integer, like the fuzzy scores below, so the list can be compared
            # and sorted numerically (it used to be the string "100").
            ratio_array.append(100)
        else:
            best_name, best_score = process.extractOne(
                wrong_option, correct_options, scorer=fuzz.token_set_ratio
            )[:2]
            names_array.append(best_name)
            ratio_array.append(best_score)
    return names_array, ratio_array

#### adm1

In [23]:
# Match the scraped province names to the adm1 names already in adm1_gdf; only
# the matched names (element 0 of the result) are used, not the scores.
adm1_orig = adm1_irn.index.values
cleaned_names = checker(
    adm1_orig, adm1_gdf.loc[idx["IRN"]].index.get_level_values("adm1_name")
)[0]

# I know that the 3rd one is mapping to East rather than West :(
# NOTE(review): this override is positional, so it is only right while the
# scraped table keeps its current row order.
cleaned_names[2] = "West Azarbaijan"

# Replace the scraped names with the matched ones and index like adm1_gdf.
adm1_irn["adm1_name"] = cleaned_names
adm1_irn["adm0_name"] = "IRN"
adm1_irn = adm1_irn.set_index(["adm0_name", "adm1_name"], drop=True)

# merge in pops
adm1_gdf.population = adm1_gdf.population.fillna(adm1_irn.population)

#### adm2

There are going to be some challenges in fuzzy-merging adm2-level populations, but we're not running analyses on adm2 yet, so I'm holding off on this part.

## Area

In [24]:
def finishing_touches(df, area_crs="EPSG:6933"):
    """Fill in area, population density and centroid coordinates, then sort.

    Existing values are never overwritten: each derived quantity only fills the
    rows where the column is missing, or creates the column when absent.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Admin units with ``geometry``, ``population``, ``latitude`` and
        ``longitude`` columns; ``area_km2`` and ``pop_density_km2`` are optional.
    area_crs : str, optional
        Projected CRS in which polygon areas are measured. The default is a
        global equal-area projection. The previous choice, World Mercator
        (EPSG:3395), is not equal-area and inflates areas away from the equator;
        pass ``"EPSG:3395"`` to reproduce the old numbers.

    Returns
    -------
    geopandas.GeoDataFrame
        A completed copy of ``df`` sorted by index; the input is left untouched.
    """
    df = df.copy()

    # area (square metres in the projected CRS -> km2); rows without a geometry
    # are skipped and stay missing
    area_km2_projected = (
        df[df.geometry.notna()].to_crs(area_crs).geometry.area / 1e6
    )
    if "area_km2" in df.columns:
        df["area_km2"] = df.area_km2.fillna(area_km2_projected)
    else:
        df["area_km2"] = area_km2_projected

    # pop density
    density = df.population.astype(float) / df.area_km2
    if "pop_density_km2" in df.columns:
        df["pop_density_km2"] = df.pop_density_km2.fillna(density)
    else:
        df["pop_density_km2"] = density

    # lat/lon (centroids computed once and reused for both coordinates)
    centroids = df.geometry.centroid
    df["longitude"] = df.longitude.fillna(centroids.x)
    df["latitude"] = df.latitude.fillna(centroids.y)

    return df.sort_index()


# Complete area, density and centroid columns at every administrative level.
adm1_gdf = finishing_touches(adm1_gdf)
adm2_gdf = finishing_touches(adm2_gdf)
adm3_gdf = finishing_touches(adm3_gdf)

## Save

In [25]:
# Write each level to <DATA_INTERIM>/adm/admN/ as a shapefile plus a CSV of the
# same table without the geometry column.
for ix, i in enumerate([adm1_gdf, adm2_gdf, adm3_gdf]):
    fname = f"adm{ix+1}"
    out_dir = cutil.DATA_INTERIM / "adm" / fname
    out_dir.mkdir(parents=True, exist_ok=True)
    i.to_file(out_dir / f"{fname}.shp", index=True)
    i.drop(columns="geometry").to_csv(out_dir / f"{fname}.csv", index=True)