In [97]:
import pandas as pd

Here we check that the population reported for the Faroe Islands as a whole matches the sum of the populations of all municipalities. The data comes from a Faroese government resource - https://statbank.hagstova.fo/pxweb/en/H2/H2__IB__IB01/fo_abgd_md.px/table/tableViewLayout2/ - and was pulled for July 2024.

In [98]:
# Load the population table and keep only rows with a usable population count.
pop_df = (
    pd.read_csv("faroe-population.csv")
    # Non-numeric population entries become NaN so they can be dropped below.
    .assign(Population=lambda x: pd.to_numeric(x["Population"], errors='coerce'))
    # NOTE: drops a row when *any* column is missing, not only Population.
    .dropna()
    .assign(Population=lambda x: x["Population"].astype(int))
    .rename(columns={"Place": "PlaceName"})
)

# Take the national total by position (.iloc) rather than by index label:
# after dropna()/query() the row keeps its original label, so `[0]` would only
# work while the total happens to be the very first row of the CSV.
total_population = pop_df.query("PlaceName == 'Total Faroe Islands'").Population.iloc[0]
places_population = pop_df.query("PlaceName != 'Total Faroe Islands'").Population.sum()
print("Total Faroe Islands: ", total_population)
print("Sum of All Faroe Island Locations: ", places_population)

# Enforce the consistency check instead of relying on reading the two printed numbers.
assert total_population == places_population, (
    "Reported national total does not equal the sum of the individual places"
)

Total Faroe Islands:  54815
Sum of All Faroe Island Locations:  54815


In [99]:
# Read the coordinate export, then break its first column apart on tab
# characters so that every tab-separated field gets a column of its own.
coord_df = pd.read_csv("overpass-faroes.csv")
tab_fields = coord_df.iloc[:, 0].str.split('\t', expand=True)

# The split produces integer column labels; give them readable names.
field_names = {
    0: "_",
    1: "Latitude",
    2: "Longitude",
    3: "PlaceName",
    4: "PlaceType",
}
faroe_df = pd.concat([coord_df, tab_fields], axis=1).rename(columns=field_names)

# Discard the first two columns of the combined frame.
leading_columns = faroe_df.columns[[0, 1]]
faroe_df = faroe_df.drop(leading_columns, axis=1)

# Make the join key a plain string before merging.
faroe_df["PlaceName"] = faroe_df["PlaceName"].astype(str)

# Outer join: places present in only one of the two tables are kept, with
# missing values in the columns that come from the other table.
faroe_df = faroe_df.merge(pop_df, on="PlaceName", how='outer')
faroe_df

Unnamed: 0,Latitude,Longitude,PlaceName,PlaceType,Population
0,61.4557460,-6.7590335,Akrar,village,16.0
1,62.2547392,-6.5793808,Ánirnar,village,51.0
2,62.2552377,-6.5363614,Árnafjørður,village,66.0
3,62.0863166,-7.3696034,Bøur,village,68.0
4,61.7843221,-6.6785038,Dalur,village,36.0
...,...,...,...,...,...
128,,,"Nes, Eysturoy",,380.0
129,,,"Syðradalur, Streymoy",,8.0
130,,,Skúgvoy,,27.0
131,,,Stóra Dímun,,5.0


After data wrangling, a lot of coordinate values are still missing: the query struggled a bit, and a few places go by several names. OpenStreetMap was searched manually for the remaining coordinate values, and the dataframe is updated with them below.

In [105]:
def manual_data_insert(placename, lati, longi, placetype):
    """Fill in hand-collected location details for one place.

    Every row of the module-level ``faroe_df`` whose ``PlaceName`` equals
    ``placename`` has its ``Latitude``, ``Longitude`` and ``PlaceType``
    overwritten with the given values. The frame is modified in place and
    nothing is returned; a name that matches no row leaves the frame unchanged.
    """
    matching_rows = faroe_df["PlaceName"] == placename
    updates = (("Latitude", lati), ("Longitude", longi), ("PlaceType", placetype))
    for column, value in updates:
        faroe_df.loc[matching_rows, column] = value
# Places whose latitude is still missing after the merge, in dataframe row order.
PlaceList = faroe_df[faroe_df['Latitude'].isna()]['PlaceName'].tolist()

# Hand-collected coordinates as latitude -> longitude strings.
# CAUTION: entries are paired with PlaceList purely by position, so this only
# stays correct while the row order produced by the merge is unchanged, and
# two places sharing a latitude string could not both be stored in this dict.
# NOTE(review): the displayed result suggests fewer places were missing than
# there are entries here, in which case zip() leaves the trailing entries
# unused - confirm which place each entry belongs to.
LatLongDict = {
    # Whole-country reference point (paired with the "country" type). The
    # longitude must be negative (west of Greenwich) like every other entry.
    "62": "-7",
    "62.2451801": "-6.6678361",
    "62.1753702": "-6.7752761",
    "62.1939598": "-6.8540808",
    "62.0811545": "-6.7294839",
    "62.0187017": "-6.9123819",
    "61.7706823": "-6.8066251",
    "61.6849993": "-6.7558718",
    "61.5509988": "-6.8308499"
}
TypeList = ["country"] + ["village"]*8

# zip() stops at the shortest input without complaint; make sure that can
# never leave a place without coordinates or a coordinate without a type.
assert len(TypeList) == len(LatLongDict), "each manual coordinate needs a place type"
assert len(PlaceList) <= len(LatLongDict), (
    "more places are missing coordinates than were collected by hand"
)

for placename, (lati, longi), placetype in zip(PlaceList, LatLongDict.items(), TypeList):
    manual_data_insert(placename, lati, longi, placetype)

# Coordinates are held as strings up to this point; convert them to numbers.
faroe_df["Latitude"] = pd.to_numeric(faroe_df["Latitude"])
faroe_df["Longitude"] = pd.to_numeric(faroe_df["Longitude"])

# Keep only places that have a population figure.
faroe_df = faroe_df.dropna(subset=["Population"])
faroe_df

Unnamed: 0,Latitude,Longitude,PlaceName,PlaceType,Population
0,61.455746,-6.759034,Akrar,village,16.0
1,62.254739,-6.579381,Ánirnar,village,51.0
2,62.255238,-6.536361,Árnafjørður,village,66.0
3,62.086317,-7.369603,Bøur,village,68.0
4,61.784322,-6.678504,Dalur,village,36.0
...,...,...,...,...,...
128,62.081154,-6.729484,"Nes, Eysturoy",village,380.0
129,62.018702,-6.912382,"Syðradalur, Streymoy",village,8.0
130,61.770682,-6.806625,Skúgvoy,village,27.0
131,61.684999,-6.755872,Stóra Dímun,village,5.0
