# Data fetcher for flags

Gets data from different places and combines/saves them in a single file. In order to support OsuWorld

In [1]:
import requests
from bs4 import BeautifulSoup
import json
import copy
import urllib.parse


In [2]:
def fetchData(api_url):
    try:
        response = requests.get(api_url)

        if response.status_code == 200:

            data = response.json()
            return data
        else:
            print(f"Request failed with status code: {response.status_code}")
    except requests.exceptions.RequestException as e:
        print("An error occurred:", e)


# Wikimedia flags Fetcher

Retrieves the flags from: https://commons.wikimedia.org/wiki/Flags_of_country_subdivisions


In [42]:
url = "https://commons.wikimedia.org/wiki/Flags_of_country_subdivisions"


In [4]:

def parseUrl(url):
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, "html.parser")
        return soup

    else:
        print("Failed to fetch the webpage")


In [4]:
parsed = parseUrl(url)


In [5]:


content = parsed.find(id="mw-content-text").find("div")

flagsRaw = {

}
country = ""
for element in content.children:

    if element.name == "h2":
        titleText = element.find("span").text
        if titleText == "" or titleText == "See also":
            continue
        country = titleText
        flagsRaw[country] = []
    elif element.name == "table":
        if country == "":
            continue
        flagsAnchor = element.findAll(class_="mw-file-description")
        for flagAnchor in flagsAnchor:
            name = flagAnchor.attrs['title']
            flagImg = flagAnchor.find(class_="mw-file-element")
            flagRaw = flagImg.attrs['src']
            flag: str = flagRaw.replace("thumb/", "")

            matchEnd = ".svg"
            svgIndex = flag.find(matchEnd)
            if svgIndex == -1:
                flag = flagRaw
            else:
                flag = flag[:svgIndex+len(matchEnd)]

            flag = {
                "name": name,
                "flag": flag,
                "flagRaw": flagRaw
            }

            flagsRaw[country].append(flag)


## Check the svg links status


In [7]:
json_data = json.dumps(flagsRaw, indent=4, ensure_ascii=False).encode('utf8').decode()
flagsRaw = "out/flags_raw.json"
with open(flagsRaw, "w") as json_file:
    json_file.write(json_data)

print("JSON data has been saved to", file_path)


JSON data has been saved to out/flags_raw.json


In [54]:
flagsRaw = "out/flags_raw.json"

with open(flagsRaw, "r", encoding="utf-8") as json_file:
    flagsRaw = json.load(json_file)


In [55]:
flags = {
    country: [
        {key: value for key, value in county.items() if key != 'flagRaw'}
        for county in counties
    ]
    for country, counties in flagsRaw.items()
}


## Convert names to ISO 3166 codes


In [60]:
countriesJsonPath = "./in/countries.json"

with open(countriesJsonPath, "r", encoding="utf-8") as json_file:
    countriesJson = json.load(json_file)


In [61]:
FlagsCodes = {}

for country, regions in flags.items():
    code = None
    for countryJson in countriesJson:
        countryName: str = countryJson["name"]
        if countryName.find(country) != -1 or country.find(countryName) != -1:
            code = countryJson["alpha-2"]
            break
        
    if code is None:
        print(f"Country {country} not found in countries.json")
        continue

    FlagsCodes[code] = {
        "name": country,
        "regions": regions
    }


In [63]:
flags = FlagsCodes


In [62]:
json_data = json.dumps(FlagsCodes, indent=4, ensure_ascii=False).encode('utf8').decode()
with open("out/flags_codes.json", "w") as json_file:
    json_file.write(json_data)

## Add Native name to countries


In [64]:
countriesNativeJson = fetchData(
    "https://raw.githubusercontent.com/tomeralmog/zipcode-kml/master/countries.json")


In [65]:
for countryCode, country in flags.items():
    countryName = country["name"]
    nativeCountryRows = [
        countryRow for countryRow in countriesNativeJson if countryRow["countryName"] == countryName]

    if len(nativeCountryRows) == 0:
        # Fix Netherlands and The Netherlands mismatch exception
        exceptionCountries = {"NL": "The Netherlands",
                              "PT": "Portugual", "HR": "Czechia", "DO": "Dominica Republic"}
        if countryCode in exceptionCountries.keys():
            countryName = exceptionCountries[countryCode]
            nativeCountryRows = [
                countryRow for countryRow in countriesNativeJson if countryRow["countryName"] == countryName]
        else:
            print(
                f"Country {countryCode} {countryName} not found in countries.json")
            continue

    nativeCountryRow = nativeCountryRows[0]
    nativeCountryName: str = nativeCountryRow["nativeCountryName"]
    nativeCountryName = nativeCountryName.split(",")[0].split("|")[0]
    country["nativeName"] = nativeCountryName
    # Reorder regions to appear after nativeName
    regCopy = country["regions"]
    del country["regions"]
    country["regions"] = regCopy


Country AQ Antarctica not found in countries.json
Country CZ Czech Republic not found in countries.json
Country DM Dominican Republic not found in countries.json
Country LR Liberia not found in countries.json
Country SR Suriname not found in countries.json


## Add ISO 3166-2 codes to regions

Using Open Street Maps API: https://nominatim.openstreetmap.org/ui/search.html


In [33]:
urlSearch = "https://nominatim.openstreetmap.org/search.php?q=$1&countrycodes=$2&format=jsonv2"


def getRegionURL(regionName, countryCode):
    return urlSearch.replace("$1", regionName).replace("$2", countryCode)


urlReverseId = "https://nominatim.openstreetmap.org/details.php?osmtype=R&osmid=$1&format=json"


def getReverseIdURL(id):
    return urlReverseId.replace("$1", id)


In [67]:
flagsRegionCodes = copy.deepcopy(FlagsCodes)


def addResult(regionsMap, region, name, matchData):
    id = matchData['osm_id']
    detailsUrl = getReverseIdURL(str(id))
    print(id)
    detailsData = fetchData(detailsUrl)

    if detailsData is None:
        print("No details data")
        print(region)
        return False

    isoCode = detailsData['names'].get('ISO3166-2')

    if isoCode is None:
        print("No ISO Code")
        print(region)
        return False

    nativeName = detailsData['names'].get('name')

    if isoCode in regionsMap:
        print("ISO code already exists ISO: " + isoCode)
        print(region)
        return False

    regionsMap[isoCode] = {
        "name": name,
        "nativeName": nativeName,
        "flag": region['flag']
    }
    return True


for countryCode, country in FlagsCodes.items():
    print(countryCode)
    regionsMap = {}

    for region in country['regions']:
        name = region['name']
        api_url = getRegionURL(name, countryCode)

        data = fetchData(api_url)
        if data:
            it = (d for d in data if d.get("type") == "administrative")

            matchData = next(it, None)

            while matchData:
                suceed = addResult(regionsMap, region, name, matchData)
                if not suceed:
                    matchData = next(it, None)
                    if matchData is not None:
                        print("Trying next match")
                else:
                    matchData = None

        else:
            print("No match data")

    flagsRegionCodes[countryCode]['regions'] = regionsMap


AL
1252289
1249872
1250609
1252589
1759889
1250098
1255521
AD
2804753
2804754
2804755
2804756
2804757
2804758
No match data
AQ
No match data
No match data
No match data
No match data
No match data
No match data
No match data
No match data
AR
1224652
4683201
No ISO Code
{'name': 'Buenos Aires Province', 'flag': 'https://upload.wikimedia.org/wikipedia/commons/1/15/Bandera_de_la_Provincia_de_Buenos_Aires.svg'}
6587642
No ISO Code
{'name': 'Chaco Province', 'flag': 'https://upload.wikimedia.org/wikipedia/commons/3/33/Bandera_de_la_Provincia_del_Chaco.svg'}
153548
153551
6413250
No ISO Code
{'name': 'Misiones Province', 'flag': 'https://upload.wikimedia.org/wikipedia/commons/c/ce/Bandera_de_la_Provincia_de_Misiones.svg'}
1606727
153547
5836465
No ISO Code
{'name': 'San Juan Province', 'flag': 'https://upload.wikimedia.org/wikipedia/commons/c/c1/Bandera_de_la_Provincia_de_San_Juan.svg'}
Trying next match
9561511
No ISO Code
{'name': 'San Juan Province', 'flag': 'https://upload.wikimedia.org/

## Save final auto flags


In [70]:
json_data = json.dumps(
    flags, indent=4, ensure_ascii=False).encode('utf8').decode()
file_path = "out/flags_auto.json"
with open(file_path, "w") as json_file:
    json_file.write(json_data)

print("JSON data has been saved to", file_path)


JSON data has been saved to out/flags_auto.json


## Clean flags auto leaving only the regions in osu world


In [60]:
flagsPathAuto = "./out/flags_auto.json"

with open(flagsPathAuto, "r", encoding="utf-8") as json_file:
    flagsAuto = json.load(json_file)


Add countries from osuworld

In [61]:
countries: dict = fetchData("https://osuworld.octo.moe/locales/en/countries.json")

In [62]:
for countryCode, country in countries.items():
    if countryCode not in flagsAuto:
        flagsAuto[countryCode] = {}
        flagsAuto[countryCode]['name'] = country
        flagsAuto[countryCode]['regions'] = {}
        print(f"Country {country} not found in flagsAuto.json")

Country Kazakhstan not found in flagsAuto.json
Country Singapore not found in flagsAuto.json
Country Turkey not found in flagsAuto.json
Country Taiwan not found in flagsAuto.json
Country Vietnam not found in flagsAuto.json


add regions from osu world

In [63]:
regions: dict = fetchData("https://osuworld.octo.moe/locales/en/regions.json")


In [64]:
flags: dict = copy.deepcopy(flagsAuto)

for countryCode, country in flagsAuto.items():
    osuWorldRegions: dict = regions.get(countryCode)
    if osuWorldRegions is None:
        flags.pop(countryCode, None)
        continue

    for regionCode, region in country['regions'].items():
        osuWorldRegionKeys = osuWorldRegions.keys()
        if regionCode in osuWorldRegionKeys:
            region['name'] = osuWorldRegions[regionCode]
        else:
            flags[countryCode]['regions'].pop(regionCode, None)

    for osuWorldRegion in osuWorldRegions:
        if osuWorldRegion not in country['regions']:
            flags[countryCode]['regions'][osuWorldRegion] = {
                "name": osuWorldRegions[osuWorldRegion],
                "nativeName": "",
                "flag": ""
            }


Save

In [65]:
json_data = json.dumps(
    flags, indent=4, ensure_ascii=False).encode('utf8').decode()
file_path = "out/flags.json"
with open(file_path, "w") as json_file:
    json_file.write(json_data)

print("JSON data has been saved to", file_path)


JSON data has been saved to out/flags.json


Reload data

In [3]:
flagsPath = "./out/flags.json"

with open(flagsPath, "r", encoding="utf-8") as json_file:
    flags = json.load(json_file)


## Add Manual changes to the automatic flags

In [4]:
flagsAdditionsPath = "./in/flags_additions.json"

with open(flagsAdditionsPath, "r", encoding="utf-8") as json_file:
    flagsAdditions = json.load(json_file)


In [5]:
import collections.abc

def update(d, u):
    for k, v in u.items():
        if isinstance(v, collections.abc.Mapping):
            d[k] = update(d.get(k, {}), v)
        else:
            d[k] = v
    return d

In [6]:
update(flags, flagsAdditions)

{'AR': {'name': 'Argentina',
  'nativeName': 'Argentina',
  'regions': {'AR-C': {'name': 'Buenos Aires',
    'nativeName': 'Buenos Aires',
    'flag': 'https://upload.wikimedia.org/wikipedia/commons/f/f5/Bandera_de_la_Ciudad_de_Buenos_Aires.svg'},
   'AR-U': {'name': 'Chubut Province',
    'nativeName': 'Chubut',
    'flag': 'https://upload.wikimedia.org/wikipedia/commons/8/88/Bandera_de_la_Provincia_del_Chubut.svg'},
   'AR-E': {'name': 'Entre Ríos Province',
    'nativeName': 'Entre Ríos',
    'flag': 'https://upload.wikimedia.org/wikipedia/commons/5/5b/Bandera_de_la_Provincia_de_Entre_Ríos.svg'},
   'AR-Q': {'name': 'Neuquén Province',
    'nativeName': 'Neuquén',
    'flag': 'https://upload.wikimedia.org/wikipedia/commons/b/bc/Bandera_de_la_Provincia_de_Neuquén.svg'},
   'AR-R': {'name': 'Río Negro Province',
    'nativeName': 'Río Negro',
    'flag': 'https://upload.wikimedia.org/wikipedia/commons/5/5d/Bandera_de_la_Provincia_del_Río_Negro.svg'},
   'AR-Z': {'name': 'Santa Cruz Pr

## Unescape characters in flags links

In [7]:

for country in flags:
    regions = flags[country]['regions']
    for region in regions.values():
        flag = region['flag']
        region['flag'] = urllib.parse.unquote(flag)


Save again


In [8]:
json_data = json.dumps(
    flags, indent=4, ensure_ascii=False).encode('utf8').decode()
file_path = "out/flags.json"
with open(file_path, "w") as json_file:
    json_file.write(json_data)

print("JSON data has been saved to", file_path)


JSON data has been saved to out/flags.json


## Fill native names

Using Open Street Maps API: https://nominatim.openstreetmap.org/ui/search.html


In [72]:
urlSearch = "https://nominatim.openstreetmap.org/search.php?q=$1&countrycodes=$2&format=jsonv2"


def getRegionURL(regionName, countryCode):
    return urlSearch.replace("$1", regionName).replace("$2", countryCode)


urlReverseId = "https://nominatim.openstreetmap.org/details.php?osmtype=R&osmid=$1&format=json"


def getReverseIdURL(id):
    return urlReverseId.replace("$1", id)


In [73]:
def addResult(region, matchData):
    localName = matchData['name']
    if not localName:
        return False

    region["nativeName"] = localName
    print(localName)
    return True


def handleSearch(region, data):
    if data:
        it = (d for d in data if d.get("type") == "administrative")

        matchData = next(it, None)

        while matchData:
            suceed = addResult(region, matchData)
            if not suceed:
                matchData = next(it, None)
                if matchData is not None:
                    print("Trying next match")
            else:
                matchData = None
                return True

    else:
        print("No match data")
    return False


for countryCode, country in flags.items():
    print(countryCode)

    for regionCode, region in country['regions'].items():
        if region['nativeName'] != "":
            continue

        api_url = getRegionURL(regionCode, countryCode)
        data = fetchData(api_url)
        searchHandleResult = handleSearch(region, data)
        if not searchHandleResult:
            name = region['name']
            api_url = getRegionURL(name, countryCode)
            data = fetchData(api_url)
            searchHandleResult = handleSearch(region, data)


AR


Salta
Buenos Aires
San Luis
La Rioja
Santiago del Estero
Chaco
San Juan
Catamarca
La Pampa
Mendoza
Misiones
Formosa
Santa Fe
Tucumán
Corrientes
Córdoba
Jujuy
AU
AT
BY
BE
BR
CA
CL
Región de Arica y Parinacota
Región de Magallanes y de la Antártica Chilena
CN
安徽省
北京市
重庆市
福建省
甘肃省
广东省
广西壮族自治区
贵州省
海南省
河北省
黑龙江省
河南省
湖北省
湖南省
内蒙古自治区 ᠦᠪᠦᠷ ᠮᠣᠩᠭᠤᠯ ᠤᠨ ᠥᠪᠡᠷᠲᠡᠭᠡᠨ ᠵᠠᠰᠠᠬᠣ ᠣᠷᠣᠨ
江苏省
江西省
吉林省
辽宁省
宁夏回族自治区
青海省
陕西省
山东省
上海市
山西省
四川省
天津市
西藏自治区 བོད་རང་སྐྱོང་ལྗོངས།
新疆维吾尔自治区 شىنجاڭ ئۇيغۇر ئاپتونوم رايونی
云南省
浙江省
CO
Amazonas
Antioquia
Arauca
Atlántico
Bolívar
Boyacá
Caldas
Caquetá
Casanare
Cauca
Cesar
Chocó
Córdoba
Cundinamarca
Guainía
Guaviare
Huila
La Guajira
Magdalena
Meta
Nariño
Norte de Santander
Putumayo
Quindío
Risaralda
Santander
Archipelago of Saint Andrew, Providence and Saint Catherine
Sucre
Tolima
Valle del Cauca
Vaupés
Vichada
CZ
Jihočeský kraj
Jihomoravský kraj
Karlovarský kraj
Královéhradecký kraj
Liberecký kraj
Moravskoslezský kraj
Olomoucký kraj
Plzeňský kraj
Středočeský kraj
Ústecký kraj
FI
Etelä-K

In [74]:
json_data = json.dumps(
    flags, indent=4, ensure_ascii=False).encode('utf8').decode()
file_path = "out/flags.json"
with open(file_path, "w") as json_file:
    json_file.write(json_data)

print("JSON data has been saved to", file_path)


JSON data has been saved to out/flags.json


## Reload data


In [75]:
flagsPath = "./out/flags.json"

with open(flagsPath, "r", encoding="utf-8") as json_file:
    flags = json.load(json_file)


In [76]:
[region['name'] for region in flags["ES"]["regions"].values()]


['Andalusia',
 'Aragon',
 'Asturias',
 'Basque Country',
 'Canary Islands',
 'Castilla–La Mancha',
 'Castile and León',
 'Catalonia',
 'Extremadura',
 'Galicia',
 'Community of Madrid',
 'Region of Murcia',
 'Navarre',
 'Valencian Community',
 'Ceuta',
 'Melilla',
 'Balearic Islands',
 'Cantabria',
 'La Rioja']

Countries missing regions

In [78]:
printJsons = True

for countryCode, country in flags.items():
    regions = country["regions"]
    flagsFilter = [region['flag'] for region in regions.values()]
    containsEmptyFlag = any(flag == "" for flag in flagsFilter)
    if containsEmptyFlag:
        print("-", country["name"])
        if printJsons:
            print(json.dumps(country, indent=4, ensure_ascii=False))

- Argentina
{
    "name": "Argentina",
    "nativeName": "Argentina",
    "regions": {
        "AR-C": {
            "name": "Buenos Aires",
            "nativeName": "Buenos Aires",
            "flag": "https://upload.wikimedia.org/wikipedia/commons/f/f5/Bandera_de_la_Ciudad_de_Buenos_Aires.svg"
        },
        "AR-U": {
            "name": "Chubut Province",
            "nativeName": "Chubut",
            "flag": "https://upload.wikimedia.org/wikipedia/commons/8/88/Bandera_de_la_Provincia_del_Chubut.svg"
        },
        "AR-E": {
            "name": "Entre Ríos Province",
            "nativeName": "Entre Ríos",
            "flag": "https://upload.wikimedia.org/wikipedia/commons/5/5b/Bandera_de_la_Provincia_de_Entre_Ríos.svg"
        },
        "AR-Q": {
            "name": "Neuquén Province",
            "nativeName": "Neuquén",
            "flag": "https://upload.wikimedia.org/wikipedia/commons/b/bc/Bandera_de_la_Provincia_de_Neuquén.svg"
        },
        "AR-R": {
         