# Wikimedia flags Fetcher

Retrieves the flags from: https://commons.wikimedia.org/wiki/Flags_of_country_subdivisions

In [1]:
import copy
import json
import time
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup

In [2]:
# Wikimedia Commons page the flags are scraped from.
url = "https://commons.wikimedia.org/wiki/Flags_of_country_subdivisions"

In [3]:

def parseUrl(url):
    """Download ``url`` and parse the response body as HTML.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``BeautifulSoup`` tree built with ``html.parser`` when the server
        answers with status 200; otherwise ``None`` (after printing a notice).

    Raises:
        requests.exceptions.RequestException: If the request itself fails
            (connection error, timeout, ...). These are not swallowed here.
    """
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        print("Failed to fetch the webpage")
        return None

    return BeautifulSoup(response.content, "html.parser")

In [4]:
# Fetch and parse the page; parsed is None when the server did not answer 200.
parsed = parseUrl(url)


In [5]:



# Build flagsRaw = {country heading: [{"name", "flag", "flagRaw"}, ...]} by
# walking the direct children of the article body: each <h2> starts a new
# country section and each following <table> contributes that country's flags.
if parsed is None:
    # parseUrl() yields None on a failed download; fail with a clear message
    # instead of an AttributeError on the lookup below.
    raise RuntimeError("Failed to fetch the webpage; there is nothing to parse")

content = parsed.find(id="mw-content-text").find("div")

svgSuffix = ".svg"
flagsRaw = {}
country = ""
for element in content.children:

    if element.name == "h2":
        titleText = element.find("span").text
        # Skip untitled headings and the "See also" section.
        # NOTE(review): country is not reset here, so tables that follow a
        # skipped heading still attach to the previous country - confirm intended.
        if titleText == "" or titleText == "See also":
            continue
        country = titleText
        flagsRaw[country] = []
    elif element.name == "table":
        # Tables that appear before the first accepted heading are ignored.
        if country == "":
            continue
        # find_all is the current Beautiful Soup name for the legacy findAll alias.
        for flagAnchor in element.find_all(class_="mw-file-description"):
            name = flagAnchor.attrs['title']
            flagImg = flagAnchor.find(class_="mw-file-element")
            flagRaw = flagImg.attrs['src']

            # Drop the "thumb/" segment and cut everything after the first
            # ".svg" to turn the thumbnail address into the file address
            # (assumes the Commons thumbnail URL layout - TODO confirm).
            # Sources without ".svg" keep the untouched thumbnail address.
            head, found, _rest = flagRaw.replace("thumb/", "").partition(svgSuffix)
            flagUrl = head + found if found else flagRaw

            flagsRaw[country].append({
                "name": name,
                "flag": flagUrl,
                "flagRaw": flagRaw,
            })


## Check the status of the SVG links

In [6]:
# session = requests.Session()

# for key, value in flags.items():
#     for flag in value:
#         response = session.head(flag['flag'])
#         status_code = response.status_code
#         print(flag)
#         print("Status code:", status_code)
#         time.sleep(1)

In [7]:
# Serialise the scraped data as indented JSON, keeping non-ASCII names readable.
# json_data stays a str: the file is opened in text mode, and writing bytes to
# it would raise TypeError.
json_data = json.dumps(flagsRaw, indent=4, ensure_ascii=False)
file_path = "out/flags_raw.json"
# Explicit UTF-8 so the output does not depend on the platform's default encoding.
with open(file_path, "w", encoding="utf-8") as json_file:
    json_file.write(json_data)

print("JSON data has been saved to", file_path)

JSON data has been saved to out/flags_raw.json


In [8]:
# Same structure as flagsRaw, with the "flagRaw" field removed from every entry.
flags = {
    countryName: [
        {field: entry[field] for field in entry if field != 'flagRaw'}
        for entry in entries
    ]
    for countryName, entries in flagsRaw.items()
}

## Convert names to ISO 3166 codes

In [9]:
# Read the country reference data from disk (UTF-8 encoded JSON).
countriesJsonPath = "./in/countries.json"

with open(countriesJsonPath, mode="r", encoding="utf-8") as json_file:
    countriesJson = json.loads(json_file.read())

In [10]:
# Re-key the flags by country code: {alpha-2 code: {"name": ..., "regions": [...]}}.
FlagsCodes = {}

for country, regions in flags.items():
    # "alpha-2" value of the first countriesJson entry whose "name" equals the heading.
    code = next(
        (entry["alpha-2"] for entry in countriesJson if entry["name"] == country),
        None,
    )
    if code is None:
        # Without this guard the previous country's code would be reused and
        # its entry silently overwritten (or a NameError raised on the first
        # country), so unmatched names are reported and skipped instead.
        print("No country code found for", country)
        continue

    FlagsCodes[code] = {
        "name": country,
        "regions": regions
    }
    

In [11]:
# flags now refers to the code-keyed mapping (same dict object as FlagsCodes, not a copy).
flags = FlagsCodes

## Add ISO 3166-2 codes to regions

Using the OpenStreetMap Nominatim API: https://nominatim.openstreetmap.org/ui/search.html

In [12]:
# Search URL template: $1 is the free-text query, $2 the country code filter.
urlSearch = "https://nominatim.openstreetmap.org/search.php?q=$1&countrycodes=$2&format=jsonv2"


def getRegionURL(regionName, countryCode):
    """Build the search URL for a region name restricted to one country.

    Both values are percent-encoded before substitution, so characters such as
    ``&``, ``#``, ``+`` or spaces in a region name cannot alter the query string.

    Args:
        regionName: Free-text name of the region to search for.
        countryCode: Country code placed in the ``countrycodes`` parameter.

    Returns:
        The complete URL as a string.
    """
    encodedName = quote(regionName, safe="")
    encodedCode = quote(countryCode, safe="")
    return urlSearch.replace("$1", encodedName).replace("$2", encodedCode)

# Details URL template: $1 is the OSM id; osmtype is fixed to "R".
urlReverseId = "https://nominatim.openstreetmap.org/details.php?osmtype=R&osmid=$1&format=json"


def getReverseIdURL(id):
    """Build the details URL for one OSM id.

    Args:
        id: The OSM id, as a string or an integer. (The parameter name shadows
            the builtin but is kept so existing keyword callers keep working.)

    Returns:
        The complete URL as a string.
    """
    # str() lets callers pass the numeric id directly; str.replace needs a str.
    return urlReverseId.replace("$1", str(id))

def fetchData(api_url, timeout=30):
    """GET ``api_url`` and return its decoded JSON body.

    Args:
        api_url: Full URL to request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON payload when the server answers with status 200.
        ``None`` when the request fails or times out, when another status code
        is returned, or when the body is not valid JSON; a message is printed
        in each of those cases.
    """
    # NOTE(review): the public Nominatim service publishes a usage policy
    # (request rate, identifying User-Agent) - confirm this script complies.
    try:
        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(api_url, timeout=timeout)

        if response.status_code != 200:
            print(f"Request failed with status code: {response.status_code}")
            return None

        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        print("An error occurred:", e)
        return None

In [13]:
# Deep copy, so replacing each country's 'regions' later leaves FlagsCodes untouched.
flagsRegionCodes = copy.deepcopy(FlagsCodes)


def addResult(regionsMap, region, name, matchData):
    """Fetch the details of one search hit and store the region under its ISO code.

    Args:
        regionsMap: Dict that collects the regions of the current country,
            keyed by the ``ISO3166-2`` value; updated in place on success.
        region: Flag entry being resolved; its ``'flag'`` value is stored and
            the whole entry is printed when the hit is rejected.
        name: Name stored as ``"name"`` for the region.
        matchData: One search result; only its ``'osm_id'`` is read.

    Returns:
        ``True`` when a new entry was added to ``regionsMap``; ``False`` when
        the details could not be fetched, carry no ``ISO3166-2`` name, or that
        code is already present in ``regionsMap``.
    """
    osmId = matchData['osm_id']
    detailsUrl = getReverseIdURL(str(osmId))
    print(osmId)
    detailsData = fetchData(detailsUrl)

    if detailsData is None:
        print("No details data")
        print(region)
        return False

    # A response without a usable 'names' mapping is treated like one without
    # an ISO code, rather than aborting the whole run with an exception.
    names = detailsData.get('names') if isinstance(detailsData, dict) else None
    if not isinstance(names, dict):
        names = {}

    isoCode = names.get('ISO3166-2')

    if isoCode is None:
        print("No ISO Code")
        print(region)
        return False

    if isoCode in regionsMap:
        # The first region stored under a code wins; later hits are rejected.
        print("ISO code already exists ISO: " + isoCode)
        print(region)
        return False

    regionsMap[isoCode] = {
        "name": name,
        "nativeName": names.get('name'),
        "flag": region['flag']
    }
    return True

# For every country, search each flag's region name and try the
# "administrative" hits in order until addResult() accepts one; the collected
# map then replaces that country's 'regions' in flagsRegionCodes.
for countryCode, country in FlagsCodes.items():
    print(countryCode)
    regionsMap = {}

    for region in country['regions']:
        name = region['name']
        data = fetchData(getRegionURL(name, countryCode))

        if not data:
            print("No match data")
            continue

        candidates = (hit for hit in data if hit.get("type") == "administrative")
        for attempt, matchData in enumerate(candidates):
            # Announce every retry, i.e. every candidate after the first one.
            if attempt > 0:
                print("Trying next match")
            if addResult(regionsMap, region, name, matchData):
                break

    flagsRegionCodes[countryCode]['regions'] = regionsMap

AL
1252289
1249872
1250609
1252589
1759889
1250098
1255521
AD
2804753
2804754
2804755
2804756
2804757
2804758
No match data
AQ
No match data
No match data
No match data
No match data
No match data
No match data
No match data
No match data
AR
1224652
4683201
No ISO Code
{'name': 'Buenos Aires Province', 'flag': 'https://upload.wikimedia.org/wikipedia/commons/1/15/Bandera_de_la_Provincia_de_Buenos_Aires.svg'}
6587642
No ISO Code
{'name': 'Chaco Province', 'flag': 'https://upload.wikimedia.org/wikipedia/commons/3/33/Bandera_de_la_Provincia_del_Chaco.svg'}
153548
153551
6413250
No ISO Code
{'name': 'Misiones Province', 'flag': 'https://upload.wikimedia.org/wikipedia/commons/c/ce/Bandera_de_la_Provincia_de_Misiones.svg'}
1606727
153547
16155061
No ISO Code
{'name': 'San Juan Province', 'flag': 'https://upload.wikimedia.org/wikipedia/commons/c/c1/Bandera_de_la_Provincia_de_San_Juan.svg'}
Trying next match
4764377
No ISO Code
{'name': 'San Juan Province', 'flag': 'https://upload.wikimedia.org

In [14]:
# flags now refers to the mapping whose 'regions' are keyed by ISO 3166-2 code.
flags = flagsRegionCodes

## Save final flags

In [16]:
# Serialise the final data as indented JSON, keeping non-ASCII names readable.
# json.dumps already returns a str, so no encode/decode round trip is needed.
json_data = json.dumps(flags, indent=4, ensure_ascii=False)
file_path = "out/flags.json"
# Explicit UTF-8: with ensure_ascii=False the text may contain characters the
# platform's default encoding cannot represent.
with open(file_path, "w", encoding="utf-8") as json_file:
    json_file.write(json_data)

print("JSON data has been saved to", file_path)

JSON data has been saved to out/flags.json
