In [1]:
# script to scrape flag images from Wikipedia and download images
import os
from io import BytesIO
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
from PIL import Image


In [24]:

# Scrape the country names and flag thumbnail URLs from Wikipedia's list of
# national flags, collect them in `flags_df`, and persist them to
# 'national_flags.csv' for the download step.

# URL of the Wikipedia page
url = "https://en.wikipedia.org/wiki/List_of_national_flags_of_sovereign_states"

# Wikimedia asks automated clients to send an identifying User-Agent; the
# default python-requests agent may be rejected.
request_headers = {
    "User-Agent": "national-flags-notebook/1.0 (educational scraper; python-requests)"
}

# Fetch the page. The timeout keeps the cell from hanging forever, and
# raise_for_status() stops us from parsing an HTTP error page as if it were data.
response = requests.get(url, headers=request_headers, timeout=30)
response.raise_for_status()

# Parse the content of the page with BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')

# Find the table with the flags (the first table carrying the 'wikitable' class)
table = soup.find('table', class_='wikitable')
if table is None:
    raise RuntimeError(
        f"No table with class 'wikitable' found at {url}; the page layout may have changed."
    )

# Parallel lists: names[i] is the country whose flag URL is images[i]
names = []
images = []

# Iterate through the rows of the table, skipping the header row
for row in table.find_all('tr')[1:]:
    flag_cell = row.find('td')   # first data cell of the row
    name_cell = row.find('th')   # first header cell of the row

    # Rows lacking either cell carry no (name, flag) pair
    if flag_cell is None or name_cell is None:
        continue

    name = name_cell.get_text(strip=True)

    # Resolve the <img> src against the page URL so protocol-relative
    # ('//host/...'), root-relative ('/path') and absolute URLs all become
    # valid absolute URLs; a missing or empty src is recorded as None.
    img_tag = flag_cell.find('img')
    img_src = img_tag.get('src') if img_tag is not None else None
    img_url = urljoin(url, img_src) if img_src else None

    names.append(name)
    images.append(img_url)

# Create a DataFrame with the results
flags_df = pd.DataFrame({
    'Country': names,
    'Flag Image': images
})

# Save the DataFrame to a CSV file for the download step
flags_df.to_csv('national_flags.csv', index=False)


In [25]:
# Preview the scraped table of country names and flag image URLs.
flags_df

Unnamed: 0,Country,Flag Image
0,Afghanistan,https://upload.wikimedia.org/wikipedia/commons...
1,Albania,https://upload.wikimedia.org/wikipedia/commons...
2,Algeria,https://upload.wikimedia.org/wikipedia/commons...
3,Andorra,https://upload.wikimedia.org/wikipedia/commons...
4,Angola,https://upload.wikimedia.org/wikipedia/commons...
...,...,...
190,Venezuela,https://upload.wikimedia.org/wikipedia/commons...
191,Vietnam,https://upload.wikimedia.org/wikipedia/commons...
192,Yemen,https://upload.wikimedia.org/wikipedia/commons...
193,Zambia,https://upload.wikimedia.org/wikipedia/commons...


In [None]:

# Directory to store downloaded images
IMAGE_DIR = "images"
# exist_ok=True makes directory creation idempotent and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(IMAGE_DIR, exist_ok=True)

# Read the CSV file containing country names and flag URLs
df = pd.read_csv("national_flags.csv")

# Function to download and save images locally
def download_image(url, country_name, timeout=30):
    """Download one flag image and save it as ``<IMAGE_DIR>/<country_name>.png``.

    Args:
        url: Absolute URL of the image. Anything that is not a non-empty
            string (``None``, ``NaN`` from an empty CSV field, ``""``) is
            treated as "no flag available" and skipped.
        country_name: File-name stem for the image. It is used as-is, so the
            caller must pass a name that is already safe for the file system.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        None. Progress and errors are reported with ``print``; download
        failures are caught so one bad row does not stop a batch.
    """
    # Build the local image path from the module-level IMAGE_DIR
    image_path = os.path.join(IMAGE_DIR, f"{country_name}.png")

    # If the image already exists, skip the download
    if os.path.exists(image_path):
        print(f"Image for {country_name} already exists.")
        return

    # A missing URL is read back from the CSV as NaN (a float); never hand
    # that to requests, just report it and move on.
    if not isinstance(url, str) or not url.strip():
        print(f"Skipping {country_name}: no flag URL available.")
        return

    # Wikimedia asks automated clients to send an identifying User-Agent; the
    # default python-requests agent may be rejected.
    request_headers = {
        "User-Agent": "national-flags-notebook/1.0 (educational scraper; python-requests)"
    }

    # Download the image
    try:
        response = requests.get(url, headers=request_headers, timeout=timeout)
        response.raise_for_status()  # Check if the request was successful
        # The context manager releases the decoder's resources once saved
        with Image.open(BytesIO(response.content)) as img:
            img.save(image_path)  # Format is inferred from the .png suffix
        print(f"Downloaded and saved image for {country_name}")
    except Exception as e:
        # Deliberately best-effort: report the failure and let the batch continue
        print(f"Error downloading image for {country_name}: {e}")

# Loop through the rows of the CSV and download each image
for _, row in df.iterrows():
    country_name = row['Country']
    flag_url = row['Flag Image']
    
    # Sanitize country name (remove brackets, spaces, etc., to make valid filenames)
    sanitized_country_name = country_name.replace(" ", "_").replace("[", "").replace("]", "")
    
    # Download the image
    download_image(flag_url, sanitized_country_name)