In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import os
import time

In [2]:
df = pd.read_csv('data/country_list.csv')

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,continent,country,capital
0,0,Asia,Afghanistan,Kabul
1,1,Asia,Armenia,Yerevan
2,2,Asia,Azerbaijan,Baku
3,3,Asia,Bahrain,Manama
4,4,Asia,Bangladesh,Dhaka


In [11]:
#✅✅✅✅✅✅✅

def download_flag(country):
    try:
        # Construct Wikipedia URL (replace spaces with underscores)
        url = f'https://en.wikipedia.org/wiki/{country.replace(" ", "_")}'
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raise HTTPError for bad responses

        soup = BeautifulSoup(response.text, 'html.parser')

        # Find flag image - this method looks for image with 'Flag of' in alt
        flag_img = soup.find('img', alt=lambda x: x and 'Flag of' in x)

        if not flag_img:
            print(f" Flag not found for: {country}")
            return

        # Get full image URL
        img_url = 'https:' + flag_img['src']
        img_data = requests.get(img_url, timeout=10).content

        # Clean filename
        filename = f"{country.replace(' ', '_')}_flag.png"
       
        os.makedirs('flag_image', exist_ok=True)
        # with open(filename, 'wb') as f:
        #     f.write(img_data)


        with open(os.path.join('flag_image', filename), 'wb') as file:
                file.write(img_data)
        
        print(f" Flag downloaded for: {country}")

    except requests.exceptions.RequestException as e:
        print(f" Network error for {country}: {e}")
    except Exception as e:
        print(f" Unexpected error for {country}: {e}")


country_list = df['country'][0:4]

for country in country_list:
    download_flag(country)
    time.sleep(1)  

 Flag downloaded for: Afghanistan
 Flag downloaded for: Armenia
 Flag not found for: Azerbaijan
 Flag downloaded for: Bahrain


In [57]:
# List of Images
folder_path ='flag_image'
file_names = os.listdir(folder_path)
image_files = [f for f in file_names if f.endswith('.png')]

df_flag = pd.DataFrame(image_files, columns=['image_name'])

In [59]:
df_flag.head(2)

Unnamed: 0,image_name
0,Afghanistan_flag.png
1,Albania_flag.png


In [61]:
df_flag['country'] = df_flag['image_name'].apply(lambda x: x.split("_flag.png")[0])

In [63]:
df_flag.head()

Unnamed: 0,image_name,country
0,Afghanistan_flag.png,Afghanistan
1,Albania_flag.png,Albania
2,Algeria_flag.png,Algeria
3,Andorra_flag.png,Andorra
4,Angola_flag.png,Angola


In [65]:
df_flag['country'] = df_flag['country'].str.replace('_',' ')

In [67]:
df_flag.head()

Unnamed: 0,image_name,country
0,Afghanistan_flag.png,Afghanistan
1,Albania_flag.png,Albania
2,Algeria_flag.png,Algeria
3,Andorra_flag.png,Andorra
4,Angola_flag.png,Angola


In [69]:
df_flag.to_csv('data/df_flag.csv')

In [71]:
df_flag.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 196 entries, 0 to 195
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   image_name  196 non-null    object
 1   country     196 non-null    object
dtypes: object(2)
memory usage: 3.2+ KB
