In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import os

In [2]:
df = pd.read_csv('data/country_list.csv')

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,continent,country,capital
0,0,Asia,Afghanistan,Kabul
1,1,Asia,Armenia,Yerevan
2,2,Asia,Azerbaijan,Baku
3,3,Asia,Bahrain,Manama
4,4,Asia,Bangladesh,Dhaka


In [None]:
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
}

countries = df['country']

for country in countries:
    # country_encoded = quote(country.replace(" ", "_"))
    file_name = f"{country}_(orthographic_projection).svg"
    url = f"https://en.wikipedia.org/wiki/File:{file_name}"

    try:
        html = requests.get(url, headers=headers).text
        soup = BeautifulSoup(html, 'html.parser')
        con = soup.find('div', id='file')
        if not con:
            print(f"[{country}] - Image container not found.")
            continue
        
        globe_tag = con.find('img')
        if not globe_tag or not globe_tag.get('src'):
            print(f"[{country}] - Image tag or src not found.")
            continue
        
        globe_url = 'https:' + globe_tag['src']
        globe_name = country.replace(" ", "_") + '.png'
        
        os.makedirs('globe_image', exist_ok=True)
        img_response = requests.get(globe_url, headers=headers)
        
        if img_response.status_code == 200:
            with open(os.path.join('globe_image', globe_name), 'wb') as file:
                file.write(img_response.content)
            print(f"[{country}] - File downloaded.")
        else:
            print(f"[{country}] - Failed to download image.")
    
    except Exception as e:
        print(f"[{country}] - Error: {e}")


In [9]:
folder_path ='globe_image'
file_names = os.listdir(folder_path)
image_files = [f for f in file_names if f.endswith('.png')]

df_globe = pd.DataFrame(image_files, columns=['image_globe'])

In [13]:
df_globe['image_globe'].str.split('.')[0][0]

'Afghanistan'

In [15]:
df_globe.head(5)

Unnamed: 0,image_globe
0,Afghanistan.png
1,Albania.png
2,Algeria.png
3,Angola.png
4,Argentina.png


In [17]:
df_globe['country'] = df_globe['image_globe'].apply(lambda x: x.split('.')[0])

In [19]:
df_globe.head()

Unnamed: 0,image_globe,country
0,Afghanistan.png,Afghanistan
1,Albania.png,Albania
2,Algeria.png,Algeria
3,Angola.png,Angola
4,Argentina.png,Argentina


In [23]:
df_globe.loc[:,'country']= df_globe['country'].str.replace('_',' ')

In [27]:
df_globe.tail(30)

Unnamed: 0,image_globe,country
120,South_Korea.png,South Korea
121,South_Sudan.png,South Sudan
122,Spain.png,Spain
123,Sri_Lanka.png,Sri Lanka
124,Sudan.png,Sudan
125,Suriname.png,Suriname
126,Sweden.png,Sweden
127,Switzerland.png,Switzerland
128,Syria.png,Syria
129,São_Tomé_and_Príncipe.png,São Tomé and Príncipe


In [None]:
df_globe.to_csv('data/df_globe.csv')