# In [1]:
# Scraping wiki tables from this list
# https://ru.wikipedia.org/wiki/Список_стран_по_ВВП
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from IPython.display import display
# Scraping settings per indicator, keyed by the indicator name:
#   url                 - Wikipedia page to download
#   table_position_wiki - index of the wanted table when the page holds
#                         several wiki tables
#   columns_slice       - positional indices of the columns to keep; the
#                         second one picks the source (usually which bank
#                         or which year) to use
gdp_dict = {
    "ВВП Номинал": {
        "url": "https://ru.wikipedia.org/wiki/Список_стран_по_ВВП_(номинал)",
        "table_position_wiki": 1,
        "columns_slice": [1, 3],
    },
    "ВВП Номинал на человека": {
        "url": "https://ru.wikipedia.org/wiki/Список_стран_по_ВВП_(номинал)_на_душу_населения",
        "table_position_wiki": 0,
        "columns_slice": [1, 2],
    },
    "ВВП ППС": {
        "url": "https://ru.wikipedia.org/wiki/Список_стран_по_ВВП_(ППС)",
        "table_position_wiki": 0,
        "columns_slice": [1, 3],
    },
    "ВВП ППС на человека": {
        "url": "https://ru.wikipedia.org/wiki/Список_стран_по_ВВП_(ППС)_на_душу_населения",
        "table_position_wiki": 0,
        "columns_slice": [1, 3],
    },
}
def explore_wiki_tables(url: str) -> None:
    """Display every table that ``scrape_wiki`` finds on a page.

    Each table is parsed with ``pd.read_html`` and shown with IPython's
    ``display``. The comment in the settings dict suggests this is how the
    ``table_position_wiki`` and ``columns_slice`` values get chosen.

    Args:
        url: Address of the page to inspect.
    """
    for table in scrape_wiki(url):
        # Wrap the markup in StringIO: passing a literal HTML string to
        # pd.read_html is deprecated since pandas 2.1.
        display(pd.read_html(StringIO(str(table)))[0])
        
# scraping block
def scrape_wiki(url: str, timeout: float = 30):
    """Download a page and return its ``wikitable`` tables.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; without
            one, ``requests.get`` may block indefinitely.

    Returns:
        The result of ``soup.find_all``: the ``<table>`` elements carrying
        the ``wikitable`` class.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page, which would
    # yield an empty result and a confusing IndexError further down.
    response.raise_for_status()

    # parse data from the html into a beautifulsoup object
    soup = BeautifulSoup(response.text, "html.parser")
    return soup.find_all("table", {"class": "wikitable"})

# Fetch every configured page once and keep the tables found on it,
# keyed by indicator name.
scrape_results = {}
for indicator, settings in gdp_dict.items():
    page_tables = scrape_wiki(settings["url"])
    scrape_results[indicator] = page_tables

    
# data processing block
def make_int(val):
    """Convert a scraped cell value to ``int``, falling back to NaN.

    Non-breaking spaces and ordinary spaces are stripped from strings
    before conversion (presumably thousands separators in the wiki tables,
    e.g. ``"1 234 567"``).

    Args:
        val: Cell value to convert; anything ``int()`` accepts.

    Returns:
        The value as ``int``, or ``np.nan`` when it cannot be converted.
    """
    if isinstance(val, str):
        val = val.replace("\xa0", "").replace(" ", "")
    try:
        return int(val)
    except (TypeError, ValueError, OverflowError):
        # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0,
        # so the old fallback raised AttributeError instead of returning NaN.
        return np.nan
    
# Build one two-column frame (country, indicator value) per indicator.
resulting_dfs = []
for k, v in gdp_dict.items():
    html_table = scrape_results[k][v['table_position_wiki']]
    # Wrap the markup in StringIO: passing a literal HTML string to
    # pd.read_html is deprecated since pandas 2.1.
    parsed_tables = pd.read_html(StringIO(str(html_table)))
    # .copy() makes the selection an independent frame, so the assignments
    # below never act on (or warn about) a view of the parsed table.
    df = parsed_tables[0].iloc[:, v['columns_slice']].copy()
    df.columns = ['Страна', k]
    # Assign by column name rather than df.iloc[:, 1] = ...: iloc sets values
    # in place and can leave the column as object dtype, while this replaces
    # the column so it takes the dtype of the converted values.
    df[k] = df[k].apply(make_int)
    resulting_dfs.append(df)
# Write each indicator to its own sheet of one Excel workbook, plus an INFO
# sheet listing the source page of every indicator.
info_page = pd.DataFrame(
    [[k, v['url']] for k, v in gdp_dict.items()],
    columns=['Показатель', 'Ссылка'],
)
# The context manager saves and closes the workbook on exit, even if a sheet
# fails to write; ExcelWriter.save() was removed in pandas 2.0.
with pd.ExcelWriter('ВВП_4.xlsx', engine='openpyxl') as writer:
    # resulting_dfs was built in gdp_dict order, so zip pairs each frame
    # with its indicator name.
    for sheet_name, frame in zip(gdp_dict, resulting_dfs):
        frame.to_excel(writer, sheet_name=sheet_name, index=False)
    info_page.to_excel(writer, sheet_name='INFO', index=False)