## Web Scraping of Country GDP From Wikipedia.

## Import libraries

In [1]:
from bs4 import BeautifulSoup
import requests
from csv import writer
import pandas as pd

## Check if connection is successful

In [2]:
url = "https://en.m.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)"
page = requests.get(url)
page  #200 means connection successful

<Response [200]>

### Extracting and Inspecting each table row

In [3]:
## Extracting page content using html Parser
soup = BeautifulSoup(page.content, "html.parser")

# Check for the table tag by inspecting the html tag on the Wikipedia
table = soup.find("table", class_ = "wikitable") 

# Extracting all the table row using the <tr> tag.
rows = table.find_all("tr")


# Extracting all the table row data  using the <td> and <th> tags.
for row in rows[2:]:
    cells = row.find_all(["td", "th"])
    
    cells_text = [cell.get_text(strip = True) for cell in cells]
    print(cells_text)

['World', '—', '93,863,851', '2021', '87,461,674', '2020', '96,100,091', '2021']
['United States', 'Americas', '25,346,805', '2022', '20,893,746', '2020', '22,996,100', '2021']
['China', 'Asia', '19,911,593', '[n 2]2022', '14,722,801', '[n 3]2020', '17,734,063', '2021']
['Japan', 'Asia', '4,912,147', '2022', '5,057,759', '2020', '4,937,422', '2021']
['Germany', 'Europe', '4,256,540', '2022', '3,846,414', '2020', '4,223,116', '2021']
['India', 'Asia', '3,534,743', '2022', '2,664,749', '2020', '3,173,398', '2021']
['United Kingdom', 'Europe', '3,376,003', '2022', '2,764,198', '2020', '3,186,860', '2021']
['France', 'Europe', '2,936,702', '2022', '2,630,318', '2020', '2,937,473', '2021']
['Canada', 'Americas', '2,221,218', '2022', '1,644,037', '2020', '1,990,762', '2021']
['Italy', 'Europe', '2,058,330', '2022', '1,888,709', '2020', '2,099,880', '2021']
['Brazil', 'Americas', '1,833,274', '2022', '1,444,733', '2020', '1,608,981', '2021']
['Russia', 'Europe', '1,829,050', '2022', '1,483,49

['Malawi', 'Africa', '12,042', '2022', '11,762', '2020', '12,627', '2021']
['Mauritius', 'Africa', '11,263', '2022', '10,921', '2020', '11,157', '2021']
['New Caledonia', 'Oceania', '—', '9,709', '2020', '9,436', '2020']
['Kosovo', 'Europe', '9,660', '2022', '7,734', '2020', '9,007', '2021']
['Mauritania', 'Africa', '9,280', '2022', '7,916', '2020', '8,228', '2021']
['Kyrgyzstan', 'Asia', '9,017', '2022', '7,736', '2020', '8,543', '2021']
['Togo', 'Africa', '8,699', '2022', '7,146', '2020', '8,413', '2021']
['Somalia', 'Africa', '8,491', '2022', '1,873', '2020', '7,293', '2021']
['Tajikistan', 'Asia', '7,820', '2022', '7,997', '2020', '8,746', '2021']
['Bermuda', 'Americas', '—', '7,719', '2020', '7,081', '2021']
['Liechtenstein', 'Europe', '—', '6,872', '2020', '6,427', '2019']
['Monaco', 'Europe', '—', '6,816', '2020', '6,816', '2020']
['Cayman Islands', 'Americas', '—', '6,256', '2020', '5,609', '2020']
['Montenegro', 'Europe', '6,018', '2022', '4,789', '2020', '5,809', '2021']
['Gu

## Create CSV and write headers and data into table

In [4]:
with open("countries_GDP.csv", "w", encoding = "utf8", newline = '') as f:
    thewriter = writer(f)
    header = ["Country/Territory", "Subregion", "Region", "IMF_Estimate", "IMF_Year", "UN_Estimate", "UN_Year", "WB_Estimate", "WB_Year"]
    thewriter.writerow(header)
    
    for row in rows[2:]:
        cells = row.find_all(["td", "th"])
        
        cells_text = [cell.get_text(strip = True) for cell in cells]
        thewriter.writerow(cells_text)

## Confirm by importing the data

In [15]:
data = pd.read_csv("countries_GDP.csv")
data.head(10)

Unnamed: 0,Country/Territory,Subregion,Region,IMF_Estimate,IMF_Year,UN_Estimate,UN_Year,WB_Estimate,WB_Year
0,World,—,93863851,2021,87461674,2020,96100091,2021,
1,United States,Americas,25346805,2022,20893746,2020,22996100,2021,
2,China,Asia,19911593,[n 2]2022,14722801,[n 3]2020,17734063,2021,
3,Japan,Asia,4912147,2022,5057759,2020,4937422,2021,
4,Germany,Europe,4256540,2022,3846414,2020,4223116,2021,
5,India,Asia,3534743,2022,2664749,2020,3173398,2021,
6,United Kingdom,Europe,3376003,2022,2764198,2020,3186860,2021,
7,France,Europe,2936702,2022,2630318,2020,2937473,2021,
8,Canada,Americas,2221218,2022,1644037,2020,1990762,2021,
9,Italy,Europe,2058330,2022,1888709,2020,2099880,2021,


In [9]:
data.tail(10)

Unnamed: 0,Country/Territory,Subregion,Region,IMF_Estimate,IMF_Year,UN_Estimate,UN_Year,WB_Estimate,WB_Year
207,São Tomé and Príncipe,Africa,526,2022,476,2020,547.0,2021.0,
208,Micronesia,Oceania,427,2022,403,2020,404.0,2021.0,
209,Cook Islands,Oceania,—,283,2020,—,,,
210,Marshall Islands,Oceania,267,2022,244,2020,249.0,2021.0,
211,Anguilla,Americas,—,258,2020,—,,,
212,Palau,Oceania,244,2022,264,2020,258.0,2020.0,
213,Kiribati,Oceania,216,2022,181,2020,181.0,2020.0,
214,Nauru,Oceania,134,2022,135,2020,133.0,2021.0,
215,Montserrat,Americas,—,68,2020,—,,,
216,Tuvalu,Oceania,66,2022,55,2020,63.0,2021.0,
