In [1]:
# Importing essential libraries
import numpy as np
import pandas as pd

In [2]:
# Address of the archived (web.archive.org, timestamp 20230902185326) copy of
# Wikipedia's "List of countries by GDP (nominal)" page. This cell only
# defines the address; the tables are read from it in a later cell.
URL = (
    "https://web.archive.org/web/20230902185326/"
    "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_%28nominal%29"
)





Note that the required table is the third one displayed on the web page, as shown in the image below. In the list returned by `pd.read_html`, it is selected with index 3.

<img src="https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0101EN-SkillsNetwork/images/pandas_wbs_3.png">




In [3]:
# Parse every <table> element found at URL into its own DataFrame.
# pd.read_html() returns a list of DataFrames, one entry per table on the page.
tables = pd.read_html(io=URL)

In [4]:
# Keep the GDP table: it is element 3 of the list returned by read_html().
df = tables[3]
df

Unnamed: 0_level_0,Country/Territory,UN region,IMF[1][13],IMF[1][13],World Bank[14],World Bank[14],United Nations[15],United Nations[15]
Unnamed: 0_level_1,Country/Territory,UN region,Estimate,Year,Estimate,Year,Estimate,Year
0,World,—,105568776,2023,100562011,2022,96698005,2021
1,United States,Americas,26854599,2023,25462700,2022,23315081,2021
2,China,Asia,19373586,[n 1]2023,17963171,[n 3]2022,17734131,[n 1]2021
3,Japan,Asia,4409738,2023,4231141,2022,4940878,2021
4,Germany,Europe,4308854,2023,4072192,2022,4259935,2021
...,...,...,...,...,...,...,...,...
209,Anguilla,Americas,—,—,—,—,303,2021
210,Kiribati,Oceania,248,2023,223,2022,227,2021
211,Nauru,Oceania,151,2023,151,2022,155,2021
212,Montserrat,Americas,—,—,—,—,72,2021


In [5]:
# Swap the two-row header for plain positional labels 0, 1, 2, ...
# so that columns can be picked by number in the following cells.
df.columns = range(len(df.columns))
df

Unnamed: 0,0,1,2,3,4,5,6,7
0,World,—,105568776,2023,100562011,2022,96698005,2021
1,United States,Americas,26854599,2023,25462700,2022,23315081,2021
2,China,Asia,19373586,[n 1]2023,17963171,[n 3]2022,17734131,[n 1]2021
3,Japan,Asia,4409738,2023,4231141,2022,4940878,2021
4,Germany,Europe,4308854,2023,4072192,2022,4259935,2021
...,...,...,...,...,...,...,...,...
209,Anguilla,Americas,—,—,—,—,303,2021
210,Kiribati,Oceania,248,2023,223,2022,227,2021
211,Nauru,Oceania,151,2023,151,2022,155,2021
212,Montserrat,Americas,—,—,—,—,72,2021


In [6]:
# Keep only column 0 (country name) and column 2 (GDP estimate, the first
# "Estimate" column of the original header).
df = df[[0, 2]]
df

Unnamed: 0,0,2
0,World,105568776
1,United States,26854599
2,China,19373586
3,Japan,4409738
4,Germany,4308854
...,...,...
209,Anguilla,—
210,Kiribati,248
211,Nauru,151
212,Montserrat,—


In [7]:
# Keep row positions 1 through 10: the ten largest economies.
# Position 0 is the "World" aggregate row, so it is skipped.
df = df.iloc[1:11]
df

Unnamed: 0,0,2
1,United States,26854599
2,China,19373586
3,Japan,4409738
4,Germany,4308854
5,India,3736882
6,United Kingdom,3158938
7,France,2923489
8,Italy,2169745
9,Canada,2089672
10,Brazil,2081235


In [8]:
# Give the two remaining columns readable names: 'Country' and 'GDP'.
df.columns = ['Country', 'GDP']
df

Unnamed: 0,Country,GDP
1,United States,26854599
2,China,19373586
3,Japan,4409738
4,Germany,4308854
5,India,3736882
6,United Kingdom,3158938
7,France,2923489
8,Italy,2169745
9,Canada,2089672
10,Brazil,2081235


In [9]:
# Cast the 'GDP' column to integer using astype().
# assign() returns a new DataFrame, so the result is an independent frame
# rather than a slice of the table it was cut from.
df = df.assign(GDP=df['GDP'].astype(int))
df

Unnamed: 0,Country,GDP
1,United States,26854599
2,China,19373586
3,Japan,4409738
4,Germany,4308854
5,India,3736882
6,United Kingdom,3158938
7,France,2923489
8,Italy,2169745
9,Canada,2089672
10,Brazil,2081235


In [10]:
# Convert the GDP value from Million USD to Billion USD.
# The column holds integers at this point and the quotient is float, so the
# result is assigned as a whole new column. Writing it through
# df.loc[:, 'GDP'] would try to store floats into the existing integer column
# and depend on implicit upcasting, which recent pandas versions deprecate.
df['GDP'] = df['GDP'] / 1000
df

Unnamed: 0,Country,GDP
1,United States,26854.599
2,China,19373.586
3,Japan,4409.738
4,Germany,4308.854
5,India,3736.882
6,United Kingdom,3158.938
7,France,2923.489
8,Italy,2169.745
9,Canada,2089.672
10,Brazil,2081.235


In [12]:
# Use numpy.round() to round the GDP values to 2 decimal places.
# The full column is used on both sides. A label slice such as
# df.loc[1:, 'GDP'] would only cover every row while the index happens to
# start at label 1, and would silently leave gaps otherwise.
df['GDP'] = np.round(df['GDP'], 2)
df

Unnamed: 0,Country,GDP
1,United States,26854.6
2,China,19373.59
3,Japan,4409.74
4,Germany,4308.85
5,India,3736.88
6,United Kingdom,3158.94
7,France,2923.49
8,Italy,2169.74
9,Canada,2089.67
10,Brazil,2081.24


In [13]:
# The values are now expressed in billions, so rename the 'GDP' column
# header to 'GDP (Billion USD)'.
df = df.rename(columns={'GDP': 'GDP (Billion USD)'})
df

Unnamed: 0,Country,GDP (Billion USD)
1,United States,26854.6
2,China,19373.59
3,Japan,4409.74
4,Germany,4308.85
5,India,3736.88
6,United Kingdom,3158.94
7,France,2923.49
8,Italy,2169.74
9,Canada,2089.67
10,Brazil,2081.24


In [14]:
# Save the DataFrame to the CSV file named "top_10_gdp_countries.csv"
# (written with the row index, since to_csv() keeps it by default).
df.to_csv(path_or_buf='top_10_gdp_countries.csv')
df

Unnamed: 0,Country,GDP (Billion USD)
1,United States,26854.6
2,China,19373.59
3,Japan,4409.74
4,Germany,4308.85
5,India,3736.88
6,United Kingdom,3158.94
7,France,2923.49
8,Italy,2169.74
9,Canada,2089.67
10,Brazil,2081.24
