In [79]:
# Import requests (to download the page) and Beautiful Soup (to parse its HTML) for web scraping
import requests
from bs4 import BeautifulSoup

In [80]:
# Download the World Bank debt-statistics page for country code VNM and parse it
url = 'https://datatopics.worldbank.org/debt/ids/countryanalytical/VNM'
# A timeout keeps the cell from hanging indefinitely if the server never answers
page = requests.get(url, timeout=30)
# Stop here on an HTTP error (4xx/5xx) instead of silently parsing an error page
page.raise_for_status()
# Name the parser explicitly: the generic 'html' feature lets Beautiful Soup pick
# whichever HTML parser happens to be installed, so the parse tree (and the
# selectors used further down) could behave differently from machine to machine.
soup = BeautifulSoup(page.text, 'html.parser')

In [81]:
# Print the parsed document, re-indented, to inspect its structure by eye
pretty_html = soup.prettify()
print(pretty_html)

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:og="http://ogp.me/ns#">
 <head>
  <script charset="UTF-8" src="/debt/IDS/js/appdynamics.js" type="text/javascript">
  </script>
  <meta content="no-referrer" name="referrer"/>
  <title>
   IDS Online Tables_Analytical | International Debt Statistics
  </title>
  <meta content="IDS Online Tables_Analytical" name="title"/>
  <meta content="IDS Online Tables_Analytical" property="og:title"/>
  <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
  <script async="async" src="//script.crazyegg.com/pages/scripts/0058/1350.js" type="text/javascript">
  </script>
  <link href="/debt/IDS/images/favicon.ico" rel="shortcut icon" type="image/x-icon"/>
  <link href="/debt/IDS/views/styles/template/jquery-ui.css" rel="stylesheet" type="text/css"/>
  <script src="/debt/IDS/views/scripts/template/jquery.min.js" type="text/

In [82]:
# Locate the <tr> whose class attribute marks it as the header row
header_row = soup.find('tr', attrs={'class': 'hdrrow-4 hdrrow'})
header_row

<tr class="hdrrow-4 hdrrow"><td class="cell1 header info-cell">$ millions, unless otherwise indicated</td><td class="cell-separator header"><div>2010</div></td><td class="cell-separator header"><div>2011</div></td><td class="cell-separator header"><div>2012</div></td><td class="cell-separator header"><div>2013</div></td><td class="cell-separator header"><div>2014</div></td><td class="cell-separator header"><div>2015</div></td><td class="cell-separator header"><div>2016</div></td><td class="cell-separator header"><div>2017</div></td><td class="cell-separator header"><div>2018</div></td><td class="cell-separator header"><div>2019</div></td><td class="cell-separator header"><div>2020</div></td><td class="cell-separator header"><div>2021</div></td></tr>

In [83]:
# Read the text of every header cell, trimming surrounding whitespace
header_cells = header_row.find_all('td')
headings = [cell.get_text().strip() for cell in header_cells]
headings

['$ millions, unless otherwise indicated',
 '2010',
 '2011',
 '2012',
 '2013',
 '2014',
 '2015',
 '2016',
 '2017',
 '2018',
 '2019',
 '2020',
 '2021']

In [84]:
# Start from an empty frame whose columns are the scraped headings
import pandas as pd

df = pd.DataFrame(data=None, columns=headings)
df

Unnamed: 0,"$ millions, unless otherwise indicated",2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021


In [85]:
# Locate the element holding the report data by its id attribute
table_rows = soup.find(attrs={'id': 'table-ddpreport-data'})
table_rows

<table class="Silver" id="table-ddpreport-data"><tr class="filler"><td class="cell1"></td><td class="data"></td><td class="data"></td><td class="data"></td><td class="data"></td><td class="data"></td><td class="data"></td><td class="data"></td><td class="data"></td><td class="data"></td><td class="data"></td><td class="data"></td><td class="data"></td></tr><tr class="custom-row"><td class="cell-R1Sub1 cell" colspan="13">Summary external debt data by debtor type</td></tr><tr class="rowdata level-0"><td class="header cell1 metadata cell-R1" onclick="loadMetaData('DT.DOD.DECT.CD','S','Series','External debt stocks, total (DOD, current US$)','14','101011')">Total External debt stocks</td><td class="data"><div class="spacer2">44,940.7</div></td><td class="data"><div class="spacer2">53,925.8</div></td><td class="data"><div class="spacer2">61,613.1</div></td><td class="data"><div class="spacer2">65,484.3</div></td><td class="data"><div class="spacer2">72,454.1</div></td><td class="data"><div 

In [86]:
# Get data, strip and put content into the dataframe
# Ask for the <tr> elements explicitly: iterating over the table tag itself walks
# its direct children, which may include bare text nodes (no find_all) or a
# <tbody> wrapper, depending on the markup and on the parser in use.
records = []
for row in table_rows.find_all('tr'):
    individual_row_data = [data.text.strip() for data in row.find_all('td')]
    # Keep only rows that contain some text and have exactly one cell per heading;
    # this drops the blank filler rows and the single-cell section-title rows.
    if any(individual_row_data) and len(individual_row_data) == len(headings):
        records.append(individual_row_data)

# Build the frame in one step instead of growing it row by row. Because df is
# rebuilt rather than appended to, re-running this cell no longer duplicates rows.
df = pd.DataFrame(records, columns=headings)

In [87]:
# Preview the first five scraped rows
df.head(n=5)

Unnamed: 0,"$ millions, unless otherwise indicated",2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Total External debt stocks,44940.7,53925.8,61613.1,65484.3,72454.1,77832.9,85665.5,104090.7,106855.3,117338.1,125065.0,136213.2
1,Use of IMF Credit and SDR allocations,529.4,496.0,483.8,484.8,456.1,436.2,423.2,448.3,437.8,435.3,453.4,1987.4
2,Long-term external debt,37478.8,42648.4,48778.9,52830.2,58393.9,65406.0,72522.2,81744.3,86832.6,92505.5,98008.0,100992.8
3,Public and publicly guaranteed sector,32798.8,36786.4,39972.9,42969.5,44834.7,46366.9,48038.7,51790.5,52115.5,51782.5,52200.0,48535.7
4,Public sector,32798.8,36670.4,39817.4,42760.0,44606.7,46148.8,47843.1,51620.2,51970.6,51662.9,52105.8,48466.9


In [88]:
# Indentation levels for the labels found in the
# "$ millions, unless otherwise indicated" column.
# Each label maps to a list of integer levels; apply_indentation() reads the
# last entry of that list.
indentation_levels = {
    "Total External debt stocks": [0],
    "Use of IMF Credit and SDR allocations": [1, 0],
    "Long-term external debt": [1],
    "Public and publicly guaranteed sector": [2, 1, 1, 1],
    "Public sector": [3, 2, 2, 2],
    "of which: General Government": [4, 3, 3, 3],
    "Private sector guaranteed by public sector": [3, 2, 2, 2],
    "Private sector not guaranteed": [2, 1, 1, 1],
    "Short-term external debt": [1],
    "Disbursements (long-term)": [0],
    "Principal repayments (long-term)": [0],
    "Interest payments (long-term)": [0],
    "Long-term External debt stocks": [0],
    "Public and publicly guaranteed debt from:": [1],
    "Official creditors": [2, 2],
    "Multilateral": [3, 3],
    "of which: World Bank": [4, 4],
    "Bilateral": [3, 3],
    "Private creditors": [2, 2],
    "Bondholders": [3, 3],
    "Commercial banks and others": [3],
    "Use of IMF Credit": [1],
    "Long-term": [1],
    "Banks and other private": [3],
    "Short-term": [1],
    "Foreign direct investment": [1],
    "Portfolio equity": [1],
    "External debt stocks to exports (%)": [0],
    "External debt stocks to GNI (%)": [0],
    "Debt service to exports (%)": [0],
    "Short-term to external debt stocks (%)": [0],
    "Multilateral to external debt stocks (%)": [0],
    "Reserves to external debt stocks (%)": [0],
    "Gross national income (GNI)": [0],
}

In [89]:
# Define function to apply indentation based on indentation levels
def apply_indentation(value, indentation_levels, spaces_per_level=4):
    """Return ``value`` as a string, prefixed with spaces for its indentation level.

    Parameters
    ----------
    value
        Label to indent. It is looked up as a key of ``indentation_levels`` and
        converted with ``str()`` for the result.
    indentation_levels : dict
        Maps a label to a list of integer levels. Only the LAST entry of the
        list is used.
        NOTE(review): some labels list several levels, presumably one per
        occurrence of the label in the table; every occurrence currently gets
        the last level. Confirm that this is intended.
    spaces_per_level : int, default 4
        Number of spaces emitted per indentation level.

    Returns
    -------
    str
        ``value`` preceded by ``level * spaces_per_level`` spaces. Labels that
        are missing from the mapping, or mapped to an empty list, are returned
        without indentation.
    """
    levels = indentation_levels.get(value)
    # Missing key or empty list -> level 0 (an empty list used to raise IndexError)
    level = levels[-1] if levels else 0
    return ' ' * (level * spaces_per_level) + str(value)

In [90]:
# Indent every label of the description column according to indentation_levels
label_column = '$ millions, unless otherwise indicated'
df[label_column] = df[label_column].apply(apply_indentation, args=(indentation_levels,))

In [91]:
# Preview the first ten rows after indenting the labels
df.head(n=10)

Unnamed: 0,"$ millions, unless otherwise indicated",2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Total External debt stocks,44940.7,53925.8,61613.1,65484.3,72454.1,77832.9,85665.5,104090.7,106855.3,117338.1,125065.0,136213.2
1,Use of IMF Credit and SDR allocations,529.4,496.0,483.8,484.8,456.1,436.2,423.2,448.3,437.8,435.3,453.4,1987.4
2,Long-term external debt,37478.8,42648.4,48778.9,52830.2,58393.9,65406.0,72522.2,81744.3,86832.6,92505.5,98008.0,100992.8
3,Public and publicly guaranteed sector,32798.8,36786.4,39972.9,42969.5,44834.7,46366.9,48038.7,51790.5,52115.5,51782.5,52200.0,48535.7
4,Public sector,32798.8,36670.4,39817.4,42760.0,44606.7,46148.8,47843.1,51620.2,51970.6,51662.9,52105.8,48466.9
5,of which: General Government,26995.1,29550.5,30558.7,30947.0,31119.3,32055.6,33256.9,36159.8,36739.3,36672.3,36976.4,34200.9
6,Private sector guaranteed by public se...,..,116.0,155.5,209.5,228.0,218.1,195.6,170.2,144.9,119.6,94.2,68.9
7,Private sector not guaranteed,4680.0,5862.0,8806.0,9860.7,13559.2,19039.2,24483.6,29953.8,34717.1,40723.1,45808.0,52457.0
8,Short-term external debt,6932.5,10781.4,12350.4,12169.3,13604.2,11990.7,12720.0,21898.1,19584.9,24397.2,26603.7,33233.0
9,Disbursements (long-term),6921.8,6968.0,8727.7,9493.2,13808.3,12951.1,13117.8,18660.7,20335.9,18674.4,17467.0,23371.0


In [92]:
# Inspect the column dtypes before converting the year columns to numbers
df.dtypes

$ millions, unless otherwise indicated    object
2010                                      object
2011                                      object
2012                                      object
2013                                      object
2014                                      object
2015                                      object
2016                                      object
2017                                      object
2018                                      object
2019                                      object
2020                                      object
2021                                      object
dtype: object

In [93]:
# Names of the year columns (2010 through 2021) to be converted to float
to_float = [str(year) for year in range(2010, 2022)]

In [94]:
import numpy as np

In [95]:
# Turn each year column from text into floats
for year_column in to_float:
    df[year_column] = (
        df[year_column]
        .astype(str)
        .str.replace(",", "")       # drop thousands separators, e.g. '44,940.7'
        .replace('..', np.nan)      # a cell that is exactly '..' becomes NaN
        .astype(float)
    )

In [96]:
# Re-check the column dtypes after the numeric conversion
df.dtypes

$ millions, unless otherwise indicated     object
2010                                      float64
2011                                      float64
2012                                      float64
2013                                      float64
2014                                      float64
2015                                      float64
2016                                      float64
2017                                      float64
2018                                      float64
2019                                      float64
2020                                      float64
2021                                      float64
dtype: object

In [97]:
# Preview the first ten rows after the numeric conversion
df.head(n=10)

Unnamed: 0,"$ millions, unless otherwise indicated",2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Total External debt stocks,44940.7,53925.8,61613.1,65484.3,72454.1,77832.9,85665.5,104090.7,106855.3,117338.1,125065.0,136213.2
1,Use of IMF Credit and SDR allocations,529.4,496.0,483.8,484.8,456.1,436.2,423.2,448.3,437.8,435.3,453.4,1987.4
2,Long-term external debt,37478.8,42648.4,48778.9,52830.2,58393.9,65406.0,72522.2,81744.3,86832.6,92505.5,98008.0,100992.8
3,Public and publicly guaranteed sector,32798.8,36786.4,39972.9,42969.5,44834.7,46366.9,48038.7,51790.5,52115.5,51782.5,52200.0,48535.7
4,Public sector,32798.8,36670.4,39817.4,42760.0,44606.7,46148.8,47843.1,51620.2,51970.6,51662.9,52105.8,48466.9
5,of which: General Government,26995.1,29550.5,30558.7,30947.0,31119.3,32055.6,33256.9,36159.8,36739.3,36672.3,36976.4,34200.9
6,Private sector guaranteed by public se...,,116.0,155.5,209.5,228.0,218.1,195.6,170.2,144.9,119.6,94.2,68.9
7,Private sector not guaranteed,4680.0,5862.0,8806.0,9860.7,13559.2,19039.2,24483.6,29953.8,34717.1,40723.1,45808.0,52457.0
8,Short-term external debt,6932.5,10781.4,12350.4,12169.3,13604.2,11990.7,12720.0,21898.1,19584.9,24397.2,26603.7,33233.0
9,Disbursements (long-term),6921.8,6968.0,8727.7,9493.2,13808.3,12951.1,13117.8,18660.7,20335.9,18674.4,17467.0,23371.0


In [98]:
# Export the cleaned table to a CSV file, without the index column
# NOTE(review): this is an absolute, machine-specific Windows path; consider a
# configurable output directory so the notebook runs on other machines.
output_csv = r'C:\Users\Admin\Desktop\Python Web Scrapping\Output\VN_International_Debt_Statistics.csv'
df.to_csv(output_csv, index=False)