In [43]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [58]:
import pandas as pd

# Load the World Bank GDP export into a DataFrame exactly as stored on disk.
# No rows are skipped at this stage; null and '..' GDP entries are removed
# by the filtering cells further down.
worldbank_df = pd.read_csv(
    'data/World_Bank/World_Bank_GDP.csv',
    encoding='utf-8',
)

# Show the column labels so the exact year-column names can be copied
print(worldbank_df.columns)

Index(['Country Name', 'Country Code', '1990 [YR1990]', '2000 [YR2000]',
       '2014 [YR2014]', '2015 [YR2015]', '2016 [YR2016]', '2017 [YR2017]',
       '2018 [YR2018]', '2019 [YR2019]', '2020 [YR2020]', '2021 [YR2021]',
       '2022 [YR2022]', '2023 [YR2023]'],
      dtype='object')


In [59]:
# Keep just the country name and the 2023 figure, and give the year column
# the shorter label 'GDP' in the same expression.
gdp_df = worldbank_df[['Country Name', '2023 [YR2023]']].rename(
    columns={'2023 [YR2023]': 'GDP'}
)

In [60]:
# Keep only rows whose GDP is present and is not the literal '..' placeholder
row_has_gdp = gdp_df['GDP'].notna() & gdp_df['GDP'].ne('..')
gdp_df = gdp_df[row_has_gdp]

In [61]:
# Convert GDP to a numeric dtype; any value that cannot be parsed becomes NaN
# (errors='coerce') and is dropped by the following cell.
# .assign() builds a new DataFrame instead of writing a column into gdp_df in
# place. gdp_df is the result of a boolean-mask filter at this point, and an
# in-place column write on such a frame can raise SettingWithCopyWarning.
gdp_df = gdp_df.assign(GDP=pd.to_numeric(gdp_df['GDP'], errors='coerce'))

In [62]:
# Discard rows whose GDP is NaN (i.e. could not be converted to a number),
# then renumber the remaining rows 0..n-1, dropping the old index.
gdp_df = gdp_df.dropna(subset=['GDP']).reset_index(drop=True)

In [63]:
# Display the cleaned frame: 'Country Name' plus the numeric 'GDP' column,
# with null / unparseable rows already removed and the index reset.
print(gdp_df)

            Country Name           GDP
0                Albania  2.297768e+10
1                Algeria  2.398995e+11
2                Andorra  3.727674e+09
3                 Angola  8.472296e+10
4    Antigua and Barbuda  2.033085e+09
..                   ...           ...
181              Vanuatu  1.126313e+09
182             Viet Nam  4.297170e+11
183   West Bank and Gaza  1.739630e+10
184               Zambia  2.816263e+10
185             Zimbabwe  2.653827e+10

[186 rows x 2 columns]


In [64]:
# Express GDP in millions and render each value as text with two decimals and
# an 'M' suffix (e.g. '22977.68M').
# NOTE: after this cell the 'GDP' column holds strings, not numbers, so it is
# no longer suitable for arithmetic or numeric sorting.
# The dtype guard makes the cell safe to re-run: once GDP has been turned into
# strings, dividing it again would raise a TypeError, so the conversion is
# applied only while the column is still numeric.
if pd.api.types.is_numeric_dtype(gdp_df['GDP']):
    gdp_millions = gdp_df['GDP'] / 1_000_000
    gdp_df['GDP'] = gdp_millions.map(lambda value: f"{value:.2f}M")

# Print the resulting DataFrame
print(gdp_df)

            Country Name         GDP
0                Albania   22977.68M
1                Algeria  239899.49M
2                Andorra    3727.67M
3                 Angola   84722.96M
4    Antigua and Barbuda    2033.09M
..                   ...         ...
181              Vanuatu    1126.31M
182             Viet Nam  429716.97M
183   West Bank and Gaza   17396.30M
184               Zambia   28162.63M
185             Zimbabwe   26538.27M

[186 rows x 2 columns]
