In [1]:
import pandas as pd

# 1) Load the two metadata CSVs with their full paths
meta_country = pd.read_csv(
    r"F:/Ironhack/Databases/Metadata_Country_API_NY.GDP.MKTP.KD.ZG_DS2_en_csv_v2_38363.csv"
)
meta_indicator = pd.read_csv(
    r"F:/Ironhack/Databases/Metadata_Indicator_API_NY.GDP.MKTP.KD.ZG_DS2_en_csv_v2_38363.csv"
)

# 2) Load the GDP data, skipping the 4-line preamble
gdp = pd.read_csv(
    r"F:/Ironhack/Databases/API_NY.GDP.MKTP.KD.ZG_DS2_en_csv_v2_38363.csv",
    skiprows=4,
    low_memory=False      # keep this if you're on the default (C) engine
)

# 3) Normalize column names to simplify merges
for df in (gdp, meta_country, meta_indicator):
    df.columns = (
        df.columns
          .str.strip()
          .str.replace(' ', '_')
          .str.lower()
    )

# 4) Merge country metadata then indicator metadata
combined = (
    gdp
    .merge(meta_country,   on="country_code",   how="left")
    .merge(meta_indicator, on="indicator_code", how="left")
)

# 5) Inspect the result
print("Combined shape:", combined.shape)
print(combined.head())


Combined shape: (266, 79)
                  country_name country_code       indicator_name_x  \
0                        Aruba          ABW  GDP growth (annual %)   
1  Africa Eastern and Southern          AFE  GDP growth (annual %)   
2                  Afghanistan          AFG  GDP growth (annual %)   
3   Africa Western and Central          AFW  GDP growth (annual %)   
4                       Angola          AGO  GDP growth (annual %)   

      indicator_code  1960      1961      1962      1963      1964      1965  \
0  NY.GDP.MKTP.KD.ZG   NaN       NaN       NaN       NaN       NaN       NaN   
1  NY.GDP.MKTP.KD.ZG   NaN  0.469708  7.868623  5.622472  4.689533  5.159536   
2  NY.GDP.MKTP.KD.ZG   NaN       NaN       NaN       NaN       NaN       NaN   
3  NY.GDP.MKTP.KD.ZG   NaN  1.869637  3.725941  7.039191  5.364761  4.105616   
4  NY.GDP.MKTP.KD.ZG   NaN       NaN       NaN       NaN       NaN       NaN   

   ...  unnamed:_69                                             region  

In [2]:
combined

Unnamed: 0,country_name,country_code,indicator_name_x,indicator_code,1960,1961,1962,1963,1964,1965,...,unnamed:_69,region,incomegroup,specialnotes,tablename,unnamed:_5,indicator_name_y,source_note,source_organization,unnamed:_4
0,Aruba,ABW,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,,,,,,,...,,Latin America & Caribbean,High income,,Aruba,,GDP growth (annual %),Gross domestic product is the total income ear...,"Country official statistics, National Statisti...",
1,Africa Eastern and Southern,AFE,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,,0.469708,7.868623,5.622472,4.689533,5.159536,...,,,,"26 countries, stretching from the Red Sea in t...",Africa Eastern and Southern,,GDP growth (annual %),Gross domestic product is the total income ear...,"Country official statistics, National Statisti...",
2,Afghanistan,AFG,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,,,,,,,...,,"Middle East, North Africa, Afghanistan & Pakistan",Low income,The reporting period for national accounts dat...,Afghanistan,,GDP growth (annual %),Gross domestic product is the total income ear...,"Country official statistics, National Statisti...",
3,Africa Western and Central,AFW,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,,1.869637,3.725941,7.039191,5.364761,4.105616,...,,,,"22 countries, stretching from the westernmost ...",Africa Western and Central,,GDP growth (annual %),Gross domestic product is the total income ear...,"Country official statistics, National Statisti...",
4,Angola,AGO,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,,,,,,,...,,Sub-Saharan Africa,Lower middle income,The World Bank systematically assesses the app...,Angola,,GDP growth (annual %),Gross domestic product is the total income ear...,"Country official statistics, National Statisti...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,Kosovo,XKX,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,,,,,,,...,,Europe & Central Asia,Upper middle income,,Kosovo,,GDP growth (annual %),Gross domestic product is the total income ear...,"Country official statistics, National Statisti...",
262,"Yemen, Rep.",YEM,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,,,,,,,...,,"Middle East, North Africa, Afghanistan & Pakistan",Low income,The World Bank systematically assesses the app...,"Yemen, Rep.",,GDP growth (annual %),Gross domestic product is the total income ear...,"Country official statistics, National Statisti...",
263,South Africa,ZAF,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,,3.844734,6.177931,7.373709,7.939609,6.122798,...,,Sub-Saharan Africa,Upper middle income,Fiscal year end: March 31; reporting period fo...,South Africa,,GDP growth (annual %),Gross domestic product is the total income ear...,"Country official statistics, National Statisti...",
264,Zambia,ZMB,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,,1.361382,-2.490839,3.272393,12.214048,16.647456,...,,Sub-Saharan Africa,Lower middle income,National accounts data were rebased to reflect...,Zambia,,GDP growth (annual %),Gross domestic product is the total income ear...,"Country official statistics, National Statisti...",


In [5]:
combined.columns

Index(['country_name', 'country_code', 'indicator_name_x', 'indicator_code',
       '1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968',
       '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977',
       '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986',
       '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995',
       '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
       '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
       '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022',
       '2023', '2024', 'unnamed:_69', 'region', 'incomegroup', 'specialnotes',
       'tablename', 'unnamed:_5', 'indicator_name_y', 'source_note',
       'source_organization', 'unnamed:_4'],
      dtype='object')

In [11]:
brazil_rows = combined[ combined['country_name'] == 'Brazil' ]
brazil_rows[['1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968',
       '1969', '1970', '1971']].sum().median()

6.650000000000009