In [2]:
import pandas as pd  
import os


In [3]:
# Location of the WITS workbook, relative to this notebook.
# Note: despite the variable name, the file is an Excel workbook (.xlsx), not a CSV.
csv_path = os.path.join('..', 'Web Scrapping', 'WITS-Country-Timeseries.xlsx')

# Load the "Country-Timeseries" sheet of that workbook into a DataFrame.
df = pd.read_excel(io=csv_path, sheet_name="Country-Timeseries")

In [4]:
# Preview the first 50 rows of the loaded sheet.
df.head(n=50)

Unnamed: 0,Country Name,Indicator Name,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Afghanistan,GDP per capita (current US$),554.594735,621.912414,663.141053,651.987862,628.146804,592.476165,520.251955,530.149863,502.057099,500.522981,516.866797,363.674087,
1,Albania,GDP per capita (current US$),4094.349699,4437.141146,4247.631356,4413.063397,4578.633208,3952.803584,4124.05539,4531.032207,5287.660817,5396.214227,5343.037704,6377.203096,6802.804519
2,Algeria,GDP per capita (current US$),4495.921455,5473.281801,5610.733306,5519.777576,5516.229463,4197.419971,3967.20066,4134.936099,4171.795309,4021.983608,3354.157303,3700.311195,4273.922183
3,Andorra,GDP per capita (current US$),48237.891173,51428.196955,44902.380765,44747.753864,45680.53499,38885.530324,39931.216982,40632.231554,42904.828456,41328.600499,37207.222,42072.341103,41992.793358
4,Angola,GDP per capita (current US$),3496.784796,4511.153227,4962.552072,5101.983876,5059.080441,3100.830685,1709.515534,2283.214233,2487.500996,2142.238757,1502.950754,1903.717405,2998.501158
5,Antigua and Barbuda,GDP per capita (current US$),13404.516016,13117.146941,13686.476585,13350.149137,14004.811212,14861.882707,15862.651663,16110.3124,17514.355864,18187.779712,15284.772384,16740.348196,18745.173509
6,Argentina,GDP per capita (current US$),10385.964432,12848.740476,13082.664326,13080.254732,12334.798245,13789.060425,12790.264064,14613.035715,11795.162885,9963.674231,8496.428157,10636.11553,13686.008674
7,Armenia,GDP per capita (current US$),3143.029482,3462.681774,3643.715404,3833.157071,4017.229913,3666.141825,3679.952349,4041.995072,4391.923274,4828.504889,4505.867746,4966.513471,7014.206592
8,Aruba,GDP per capita (current US$),24452.928363,26044.435933,25609.955724,26515.67808,26942.307976,28421.386493,28451.273745,29326.708058,30918.515218,31902.762582,24487.863569,29342.10073,
9,Australia,GDP per capita (current US$),52132.469608,62598.686618,68047.378178,68156.386105,62515.314832,56708.961197,49876.712376,53934.154374,57206.99067,54941.06572,51722.069,60444.502355,64491.429886


In [5]:
# Show every column label as a plain Python list.
list(df.columns)

['Country Name',
 'Indicator Name',
 '2010',
 '2011',
 '2012',
 '2013',
 '2014',
 '2015',
 '2016',
 '2017',
 '2018',
 '2019',
 '2020',
 '2021',
 '2022']

We need to filter this dataset; first, check which indicators it contains.

In [6]:
# Distinct indicator labels present in the sheet.
pd.unique(df['Indicator Name'])

array(['GDP per capita (current US$)'], dtype=object)

In [7]:
# Inspect the dtype of each column.
df.dtypes

Country Name       object
Indicator Name     object
2010              float64
2011              float64
2012              float64
2013              float64
2014              float64
2015              float64
2016              float64
2017              float64
2018              float64
2019              float64
2020              float64
2021              float64
2022              float64
dtype: object

In [8]:
# Impute missing yearly values with the mean of the same row, i.e. the mean of
# the values that row does have.
#
# select_dtypes already leaves out the text columns ('Country Name',
# 'Indicator Name'), so they do not need to be moved into the index first.
numeric_cols = df.select_dtypes(include=['number'])

# mean(axis=1) skips NaN by default, so each mean covers observed values only.
row_means = numeric_cols.mean(axis=1)

# Fill column by column: fillna aligns `row_means` on the row index, so every
# gap receives its own row's mean. This avoids running a Python lambda per row.
# NOTE: a row with no observations at all has a NaN mean and therefore stays NaN.
df[numeric_cols.columns] = numeric_cols.apply(
    lambda year_values: year_values.fillna(row_means)
)

In [9]:
# Display the frame to inspect the result of the previous step.
df

Unnamed: 0,Country Name,Indicator Name,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Afghanistan,GDP per capita (current US$),554.594735,621.912414,663.141053,651.987862,628.146804,592.476165,520.251955,530.149863,502.057099,500.522981,516.866797,363.674087,553.815151
1,Albania,GDP per capita (current US$),4094.349699,4437.141146,4247.631356,4413.063397,4578.633208,3952.803584,4124.055390,4531.032207,5287.660817,5396.214227,5343.037704,6377.203096,6802.804519
2,Algeria,GDP per capita (current US$),4495.921455,5473.281801,5610.733306,5519.777576,5516.229463,4197.419971,3967.200660,4134.936099,4171.795309,4021.983608,3354.157303,3700.311195,4273.922183
3,Andorra,GDP per capita (current US$),48237.891173,51428.196955,44902.380765,44747.753864,45680.534990,38885.530324,39931.216982,40632.231554,42904.828456,41328.600499,37207.222000,42072.341103,41992.793358
4,Angola,GDP per capita (current US$),3496.784796,4511.153227,4962.552072,5101.983876,5059.080441,3100.830685,1709.515534,2283.214233,2487.500996,2142.238757,1502.950754,1903.717405,2998.501158
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,Venezuela,GDP per capita (current US$),13692.914967,10877.112364,12937.927597,12433.980785,15975.729375,13183.533018,13183.533018,13183.533018,13183.533018,13183.533018,13183.533018,13183.533018,13183.533018
189,Vietnam,GDP per capita (current US$),1684.011667,1953.556979,2190.232284,2367.499542,2558.778924,2595.234979,2760.717101,2992.071746,3267.225009,3491.091279,3586.347297,3756.489122,4163.514300
190,Yemen,GDP per capita (current US$),1249.063085,1284.617635,1349.990295,1497.747941,1557.601406,1488.416267,1069.816997,893.716573,701.714878,693.816484,583.875663,603.707959,676.928385
191,Zambia,GDP per capita (current US$),1469.361450,1644.456831,1729.647471,1840.320553,1724.576220,1307.909649,1249.923143,1495.752138,1475.199836,1268.120941,956.831747,1137.344395,1487.907764


In [10]:
# Work on an independent (deep) copy so `df` itself is not modified by later steps.
gdp_df = df.copy(deep=True)

In [11]:
# Remove the 'Indicator Name' column from gdp_df (modifies gdp_df in place).
gdp_df.drop(columns=['Indicator Name'], inplace=True)

In [12]:
# Rename 'Country Name' to 'Country' in place; errors='raise' makes the call
# fail loudly if the source column is missing.
gdp_df.rename(
    {'Country Name': 'Country'},
    axis='columns',
    errors='raise',
    inplace=True,
)

In [14]:
# Reshape from wide (one column per year) to long (one row per Country/Year).
# Despite the "pivoted" name, this is a melt. The year labels are then converted
# to a numeric dtype so they can be compared as numbers.
pivoted_gdp_df = pd.melt(
    gdp_df,
    id_vars=["Country"],
    var_name="Year",
    value_name="GDP per capita",
).assign(Year=lambda long_df: pd.to_numeric(long_df["Year"]))

In [15]:
# Keep only the 2013-2021 observations (both bounds inclusive). The explicit
# .copy() makes the result an independent frame, so later column assignments on
# it do not trigger pandas' SettingWithCopyWarning.
year_in_window = (pivoted_gdp_df['Year'] >= 2013) & (pivoted_gdp_df['Year'] <= 2021)
pivoted_gdp_df = pivoted_gdp_df.loc[year_in_window].copy()

In [16]:
# Map the source's long-form country labels to short names.
# All replacements are applied in one pass; no target name is also a key,
# so the result matches applying them one after another.
COUNTRY_NAME_FIXES = {
    'Russian Federation': 'Russia',
    'Egypt, Arab Rep.': 'Egypt',
    'Ethiopia(excludes Eritrea)': 'Ethiopia',
    'Gambia, The': 'Gambia',
    'Hong Kong, China': 'Hong Kong',
    'Iran, Islamic Rep.': 'Iran',
    'Korea, Rep.': 'South Korea',
    'Serbia, FR(Serbia/Montenegro)': 'Serbia',
}

# .assign returns a new frame rather than writing into the existing one, which
# avoids SettingWithCopyWarning when the input is a filtered selection.
pivoted_gdp_df = pivoted_gdp_df.assign(
    Country=pivoted_gdp_df['Country'].replace(COUNTRY_NAME_FIXES)
)


In [18]:
# Spot-check the rows whose Country is 'South Korea'.
pivoted_gdp_df.loc[pivoted_gdp_df['Country'].eq('South Korea')]

Unnamed: 0,Country,Year,GDP per capita
675,South Korea,2013,27182.73431
868,South Korea,2014,29249.575221
1061,South Korea,2015,28732.231076
1254,South Korea,2016,29288.870439
1447,South Korea,2017,31616.8434
1640,South Korea,2018,33436.923065
1833,South Korea,2019,31902.416905
2026,South Korea,2020,31721.298018
2219,South Korea,2021,34997.781643


In [19]:
# Write the long-format table to 'GDP_per_capita.csv', leaving out the row index.
pivoted_gdp_df.to_csv(path_or_buf='GDP_per_capita.csv', index=False)