# 10/16/2024

In [1]:
import pandas as pd
import plotly.express as px

import warnings
warnings.filterwarnings('ignore')

pd.set_option("display.max_rows", None)

df = pd.read_csv("https://raw.githubusercontent.com/wcj365/python-stats-dataviz/refs/heads/master/fall2024/data/World_Development_Indicators_(WDI).csv")

print(df.shape)
df.sample(3)

(4123, 9)


Unnamed: 0,Year,Country,GDP per capita (current US$),"Life expectancy at birth, total (years)","Population, total",Country Code,Region,Income Group,Lending Type
2375,2004,Mauritania,801.778683,61.593,2946575.0,MRT,Sub-Saharan Africa,Lower middle income,IDA
811,2017,Colombia,6449.970987,76.646,48351671.0,COL,Latin America & Caribbean,Upper middle income,IBRD
24,2009,Albania,4114.134033,77.781,2927519.0,ALB,Europe & Central Asia,Upper middle income,IBRD


In [2]:
column_names_dict = {
    "GDP per capita (current US$)" : "GDP per Capita",
    "Life expectancy at birth, total (years)" : "Life Expectancy",
    "Population, total" : "Population"
}

df = df.rename(columns=column_names_dict)
df.sample(3)

Unnamed: 0,Year,Country,GDP per Capita,Life Expectancy,Population,Country Code,Region,Income Group,Lending Type
2919,2016,Paraguay,5759.042198,73.534,6266615.0,PRY,Latin America & Caribbean,Upper middle income,IBRD
3799,2022,Turkmenistan,8792.548504,,6430770.0,TKM,Europe & Central Asia,Upper middle income,IBRD
3123,2011,San Marino,55815.285261,,32495.0,SMR,Europe & Central Asia,High income,Not classified


In [3]:
df.sample(3).T

Unnamed: 0,2739,868,395
Year,2007,2017,2019
Country,Nigeria,"Congo, Rep.",Benin
GDP per Capita,1876.413033,2227.720139,1170.885995
Life Expectancy,50.033,63.76,60.454
Population,148294028.0,5312340.0,12290444.0
Country Code,NGA,COG,BEN
Region,Sub-Saharan Africa,Sub-Saharan Africa,Sub-Saharan Africa
Income Group,Lower middle income,Lower middle income,Lower middle income
Lending Type,Blend,Blend,IDA


## Section One

Summary Statistics with describe() function

In [4]:
df[["Population","Life Expectancy"]].describe()

Unnamed: 0,Population,Life Expectancy
count,4123.0,3777.0
mean,33195750.0,71.060853
std,131643000.0,8.499806
min,9791.0,42.125
25%,743620.0,65.351
50%,5872624.0,72.765
75%,21484940.0,77.529
max,1417173000.0,85.497561


In [5]:
df.describe()

Unnamed: 0,Year,GDP per Capita,Life Expectancy,Population
count,4123.0,3962.0,3777.0,4123.0
mean,2013.0,17007.692848,71.060853,33195750.0
std,5.47789,25733.109164,8.499806,131643000.0
min,2004.0,128.538423,42.125,9791.0
25%,2008.0,1862.8924,65.351,743620.0
50%,2013.0,6048.304202,72.765,5872624.0
75%,2018.0,22137.309568,77.529,21484940.0
max,2022.0,240862.182448,85.497561,1417173000.0


In [6]:
df.describe(include="object")

Unnamed: 0,Country,Country Code,Region,Income Group,Lending Type
count,4123,4123,4123,4123,4123
unique,217,217,7,5,4
top,Afghanistan,AFG,Europe & Central Asia,High income,Not classified
freq,19,19,1102,1558,1387


In [7]:
df.describe(include="object").T

Unnamed: 0,count,unique,top,freq
Country,4123,217,Afghanistan,19
Country Code,4123,217,AFG,19
Region,4123,7,Europe & Central Asia,1102
Income Group,4123,5,High income,1558
Lending Type,4123,4,Not classified,1387


## Section Two

Compute quartiles and transform a numerical column into a categorical column based on the quartiles.

In [8]:
def assign_quartile(gdp):
    if gdp <= 1862.89:
        quartile = "1st Quartile (<=25%)"
    elif gdp > 1862.89 and gdp <= 6048.30:
        quartile = "2nd Quartile (25-50%)"
    elif gdp > 6048.30 and gdp <= 22137.31:
        quartile = "3rd Quartile (50-75%)"
    else:
        quartile = "4th Quartile (>75%)"
    return quartile


hungary = assign_quartile(14294.25)
hungary

'3rd Quartile (50-75%)'

In [9]:
df_usa = df[df["Country Code"] == "USA"]
df_usa

Unnamed: 0,Year,Country,GDP per Capita,Life Expectancy,Population,Country Code,Region,Income Group,Lending Type
3914,2004,United States,41724.631629,77.487805,292805298.0,USA,North America,High income,Not classified
3915,2005,United States,44123.407068,77.487805,295516599.0,USA,North America,High income,Not classified
3916,2006,United States,46302.00088,77.687805,298379912.0,USA,North America,High income,Not classified
3917,2007,United States,48050.223777,77.987805,301231207.0,USA,North America,High income,Not classified
3918,2008,United States,48570.04598,78.039024,304093966.0,USA,North America,High income,Not classified
3919,2009,United States,47194.943355,78.390244,306771529.0,USA,North America,High income,Not classified
3920,2010,United States,48650.643128,78.541463,309327143.0,USA,North America,High income,Not classified
3921,2011,United States,50065.966504,78.641463,311583481.0,USA,North America,High income,Not classified
3922,2012,United States,51784.418574,78.741463,313877662.0,USA,North America,High income,Not classified
3923,2013,United States,53291.127689,78.741463,316059947.0,USA,North America,High income,Not classified


In [10]:
assign_quartile(76329.58)

'4th Quartile (>75%)'

In [11]:
df_2020 = df[df["Year"] == 2020]
print(df_2020.shape[0])
print(f"There are {df_2020.shape[0]} countries in the 2020 dataset")
df_2020.sample(3)

217
There are 217 countries in the 2020 dataset


Unnamed: 0,Year,Country,GDP per Capita,Life Expectancy,Population,Country Code,Region,Income Group,Lending Type
3664,2020,Thailand,7001.78546,79.274,71475664.0,THA,East Asia & Pacific,Upper middle income,IBRD
4101,2020,Zambia,956.831729,62.38,18927715.0,ZMB,Sub-Saharan Africa,Lower middle income,IDA
928,2020,Croatia,14269.908855,77.72439,4047680.0,HRV,Europe & Central Asia,High income,IBRD


In [12]:
# Tuple
df_2020.shape

(217, 9)

## In-class Exercise

Create a new column call "GDP Quartile" in the df_2020 dataframe and assign each country the quartile it belongs to by applying the assign_quartile() function.


In [13]:
df_2020["GDP Quartile"] = df_2020["GDP per Capita"].apply(assign_quartile)

print(df_2020.shape[0])
print(df_2020["GDP Quartile"].value_counts())

217
GDP Quartile
4th Quartile (>75%)      61
2nd Quartile (25-50%)    58
3rd Quartile (50-75%)    54
1st Quartile (<=25%)     44
Name: count, dtype: int64


In [14]:
df_2020.sample(10)

Unnamed: 0,Year,Country,GDP per Capita,Life Expectancy,Population,Country Code,Region,Income Group,Lending Type,GDP Quartile
1460,2020,Greece,17617.291506,81.287805,10698599.0,GRC,Europe & Central Asia,High income,Not classified,3rd Quartile (50-75%)
852,2020,"Congo, Dem. Rep.",524.666686,59.739,92853164.0,COD,Sub-Saharan Africa,Low income,IDA,1st Quartile (<=25%)
396,2020,Benin,1240.733155,60.088,12643123.0,BEN,Sub-Saharan Africa,Lower middle income,IDA,1st Quartile (<=25%)
4025,2020,Viet Nam,3586.347176,75.378,96648685.0,VNM,East Asia & Pacific,Lower middle income,IBRD,2nd Quartile (25-50%)
1099,2020,Ecuador,5645.19929,72.153,17588595.0,ECU,Latin America & Caribbean,Upper middle income,IBRD,2nd Quartile (25-50%)
149,2020,Argentina,8500.837939,75.892,45376763.0,ARG,Latin America & Caribbean,Upper middle income,IBRD,3rd Quartile (50-75%)
434,2020,Bhutan,3181.339747,71.609,772506.0,BTN,South Asia,Lower middle income,IDA,2nd Quartile (25-50%)
35,2020,Albania,5343.037704,76.989,2837849.0,ALB,Europe & Central Asia,Upper middle income,IBRD,2nd Quartile (25-50%)
2600,2020,Namibia,4252.04172,62.829,2489098.0,NAM,Sub-Saharan Africa,Upper middle income,IBRD,2nd Quartile (25-50%)
2619,2020,Nauru,10124.700622,63.437,12315.0,NRU,East Asia & Pacific,High income,IBRD,3rd Quartile (50-75%)


In [15]:
df_4th = df_2020[df_2020["GDP Quartile"] == "4th Quartile (>75%)"]
df_4th.shape

(61, 10)

In [16]:
", ".join(list(df_4th["Country"]))

"Andorra, Aruba, Australia, Austria, Bahamas, The, Bahrain, Belgium, Bermuda, British Virgin Islands, Brunei Darussalam, Canada, Cayman Islands, Channel Islands, Cyprus, Czechia, Denmark, Eritrea, Estonia, Faroe Islands, Finland, France, Germany, Gibraltar, Greenland, Guam, Hong Kong SAR, China, Iceland, Ireland, Isle of Man, Israel, Italy, Japan, Korea, Dem. People's Rep., Korea, Rep., Kuwait, Liechtenstein, Luxembourg, Macao SAR, China, Malta, Monaco, Netherlands, New Caledonia, New Zealand, Norway, Portugal, Puerto Rico, Qatar, San Marino, Singapore, Sint Maarten (Dutch part), Slovenia, South Sudan, Spain, St. Martin (French part), Sweden, Switzerland, United Arab Emirates, United Kingdom, United States, Venezuela, RB, Virgin Islands (U.S.)"

In [17]:


quartile_groups = df_2020.groupby("GDP Quartile")["Country"].apply(list)


for quartile, countries in quartile_groups.items():
  print(f"{quartile}:")
  for country in countries:
      print(f"  - {country}")
  print("\n")

1st Quartile (<=25%):
  - Afghanistan
  - Angola
  - Benin
  - Burkina Faso
  - Burundi
  - Cambodia
  - Cameroon
  - Central African Republic
  - Chad
  - Comoros
  - Congo, Dem. Rep.
  - Ethiopia
  - Gambia, The
  - Guinea
  - Guinea-Bissau
  - Haiti
  - Kiribati
  - Kyrgyz Republic
  - Lesotho
  - Liberia
  - Madagascar
  - Malawi
  - Mali
  - Mauritania
  - Mozambique
  - Myanmar
  - Nepal
  - Niger
  - Pakistan
  - Rwanda
  - Senegal
  - Sierra Leone
  - Somalia
  - Sudan
  - Syrian Arab Republic
  - Tajikistan
  - Tanzania
  - Timor-Leste
  - Togo
  - Uganda
  - Uzbekistan
  - Yemen, Rep.
  - Zambia
  - Zimbabwe


2nd Quartile (25-50%):
  - Albania
  - Algeria
  - Armenia
  - Azerbaijan
  - Bangladesh
  - Belize
  - Bhutan
  - Bolivia
  - Botswana
  - Cabo Verde
  - Colombia
  - Congo, Rep.
  - Cote d'Ivoire
  - Djibouti
  - Ecuador
  - Egypt, Arab Rep.
  - El Salvador
  - Eswatini
  - Fiji
  - Georgia
  - Ghana
  - Guatemala
  - Honduras
  - India
  - Indonesia
  - Iran, Islamic

In [18]:
for quartile, countries in quartile_groups.items():
  print(f"{quartile}:")
  for country in countries:
      print(f"  - {country}")
  print("\n")

1st Quartile (<=25%):
  - Afghanistan
  - Angola
  - Benin
  - Burkina Faso
  - Burundi
  - Cambodia
  - Cameroon
  - Central African Republic
  - Chad
  - Comoros
  - Congo, Dem. Rep.
  - Ethiopia
  - Gambia, The
  - Guinea
  - Guinea-Bissau
  - Haiti
  - Kiribati
  - Kyrgyz Republic
  - Lesotho
  - Liberia
  - Madagascar
  - Malawi
  - Mali
  - Mauritania
  - Mozambique
  - Myanmar
  - Nepal
  - Niger
  - Pakistan
  - Rwanda
  - Senegal
  - Sierra Leone
  - Somalia
  - Sudan
  - Syrian Arab Republic
  - Tajikistan
  - Tanzania
  - Timor-Leste
  - Togo
  - Uganda
  - Uzbekistan
  - Yemen, Rep.
  - Zambia
  - Zimbabwe


2nd Quartile (25-50%):
  - Albania
  - Algeria
  - Armenia
  - Azerbaijan
  - Bangladesh
  - Belize
  - Bhutan
  - Bolivia
  - Botswana
  - Cabo Verde
  - Colombia
  - Congo, Rep.
  - Cote d'Ivoire
  - Djibouti
  - Ecuador
  - Egypt, Arab Rep.
  - El Salvador
  - Eswatini
  - Fiji
  - Georgia
  - Ghana
  - Guatemala
  - Honduras
  - India
  - Indonesia
  - Iran, Islamic

In [19]:
df_2020 = df_2020.dropna()
df_2020["GDP (Rounded)"] = df_2020["GDP per Capita"].apply(round)
df_2020.sample(5)

Unnamed: 0,Year,Country,GDP per Capita,Life Expectancy,Population,Country Code,Region,Income Group,Lending Type,GDP Quartile,GDP (Rounded)
2505,2020,Mongolia,4041.174146,72.141,3294335.0,MNG,East Asia & Pacific,Lower middle income,IBRD,2nd Quartile (25-50%),4041
2125,2020,Lesotho,917.356381,54.693,2254100.0,LSO,Sub-Saharan Africa,Lower middle income,IDA,1st Quartile (<=25%),917
985,2020,Cyprus,28281.425781,81.391,1237537.0,CYP,Europe & Central Asia,High income,Not classified,4th Quartile (>75%),28281
1365,2020,"Gambia, The",704.030463,62.612,2573995.0,GMB,Sub-Saharan Africa,Low income,IDA,1st Quartile (<=25%),704
263,2020,"Bahamas, The",23998.268019,72.677,406471.0,BHS,Latin America & Caribbean,High income,Not classified,4th Quartile (>75%),23998


In [20]:
def pop_million(pop):
    return pop / 1000000


df_2020["Population (Million)"] = df_2020["Population"].apply(pop_million)

df_2020.sample(5)

Unnamed: 0,Year,Country,GDP per Capita,Life Expectancy,Population,Country Code,Region,Income Group,Lending Type,GDP Quartile,GDP (Rounded),Population (Million)
3436,2020,Sri Lanka,3852.389091,76.393,21919000.0,LKA,South Asia,Lower middle income,IDA,2nd Quartile (25-50%),3852,21.919
1042,2020,Djibouti,2921.738706,62.694,1090156.0,DJI,Middle East & North Africa,Lower middle income,IDA,2nd Quartile (25-50%),2922,1.090156
3607,2020,Syrian Arab Republic,537.090235,72.14,20772595.0,SYR,Middle East & North Africa,Low income,IDA,1st Quartile (<=25%),537,20.772595
1232,2020,Ethiopia,918.652594,65.371,117190911.0,ETH,Sub-Saharan Africa,Low income,IDA,1st Quartile (<=25%),919,117.190911
814,2020,Colombia,5304.289129,74.769,50930662.0,COL,Latin America & Caribbean,Upper middle income,IBRD,2nd Quartile (25-50%),5304,50.930662


In [21]:
df_2020["Population2 (Million)"] = df_2020["Population"].apply(lambda pop : pop / 1000000)
df_2020.sample(5)

Unnamed: 0,Year,Country,GDP per Capita,Life Expectancy,Population,Country Code,Region,Income Group,Lending Type,GDP Quartile,GDP (Rounded),Population (Million),Population2 (Million)
947,2020,Cuba,9499.572504,77.567,11300698.0,CUB,Latin America & Caribbean,Upper middle income,Not classified,3rd Quartile (50-75%),9500,11.300698,11.300698
1935,2020,Kenya,1936.250755,62.675,51985780.0,KEN,Sub-Saharan Africa,Lower middle income,Blend,2nd Quartile (25-50%),1936,51.98578,51.98578
54,2020,Algeria,3354.153164,74.453,43451666.0,DZA,Middle East & North Africa,Lower middle income,IBRD,2nd Quartile (25-50%),3354,43.451666,43.451666
225,2020,Austria,48789.49785,81.192683,8916864.0,AUT,Europe & Central Asia,High income,Not classified,4th Quartile (>75%),48789,8.916864,8.916864
3379,2020,South Africa,5753.066494,65.252,58801927.0,ZAF,Sub-Saharan Africa,Upper middle income,IBRD,2nd Quartile (25-50%),5753,58.801927,58.801927


## Homework Question 1

Save the summary statistics of all numerical columns and all categorical columns to only one Excel spreadsheet file with two worksheets, one for numerical and one for categorical columns.

In [22]:
import pandas as pd


df = pd.read_csv("https://raw.githubusercontent.com/wcj365/python-stats-dataviz/refs/heads/master/fall2024/data/World_Development_Indicators_(WDI).csv")  # or use another method to load data

# Separate numerical and categorical columns
numerical_columns = df.select_dtypes(include=['number']).columns
categorical_columns = df.select_dtypes(include=['object', 'category']).columns

# Summary for numerical columns
numerical_summary = df[numerical_columns].describe()
print("Numerical Summary:")
print(numerical_summary)

# Summary for categorical columns (unique counts and frequencies)
categorical_summary = df[categorical_columns].apply(lambda x: x.value_counts()).T
print("Categorical Summary:")
print(categorical_summary)

# Saving the data into two excel files
with pd.ExcelWriter('summary_statistics.xlsx') as writer:
    numerical_summary.to_excel(writer, sheet_name='Numerical')
    categorical_summary.to_excel(writer, sheet_name='Categorical')






Numerical Summary:
             Year  GDP per capita (current US$)  \
count  4123.00000                   3962.000000   
mean   2013.00000                  17007.692848   
std       5.47789                  25733.109164   
min    2004.00000                    128.538423   
25%    2008.00000                   1862.892400   
50%    2013.00000                   6048.304202   
75%    2018.00000                  22137.309568   
max    2022.00000                 240862.182448   

       Life expectancy at birth, total (years)  Population, total  
count                              3777.000000       4.123000e+03  
mean                                 71.060853       3.319575e+07  
std                                   8.499806       1.316430e+08  
min                                  42.125000       9.791000e+03  
25%                                  65.351000       7.436200e+05  
50%                                  72.765000       5.872624e+06  
75%                                  77.52900

In [23]:
# for df_2020

numerical_summary = df_2020.describe()
print("Numerical Summary Statistics:")
print(numerical_summary)

categorical_summary = df_2020.describe(include="object").T
print("Categorical Summary Statistics:")
print(categorical_summary)


# Saving the data into two excel files
with pd.ExcelWriter('summary_statistics.xlsx') as writer:
    numerical_summary.to_excel(writer, sheet_name='Numerical')
    categorical_summary.to_excel(writer, sheet_name='Categorical')

Numerical Summary Statistics:
         Year  GDP per Capita  Life Expectancy    Population  GDP (Rounded)  \
count   202.0      202.000000       202.000000  2.020000e+02     202.000000   
mean   2020.0    16198.267524        72.314599  3.825505e+07   16198.277228   
std       0.0    23391.117592         7.451962  1.446623e+08   23391.118369   
min    2020.0      216.827417        52.777000  1.106900e+04     217.000000   
25%    2020.0     2188.047693        66.779750  1.307377e+06    2188.250000   
50%    2020.0     5920.260419        72.871500  6.916570e+06    5920.000000   
75%    2020.0    20761.211199        77.981750  2.673161e+07   20761.000000   
max    2020.0   165287.186767        85.497561  1.411100e+09  165287.000000   

       Population (Million)  Population2 (Million)  
count            202.000000             202.000000  
mean              38.255054              38.255054  
std              144.662308             144.662308  
min                0.011069               0.01

## Homework Question 2

Create a new column that is the GDP per Capita rounded to 2 decimal points by using the Python's build function "round"
Hint: apply the round function/method and provide a parameter 2.

In [24]:
# Apply the round function to GDP per Capita and create a new column
df['GDP per Capita (rounded)'] = df['GDP per capita (current US$)'].apply(lambda x: round(x, 2))

# Display the first few rows to verify the new column
print(df[['GDP per capita (current US$)', 'GDP per Capita (rounded)']].head())




   GDP per capita (current US$)  GDP per Capita (rounded)
0                    221.830531                    221.83
1                    254.115274                    254.12
2                    274.015394                    274.02
3                    376.318296                    376.32
4                    382.533804                    382.53


In [25]:
df_2020["GDP per Capita (Rounded)"] = df_2020["GDP per Capita"].apply(lambda x: round(x, 2))

df_2020.sample(5)

Unnamed: 0,Year,Country,GDP per Capita,Life Expectancy,Population,Country Code,Region,Income Group,Lending Type,GDP Quartile,GDP (Rounded),Population (Million),Population2 (Million),GDP per Capita (Rounded)
2049,2020,Kyrgyz Republic,1256.929226,71.8,6579900.0,KGZ,Europe & Central Asia,Lower middle income,IDA,1st Quartile (<=25%),1257,6.5799,6.5799,1256.93
2315,2020,Maldives,7216.816371,79.875,514438.0,MDV,South Asia,Upper middle income,IDA,3rd Quartile (50-75%),7217,0.514438,0.514438,7216.82
3569,2020,Sweden,52837.903978,82.356098,10353442.0,SWE,Europe & Central Asia,High income,Not classified,4th Quartile (>75%),52838,10.353442,10.353442,52837.9
909,2020,Cote d'Ivoire,2349.069882,59.032,26811790.0,CIV,Sub-Saharan Africa,Lower middle income,IDA,2nd Quartile (25-50%),2349,26.81179,26.81179,2349.07
681,2020,Canada,43349.677856,81.670488,38007166.0,CAN,North America,High income,Not classified,4th Quartile (>75%),43350,38.007166,38.007166,43349.68


## Homework Question 3 (bonus, will cover next week)

step 1 - create a column "Life Expectancy Quartile" similar to the GDP per Capita Quartile above.
Step 2 - Create a column "health and wealth status" based on the following definition
- rich and healthy (1st GDP per capita quartile and 1st life expentancy quartile)
- poor and unhealthy ((4th GDP per capita quartile and 4th life expentancy quartile)
- rich and unhealthy (1st GDP per capita quartile and 4th life expentancy quartile)
- poor and healthy ((4th GDP per capita quartile and 1st life expentancy quartile)
- Other

In [26]:
# Step 1: Create the "GDP per Capita Quartile" and "Life Expectancy Quartile"

# Dropping NA values from the relevant columns for clean quartile calculation

df = df.dropna(subset=['GDP per capita (current US$)', 'Life expectancy at birth, total (years)'])

# Create GDP per Capita Quartile
df['GDP per Capita Quartile'] = pd.qcut(df['GDP per capita (current US$)'], 4, labels=[1, 2, 3, 4])

# Create Life Expectancy Quartile
df['Life Expectancy Quartile'] = pd.qcut(df['Life expectancy at birth, total (years)'], 4, labels=[1, 2, 3, 4])

# Step 2: Create the "health and wealth status" column based on the given definition
def health_wealth_status(row):
    if row['GDP per Capita Quartile'] == 1 and row['Life Expectancy Quartile'] == 1:
        return 'rich and healthy'
    elif row['GDP per Capita Quartile'] == 4 and row['Life Expectancy Quartile'] == 4:
        return 'poor and unhealthy'
    elif row['GDP per Capita Quartile'] == 1 and row['Life Expectancy Quartile'] == 4:
        return 'rich and unhealthy'
    elif row['GDP per Capita Quartile'] == 4 and row['Life Expectancy Quartile'] == 1:
        return 'poor and healthy'
    else:
        return 'Other'

# Apply the function to each row to create the new column
df['health and wealth status'] = df.apply(health_wealth_status, axis=1)

# Display the first few rows to verify the results
print(df[['GDP per capita (current US$)', 'Life expectancy at birth, total (years)',
          'GDP per Capita Quartile', 'Life Expectancy Quartile', 'health and wealth status']].head())



   GDP per capita (current US$)  Life expectancy at birth, total (years)  \
0                    221.830531                                   57.944   
1                    254.115274                                   58.361   
2                    274.015394                                   58.684   
3                    376.318296                                   59.111   
4                    382.533804                                   59.852   

  GDP per Capita Quartile Life Expectancy Quartile health and wealth status  
0                       1                        1         rich and healthy  
1                       1                        1         rich and healthy  
2                       1                        1         rich and healthy  
3                       1                        1         rich and healthy  
4                       1                        1         rich and healthy  


In [27]:
df.sample(55)

Unnamed: 0,Year,Country,GDP per capita (current US$),"Life expectancy at birth, total (years)","Population, total",Country Code,Region,Income Group,Lending Type,GDP per Capita (rounded),GDP per Capita Quartile,Life Expectancy Quartile,health and wealth status
2516,2012,Montenegro,6586.399703,76.2,620601.0,MNE,Europe & Central Asia,Upper middle income,IBRD,6586.4,3,3,Other
800,2006,Colombia,3782.603496,73.468,42772910.0,COL,Latin America & Caribbean,Upper middle income,IBRD,3782.6,2,3,Other
1781,2018,Ireland,79446.939109,82.204878,4867316.0,IRL,Europe & Central Asia,High income,Not classified,79446.94,4,4,poor and unhealthy
1161,2006,Eritrea,420.52874,61.611,2880093.0,ERI,Sub-Saharan Africa,Low income,IDA,420.53,1,1,rich and healthy
2231,2012,"Macao SAR, China",74111.601186,83.326,582766.0,MAC,East Asia & Pacific,High income,Not classified,74111.6,4,4,poor and unhealthy
3420,2004,Sri Lanka,1060.136943,67.091,19490431.0,LKA,South Asia,Lower middle income,IDA,1060.14,1,2,Other
1511,2014,Guam,33483.941436,76.749,167543.0,GUM,East Asia & Pacific,High income,Not classified,33483.94,4,3,Other
1729,2004,"Iran, Islamic Rep.",2751.793042,71.37,69061674.0,IRN,Middle East & North Africa,Lower middle income,IBRD,2751.79,2,2,Other
5,2009,Afghanistan,453.387382,60.364,27385307.0,AFG,South Asia,Low income,IDA,453.39,1,1,rich and healthy
454,2021,Bolivia,3345.023002,63.63,12079472.0,BOL,Latin America & Caribbean,Lower middle income,IBRD,3345.02,2,1,Other


In [28]:
df_2020.sample(5)

Unnamed: 0,Year,Country,GDP per Capita,Life Expectancy,Population,Country Code,Region,Income Group,Lending Type,GDP Quartile,GDP (Rounded),Population (Million),Population2 (Million),GDP per Capita (Rounded)
3227,2020,Seychelles,12020.021064,77.236585,98462.0,SYC,Sub-Saharan Africa,High income,IBRD,3rd Quartile (50-75%),12020,0.098462,0.098462,12020.02
2011,2020,Kosovo,4310.934002,76.567,1790133.0,XKX,Europe & Central Asia,Upper middle income,IDA,2nd Quartile (25-50%),4311,1.790133,1.790133,4310.93
681,2020,Canada,43349.677856,81.670488,38007166.0,CAN,North America,High income,Not classified,4th Quartile (>75%),43350,38.007166,38.007166,43349.68
1194,2020,Estonia,23595.243684,78.595122,1329522.0,EST,Europe & Central Asia,High income,Not classified,4th Quartile (>75%),23595,1.329522,1.329522,23595.24
2030,2020,Kuwait,24297.710802,76.92,4360444.0,KWT,Middle East & North Africa,High income,Not classified,4th Quartile (>75%),24298,4.360444,4.360444,24297.71
