# Import data from world bank

In [56]:
import wbdata
import pandas as pd

In [57]:
wbdata.get_countries()

id    name
----  --------------------------------------------------------------------------------
ABW   Aruba
AFE   Africa Eastern and Southern
AFG   Afghanistan
AFR   Africa
AFW   Africa Western and Central
AGO   Angola
ALB   Albania
AND   Andorra
ARB   Arab World
ARE   United Arab Emirates
ARG   Argentina
ARM   Armenia
ASM   American Samoa
ATG   Antigua and Barbuda
AUS   Australia
AUT   Austria
AZE   Azerbaijan
BDI   Burundi
BEA   East Asia & Pacific (IBRD-only countries)
BEC   Europe & Central Asia (IBRD-only countries)
BEL   Belgium
BEN   Benin
BFA   Burkina Faso
BGD   Bangladesh
BGR   Bulgaria
BHI   IBRD countries classified as high income
BHR   Bahrain
BHS   Bahamas, The
BIH   Bosnia and Herzegovina
BLA   Latin America & the Caribbean (IBRD-only countries)
BLR   Belarus
BLZ   Belize
BMN   Middle East & North Africa (IBRD-only countries)
BMU   Bermuda
BOL   Bolivia
BRA   Brazil
BRB   Barbados
BRN   Brunei Darussalam
BSS   Sub-Saharan Africa (IBRD-only countries)
BTN   Bhutan
BWA  

In [58]:
wbdata.get_sources()

  id  name
----  --------------------------------------------------------------------
   1  Doing Business
   2  World Development Indicators
   3  Worldwide Governance Indicators
   5  Subnational Malnutrition Database
   6  International Debt Statistics
  11  Africa Development Indicators
  12  Education Statistics
  13  Enterprise Surveys
  14  Gender Statistics
  15  Global Economic Monitor
  16  Health Nutrition and Population Statistics
  18  IDA Results Measurement System
  19  Millennium Development Goals
  20  Quarterly Public Sector Debt
  22  Quarterly External Debt Statistics SDDS
  23  Quarterly External Debt Statistics GDDS
  25  Jobs
  27  Global Economic Prospects
  28  Global Financial Inclusion
  29  The Atlas of Social Protection: Indicators of Resilience and Equity
  30  Exporter Dynamics Database – Indicators at Country-Year Level
  31  Country Policy and Institutional Assessment
  32  Global Financial Development
  33  G20 Financial Inclusion Indicators
  34  Glob

In [59]:
wbdata.get_indicators(source=2)

id                          name
--------------------------  ---------------------------------------------------------------------------------------------------------------------------------------------
AG.CON.FERT.PT.ZS           Fertilizer consumption (% of fertilizer production)
AG.CON.FERT.ZS              Fertilizer consumption (kilograms per hectare of arable land)
AG.LND.AGRI.K2              Agricultural land (sq. km)
AG.LND.AGRI.ZS              Agricultural land (% of land area)
AG.LND.ARBL.HA              Arable land (hectares)
AG.LND.ARBL.HA.PC           Arable land (hectares per person)
AG.LND.ARBL.ZS              Arable land (% of land area)
AG.LND.CREL.HA              Land under cereal production (hectares)
AG.LND.CROP.ZS              Permanent cropland (% of land area)
AG.LND.EL5M.RU.K2           Rural land area where elevation is below 5 meters (sq. km)
AG.LND.EL5M.RU.ZS           Rural land area where elevation is below 5 meters (% of total land area)
AG.LND.EL5M.UR.K2  

In [100]:
import wbdata
import pandas as pd

# Define the indicators for population, GDP per capita, current account balance, GDP growth, and inflation rate
indicators = {
    'SP.POP.TOTL': 'Population', 
    'NY.GDP.PCAP.CD': 'GDP per Capita', 
    'BN.CAB.XOKA.CD': 'Current Account Balance',
    'NY.GDP.MKTP.KD.ZG': 'GDP Growth',
    'FP.CPI.TOTL.ZG': 'Inflation Rate',
# add overall GDP
    'NY.GDP.MKTP.CD': 'GDP',
    'DT.DOD.DECT.CD': 'External Debt'
    }

# Define countries
countries = ['PK', 'IN', 'BD', 'LK', 'AF']

# Fetch data for the defined period
data = wbdata.get_dataframe(indicators, country=countries)

# Reset index for a better structure
data.reset_index(inplace=True)

# Rename columns for clarity
data.rename(columns={'country': 'Country', 'date': 'Year'}, inplace=True)

# Ensure Year column is numeric
data['Year'] = pd.to_numeric(data['Year'])

# Filter data between 2010 and 2023
data = data[(data['Year'] >= 2000) & (data['Year'] <= 2023)]
# data.to_excel('data.xlsx')

# Display the first few rows of the dataset
print(data.head())

       Country  Year  Population  GDP per Capita  Current Account Balance  \
0  Afghanistan  2023  42239854.0             NaN                      NaN   
1  Afghanistan  2022  41128771.0      352.603733                      NaN   
2  Afghanistan  2021  40099462.0      355.777826                      NaN   
3  Afghanistan  2020  38972230.0      512.055098            -3.136733e+09   
4  Afghanistan  2019  37769499.0      497.741431            -3.791935e+09   

   GDP Growth  Inflation Rate           GDP  External Debt  
0         NaN             NaN           NaN            NaN  
1   -6.240172             NaN  1.450216e+10   3.393247e+09  
2  -20.738839             NaN  1.426650e+10   3.555784e+09  
3   -2.351101             NaN  1.995593e+10   3.040072e+09  
4    3.911603        2.302373  1.879944e+10   2.661686e+09  


In [92]:
# Show only data for Pakistan
data_pakistan =data[data['Country'] == 'Pakistan']
print(data_pakistan)



      Country  Year   Population  GDP per Capita  Current Account Balance  \
256  Pakistan  2023  240485658.0     1407.021351            -3.500440e+08   
257  Pakistan  2022  235824862.0     1589.263980            -1.221611e+10   
258  Pakistan  2021  231402117.0     1506.108293            -1.228311e+10   
259  Pakistan  2020  227196741.0     1322.314785            -6.508740e+08   
260  Pakistan  2019  223293280.0     1437.165833            -8.557928e+09   
261  Pakistan  2018  219731479.0     1620.742591            -1.885899e+10   
262  Pakistan  2017  216379655.0     1567.640612            -1.617962e+10   
263  Pakistan  2016  213524840.0     1468.822082            -7.190898e+09   
264  Pakistan  2015  210969298.0     1421.835278            -2.803000e+09   
265  Pakistan  2014  208251628.0     1303.185370            -3.658000e+09   
266  Pakistan  2013  205337562.0     1259.668368            -4.416000e+09   
267  Pakistan  2012  202205861.0     1236.892763            -2.342000e+09   

In [72]:
import plotly.express as px
import matplotlib.pyplot as plt
fig = px.bar(data_pakistan, x='Year', y='Current Account Balance',
             title='Current Account Balance of Pakistan from 2010 to 2023')
# give title

fig.show()


In [73]:
import plotly.express as px
import matplotlib.pyplot as plt
fig = px.bar(data_pakistan, x='Year', y='GDP Growth',
             title='GDP Growth of Pakistan from 2010 to 2023')
# give title

fig.show()

In [74]:
import plotly.express as px
import matplotlib.pyplot as plt
fig = px.bar(data_pakistan, x='Year', y='Inflation Rate',
             title='Inflation Rate of Pakistan from 2010 to 2023')
# give title

fig.show()

In [94]:
import plotly.express as px
import matplotlib.pyplot as plt
fig = px.line(data_pakistan, x='Year', y='External Debt',
             title='External Debt of Pakistan from 2010 to 2023')
# give title

fig.show()

In [75]:
import wbdata
import pandas as pd

# Define the indicators for population, GDP per capita, current account balance, GDP growth, and inflation rate
# indicators = {
#     'AG.CON.FERT.PT.ZS': 'Fertilizer consumption (% of fertilizer production)', 
# }

# # Define countries
# countries = ['PK', 'IN', 'BD', 'LK', 'AF']

# Fetch data for the defined period
df = wbdata.get_data('AG.CON.FERT.PT.ZS',country=["PAK", "IND"], date=("2000", "2023"))
df=pd.DataFrame(df)
# Reset index for a better structure
df.reset_index(inplace=True)

# Rename columns for clarity
df.rename(columns={'country': 'Country', 'date': 'Year'}, inplace=True)

# Ensure Year column is numeric
df['Year'] = pd.to_numeric(df['Year'])
df.head()

Unnamed: 0,index,indicator,Country,countryiso3code,Year,value,unit,obs_status,decimal
0,0,"{'id': 'AG.CON.FERT.PT.ZS', 'value': 'Fertiliz...","{'id': 'IN', 'value': 'India'}",IND,2023,,,,1
1,1,"{'id': 'AG.CON.FERT.PT.ZS', 'value': 'Fertiliz...","{'id': 'IN', 'value': 'India'}",IND,2022,,,,1
2,2,"{'id': 'AG.CON.FERT.PT.ZS', 'value': 'Fertiliz...","{'id': 'IN', 'value': 'India'}",IND,2021,160.620281,,,1
3,3,"{'id': 'AG.CON.FERT.PT.ZS', 'value': 'Fertiliz...","{'id': 'IN', 'value': 'India'}",IND,2020,176.042247,,,1
4,4,"{'id': 'AG.CON.FERT.PT.ZS', 'value': 'Fertiliz...","{'id': 'IN', 'value': 'India'}",IND,2019,156.483317,,,1


# Import data from FAO STAT

In [None]:
#pip install faostat

In [78]:
import faostat

In [80]:
faostat.list_datasets()

[('code',
  'label',
  'date_update',
  'note_update',
  'release_current',
  'state_current',
  'year_current',
  'release_next',
  'state_next',
  'year_next'),
 ('QCL',
  'Crops and livestock products',
  '2024-10-07',
  'minor revision',
  '2023-12-23 / 2024-10-07',
  'final',
  '2022',
  '2024-12',
  'final',
  '2023'),
 ('QI',
  'Production Indices',
  '2024-03-13',
  '',
  '2024-03-13',
  'final',
  '2022',
  '2024-12',
  'final',
  '2023'),
 ('QV',
  'Value of Agricultural Production',
  '2024-03-13',
  '',
  '2024-03-13',
  'final',
  '2022',
  '2024-12',
  'final',
  '2023'),
 ('FS',
  'Suite of Food Security Indicators',
  '2024-07-25',
  'minor revision',
  '2024-07-24 / 2024-07-25',
  'final',
  '2023',
  '2025-07',
  'final',
  '2024'),
 ('FBS',
  'Food Balances (2010-)',
  '2024-07-19',
  '',
  '2024-07-19',
  'final',
  '2022',
  '2025-07',
  'final',
  '2023'),
 ('SCL',
  'Supply Utilization Accounts (2010-)',
  '2024-07-19',
  '',
  '2024-07-19',
  'final',
  '2022',


In [81]:
data = faostat.list_datasets()
data[1:4]

[('QCL',
  'Crops and livestock products',
  '2024-10-07',
  'minor revision',
  '2023-12-23 / 2024-10-07',
  'final',
  '2022',
  '2024-12',
  'final',
  '2023'),
 ('QI',
  'Production Indices',
  '2024-03-13',
  '',
  '2024-03-13',
  'final',
  '2022',
  '2024-12',
  'final',
  '2023'),
 ('QV',
  'Value of Agricultural Production',
  '2024-03-13',
  '',
  '2024-03-13',
  'final',
  '2022',
  '2024-12',
  'final',
  '2023')]

In [82]:
df = faostat.list_datasets_df()
# df.to_csv('datasets.csv', index=False)
df

Unnamed: 0,code,label,date_update,note_update,release_current,state_current,year_current,release_next,state_next,year_next
0,QCL,Crops and livestock products,2024-10-07,minor revision,2023-12-23 / 2024-10-07,final,2022,2024-12,final,2023
1,QI,Production Indices,2024-03-13,,2024-03-13,final,2022,2024-12,final,2023
2,QV,Value of Agricultural Production,2024-03-13,,2024-03-13,final,2022,2024-12,final,2023
3,FS,Suite of Food Security Indicators,2024-07-25,minor revision,2024-07-24 / 2024-07-25,final,2023,2025-07,final,2024
4,FBS,Food Balances (2010-),2024-07-19,,2024-07-19,final,2022,2025-07,final,2023
...,...,...,...,...,...,...,...,...,...,...
61,FA,Food Aid Shipments (WFP),2016-12-22,,2016-12-22,preliminary,2016,,,
62,RM,Machinery,2021-12-03,minor revision,2011-12-22 / 2021-12-03,final,2009,,,
63,RY,Machinery Archive,2021-12-03,minor revision,2006-12-31 / 2021-12-03,final,2005,,,
64,RA,Fertilizers archive,2020-09-08,minor revision,2013-12-31 / 2020-09-08,final,2002,,,


In [83]:
y = faostat.get_par_df('RM', 'specialgroups')
y

Unnamed: 0,label,code,aggregate_type
0,European Union (27) + (Total),5707,+
1,European Union (27) > (List),5707>,>
2,Least Developed Countries + (Total),5801,+
3,Least Developed Countries > (List),5801>,>
4,Land Locked Developing Countries + (Total),5802,+
5,Land Locked Developing Countries > (List),5802>,>
6,Small Island Developing States + (Total),5803,+
7,Small Island Developing States > (List),5803>,>
8,Low Income Food Deficit Countries + (Total),5815,+
9,Low Income Food Deficit Countries > (List),5815>,>


In [None]:
mypars = {'element':[2312, 2313],'item':'221'}
data = faostat.get_data('QCL', pars=mypars)
data[40:44]

# get data in a dataframe
df = faostat.get_data_df('QCL', pars=mypars)
# df.to_csv('crops.csv', index=False)

In [85]:
df.columns

Index(['Domain Code', 'Domain', 'Area Code', 'Area', 'Element Code', 'Element',
       'Item Code', 'Item', 'Year Code', 'Year', 'Unit', 'Value'],
      dtype='object')

In [87]:
df = df[['Domain','Area', 'Element','Item', 'Year', 'Unit', 'Value']]
# df.to_csv('crops.csv', index=False)

# Import data from Eurostate

In [None]:
# pip install eurostat

In [89]:
import eurostat

In [101]:
eurostat.get_toc(agency='all', dataset='all', lang='en')

[('title',
  'code',
  'type',
  'last update of data',
  'last table structure change',
  'data start',
  'data end'),
 ('Regional disparities in employment rates (NUTS level 2, NUTS level 3)',
  'LFST_R_LMDER',
  'dataset',
  '2024-07-02T23:00:00+0200',
  '2024-07-02T23:00:00+0200',
  '1999',
  '2023'),
 ('Regional disparities in gender employment gap (NUTS level 2)',
  'LFST_R_LMDGEG',
  'dataset',
  '2024-10-31T23:00:00+0100',
  '2024-10-31T23:00:00+0100',
  '1999',
  '2023'),
 ('Regional disparities in long-term unemployment rates (NUTS level 2)',
  'LFST_R_LMDLTU',
  'dataset',
  '2024-10-31T23:00:00+0100',
  '2024-10-31T23:00:00+0100',
  '1999',
  '2023'),
 ('Regional disparities in unemployment rates (NUTS level 2, NUTS level 3)',
  'LFST_R_LMDUR',
  'dataset',
  '2024-07-02T23:00:00+0200',
  '2024-07-02T23:00:00+0200',
  '1999',
  '2023'),
 ('Population by sex, age, migration status and degree of urbanisation',
  'LFST_R_PGAUM',
  'dataset',
  '2024-09-12T23:00:00+0200',
  '20

In [102]:
eurostat.get_data_df('GOV_10DD_SLGD') # General government debt

Unnamed: 0,freq,na_item,sector,maturity,unit,geo\TIME_PERIOD,2020,2021,2022,2023
0,A,F22,S1_S2,TOTAL,MIO_EUR,AT,,,0.0,0.0
1,A,F22,S1_S2,TOTAL,MIO_EUR,BE,,,0.0,0.0
2,A,F22,S1_S2,TOTAL,MIO_EUR,DE,0.0,0.0,0.0,0.0
3,A,F22,S1_S2,TOTAL,MIO_EUR,ES,,,0.0,0.0
4,A,F22,S1_S2,TOTAL,MIO_NAC,AT,,,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
1606,A,GD,S1_S2,Y_LT1,PC_GDP,ES,,,0.2,0.2
1607,A,GD,S1_S2,Y_LT1,PC_TOT,AT,,,2.2,1.7
1608,A,GD,S1_S2,Y_LT1,PC_TOT,BE,,,6.8,5.9
1609,A,GD,S1_S2,Y_LT1,PC_TOT,DE,5.8,5.0,2.1,2.7
