# Import data from world bank

In [56]:
import wbdata
import pandas as pd

In [57]:
wbdata.get_countries()

id    name
----  --------------------------------------------------------------------------------
ABW   Aruba
AFE   Africa Eastern and Southern
AFG   Afghanistan
AFR   Africa
AFW   Africa Western and Central
AGO   Angola
ALB   Albania
AND   Andorra
ARB   Arab World
ARE   United Arab Emirates
ARG   Argentina
ARM   Armenia
ASM   American Samoa
ATG   Antigua and Barbuda
AUS   Australia
AUT   Austria
AZE   Azerbaijan
BDI   Burundi
BEA   East Asia & Pacific (IBRD-only countries)
BEC   Europe & Central Asia (IBRD-only countries)
BEL   Belgium
BEN   Benin
BFA   Burkina Faso
BGD   Bangladesh
BGR   Bulgaria
BHI   IBRD countries classified as high income
BHR   Bahrain
BHS   Bahamas, The
BIH   Bosnia and Herzegovina
BLA   Latin America & the Caribbean (IBRD-only countries)
BLR   Belarus
BLZ   Belize
BMN   Middle East & North Africa (IBRD-only countries)
BMU   Bermuda
BOL   Bolivia
BRA   Brazil
BRB   Barbados
BRN   Brunei Darussalam
BSS   Sub-Saharan Africa (IBRD-only countries)
BTN   Bhutan
BWA  

In [58]:
wbdata.get_sources()

  id  name
----  --------------------------------------------------------------------
   1  Doing Business
   2  World Development Indicators
   3  Worldwide Governance Indicators
   5  Subnational Malnutrition Database
   6  International Debt Statistics
  11  Africa Development Indicators
  12  Education Statistics
  13  Enterprise Surveys
  14  Gender Statistics
  15  Global Economic Monitor
  16  Health Nutrition and Population Statistics
  18  IDA Results Measurement System
  19  Millennium Development Goals
  20  Quarterly Public Sector Debt
  22  Quarterly External Debt Statistics SDDS
  23  Quarterly External Debt Statistics GDDS
  25  Jobs
  27  Global Economic Prospects
  28  Global Financial Inclusion
  29  The Atlas of Social Protection: Indicators of Resilience and Equity
  30  Exporter Dynamics Database – Indicators at Country-Year Level
  31  Country Policy and Institutional Assessment
  32  Global Financial Development
  33  G20 Financial Inclusion Indicators
  34  Glob

In [59]:
wbdata.get_indicators(source=2)

id                          name
--------------------------  ---------------------------------------------------------------------------------------------------------------------------------------------
AG.CON.FERT.PT.ZS           Fertilizer consumption (% of fertilizer production)
AG.CON.FERT.ZS              Fertilizer consumption (kilograms per hectare of arable land)
AG.LND.AGRI.K2              Agricultural land (sq. km)
AG.LND.AGRI.ZS              Agricultural land (% of land area)
AG.LND.ARBL.HA              Arable land (hectares)
AG.LND.ARBL.HA.PC           Arable land (hectares per person)
AG.LND.ARBL.ZS              Arable land (% of land area)
AG.LND.CREL.HA              Land under cereal production (hectares)
AG.LND.CROP.ZS              Permanent cropland (% of land area)
AG.LND.EL5M.RU.K2           Rural land area where elevation is below 5 meters (sq. km)
AG.LND.EL5M.RU.ZS           Rural land area where elevation is below 5 meters (% of total land area)
AG.LND.EL5M.UR.K2  

In [100]:
import wbdata
import pandas as pd

# Define the indicators for population, GDP per capita, current account balance, GDP growth, and inflation rate
indicators = {
    'SP.POP.TOTL': 'Population', 
    'NY.GDP.PCAP.CD': 'GDP per Capita', 
    'BN.CAB.XOKA.CD': 'Current Account Balance',
    'NY.GDP.MKTP.KD.ZG': 'GDP Growth',
    'FP.CPI.TOTL.ZG': 'Inflation Rate',
# add overall GDP
    'NY.GDP.MKTP.CD': 'GDP',
    'DT.DOD.DECT.CD': 'External Debt'
    }

# Define countries
countries = ['PK', 'IN', 'BD', 'LK', 'AF']

# Fetch data for the defined period
data = wbdata.get_dataframe(indicators, country=countries)

# Reset index for a better structure
data.reset_index(inplace=True)

# Rename columns for clarity
data.rename(columns={'country': 'Country', 'date': 'Year'}, inplace=True)

# Ensure Year column is numeric
data['Year'] = pd.to_numeric(data['Year'])

# Filter data between 2010 and 2023
data = data[(data['Year'] >= 2000) & (data['Year'] <= 2023)]
# data.to_excel('data.xlsx')

# Display the first few rows of the dataset
print(data.head())

       Country  Year  Population  GDP per Capita  Current Account Balance  \
0  Afghanistan  2023  42239854.0             NaN                      NaN   
1  Afghanistan  2022  41128771.0      352.603733                      NaN   
2  Afghanistan  2021  40099462.0      355.777826                      NaN   
3  Afghanistan  2020  38972230.0      512.055098            -3.136733e+09   
4  Afghanistan  2019  37769499.0      497.741431            -3.791935e+09   

   GDP Growth  Inflation Rate           GDP  External Debt  
0         NaN             NaN           NaN            NaN  
1   -6.240172             NaN  1.450216e+10   3.393247e+09  
2  -20.738839             NaN  1.426650e+10   3.555784e+09  
3   -2.351101             NaN  1.995593e+10   3.040072e+09  
4    3.911603        2.302373  1.879944e+10   2.661686e+09  


In [92]:
# Show only data for Pakistan
data_pakistan =data[data['Country'] == 'Pakistan']
print(data_pakistan)



      Country  Year   Population  GDP per Capita  Current Account Balance  \
256  Pakistan  2023  240485658.0     1407.021351            -3.500440e+08   
257  Pakistan  2022  235824862.0     1589.263980            -1.221611e+10   
258  Pakistan  2021  231402117.0     1506.108293            -1.228311e+10   
259  Pakistan  2020  227196741.0     1322.314785            -6.508740e+08   
260  Pakistan  2019  223293280.0     1437.165833            -8.557928e+09   
261  Pakistan  2018  219731479.0     1620.742591            -1.885899e+10   
262  Pakistan  2017  216379655.0     1567.640612            -1.617962e+10   
263  Pakistan  2016  213524840.0     1468.822082            -7.190898e+09   
264  Pakistan  2015  210969298.0     1421.835278            -2.803000e+09   
265  Pakistan  2014  208251628.0     1303.185370            -3.658000e+09   
266  Pakistan  2013  205337562.0     1259.668368            -4.416000e+09   
267  Pakistan  2012  202205861.0     1236.892763            -2.342000e+09   

In [72]:
import plotly.express as px
import matplotlib.pyplot as plt
fig = px.bar(data_pakistan, x='Year', y='Current Account Balance',
             title='Current Account Balance of Pakistan from 2010 to 2023')
# give title

fig.show()


In [73]:
import plotly.express as px
import matplotlib.pyplot as plt
fig = px.bar(data_pakistan, x='Year', y='GDP Growth',
             title='GDP Growth of Pakistan from 2010 to 2023')
# give title

fig.show()

In [74]:
import plotly.express as px
import matplotlib.pyplot as plt
fig = px.bar(data_pakistan, x='Year', y='Inflation Rate',
             title='Inflation Rate of Pakistan from 2010 to 2023')
# give title

fig.show()

In [94]:
import plotly.express as px
import matplotlib.pyplot as plt
fig = px.line(data_pakistan, x='Year', y='External Debt',
             title='External Debt of Pakistan from 2010 to 2023')
# give title

fig.show()

In [75]:
import wbdata
import pandas as pd

# Define the indicators for population, GDP per capita, current account balance, GDP growth, and inflation rate
# indicators = {
#     'AG.CON.FERT.PT.ZS': 'Fertilizer consumption (% of fertilizer production)', 
# }

# # Define countries
# countries = ['PK', 'IN', 'BD', 'LK', 'AF']

# Fetch data for the defined period
df = wbdata.get_data('AG.CON.FERT.PT.ZS',country=["PAK", "IND"], date=("2000", "2023"))
df=pd.DataFrame(df)
# Reset index for a better structure
df.reset_index(inplace=True)

# Rename columns for clarity
df.rename(columns={'country': 'Country', 'date': 'Year'}, inplace=True)

# Ensure Year column is numeric
df['Year'] = pd.to_numeric(df['Year'])
df.head()

Unnamed: 0,index,indicator,Country,countryiso3code,Year,value,unit,obs_status,decimal
0,0,"{'id': 'AG.CON.FERT.PT.ZS', 'value': 'Fertiliz...","{'id': 'IN', 'value': 'India'}",IND,2023,,,,1
1,1,"{'id': 'AG.CON.FERT.PT.ZS', 'value': 'Fertiliz...","{'id': 'IN', 'value': 'India'}",IND,2022,,,,1
2,2,"{'id': 'AG.CON.FERT.PT.ZS', 'value': 'Fertiliz...","{'id': 'IN', 'value': 'India'}",IND,2021,160.620281,,,1
3,3,"{'id': 'AG.CON.FERT.PT.ZS', 'value': 'Fertiliz...","{'id': 'IN', 'value': 'India'}",IND,2020,176.042247,,,1
4,4,"{'id': 'AG.CON.FERT.PT.ZS', 'value': 'Fertiliz...","{'id': 'IN', 'value': 'India'}",IND,2019,156.483317,,,1


# Import data from FAO STAT

In [None]:
#pip install faostat

In [78]:
import faostat

In [80]:
faostat.list_datasets()

[('code',
  'label',
  'date_update',
  'note_update',
  'release_current',
  'state_current',
  'year_current',
  'release_next',
  'state_next',
  'year_next'),
 ('QCL',
  'Crops and livestock products',
  '2024-10-07',
  'minor revision',
  '2023-12-23 / 2024-10-07',
  'final',
  '2022',
  '2024-12',
  'final',
  '2023'),
 ('QI',
  'Production Indices',
  '2024-03-13',
  '',
  '2024-03-13',
  'final',
  '2022',
  '2024-12',
  'final',
  '2023'),
 ('QV',
  'Value of Agricultural Production',
  '2024-03-13',
  '',
  '2024-03-13',
  'final',
  '2022',
  '2024-12',
  'final',
  '2023'),
 ('FS',
  'Suite of Food Security Indicators',
  '2024-07-25',
  'minor revision',
  '2024-07-24 / 2024-07-25',
  'final',
  '2023',
  '2025-07',
  'final',
  '2024'),
 ('FBS',
  'Food Balances (2010-)',
  '2024-07-19',
  '',
  '2024-07-19',
  'final',
  '2022',
  '2025-07',
  'final',
  '2023'),
 ('SCL',
  'Supply Utilization Accounts (2010-)',
  '2024-07-19',
  '',
  '2024-07-19',
  'final',
  '2022',


In [81]:
data = faostat.list_datasets()
data[1:4]

[('QCL',
  'Crops and livestock products',
  '2024-10-07',
  'minor revision',
  '2023-12-23 / 2024-10-07',
  'final',
  '2022',
  '2024-12',
  'final',
  '2023'),
 ('QI',
  'Production Indices',
  '2024-03-13',
  '',
  '2024-03-13',
  'final',
  '2022',
  '2024-12',
  'final',
  '2023'),
 ('QV',
  'Value of Agricultural Production',
  '2024-03-13',
  '',
  '2024-03-13',
  'final',
  '2022',
  '2024-12',
  'final',
  '2023')]

In [82]:
df = faostat.list_datasets_df()
# df.to_csv('datasets.csv', index=False)
df

Unnamed: 0,code,label,date_update,note_update,release_current,state_current,year_current,release_next,state_next,year_next
0,QCL,Crops and livestock products,2024-10-07,minor revision,2023-12-23 / 2024-10-07,final,2022,2024-12,final,2023
1,QI,Production Indices,2024-03-13,,2024-03-13,final,2022,2024-12,final,2023
2,QV,Value of Agricultural Production,2024-03-13,,2024-03-13,final,2022,2024-12,final,2023
3,FS,Suite of Food Security Indicators,2024-07-25,minor revision,2024-07-24 / 2024-07-25,final,2023,2025-07,final,2024
4,FBS,Food Balances (2010-),2024-07-19,,2024-07-19,final,2022,2025-07,final,2023
...,...,...,...,...,...,...,...,...,...,...
61,FA,Food Aid Shipments (WFP),2016-12-22,,2016-12-22,preliminary,2016,,,
62,RM,Machinery,2021-12-03,minor revision,2011-12-22 / 2021-12-03,final,2009,,,
63,RY,Machinery Archive,2021-12-03,minor revision,2006-12-31 / 2021-12-03,final,2005,,,
64,RA,Fertilizers archive,2020-09-08,minor revision,2013-12-31 / 2020-09-08,final,2002,,,


In [83]:
y = faostat.get_par_df('RM', 'specialgroups')
y

Unnamed: 0,label,code,aggregate_type
0,European Union (27) + (Total),5707,+
1,European Union (27) > (List),5707>,>
2,Least Developed Countries + (Total),5801,+
3,Least Developed Countries > (List),5801>,>
4,Land Locked Developing Countries + (Total),5802,+
5,Land Locked Developing Countries > (List),5802>,>
6,Small Island Developing States + (Total),5803,+
7,Small Island Developing States > (List),5803>,>
8,Low Income Food Deficit Countries + (Total),5815,+
9,Low Income Food Deficit Countries > (List),5815>,>


In [None]:
mypars = {'element':[2312, 2313],'item':'221'}
data = faostat.get_data('QCL', pars=mypars)
data[40:44]

# get data in a dataframe
df = faostat.get_data_df('QCL', pars=mypars)
# df.to_csv('crops.csv', index=False)

In [85]:
df.columns

Index(['Domain Code', 'Domain', 'Area Code', 'Area', 'Element Code', 'Element',
       'Item Code', 'Item', 'Year Code', 'Year', 'Unit', 'Value'],
      dtype='object')

In [87]:
df = df[['Domain','Area', 'Element','Item', 'Year', 'Unit', 'Value']]
# df.to_csv('crops.csv', index=False)

# Import data from Eurostate

In [None]:
# pip install eurostat

In [89]:
import eurostat

In [105]:
import os

# Create the directory if it does not exist
output_dir = r'D:\Drive D\One drive folder\OneDrive - Higher Education Commission\Drive G\Data science coding\Data sets'
os.makedirs(output_dir, exist_ok=True)

df = eurostat.get_toc_df(agency='all', dataset='all', lang='en')
df.to_excel(os.path.join(output_dir, 'codes_eurostat_toc.xlsx'), index=False)


In [106]:
data=eurostat.get_data_df('GOV_10DD_SLGD') # General government debt
data

Unnamed: 0,freq,na_item,sector,maturity,unit,geo\TIME_PERIOD,2020,2021,2022,2023
0,A,F22,S1_S2,TOTAL,MIO_EUR,AT,,,0.0,0.0
1,A,F22,S1_S2,TOTAL,MIO_EUR,BE,,,0.0,0.0
2,A,F22,S1_S2,TOTAL,MIO_EUR,DE,0.0,0.0,0.0,0.0
3,A,F22,S1_S2,TOTAL,MIO_EUR,ES,,,0.0,0.0
4,A,F22,S1_S2,TOTAL,MIO_NAC,AT,,,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
1606,A,GD,S1_S2,Y_LT1,PC_GDP,ES,,,0.2,0.2
1607,A,GD,S1_S2,Y_LT1,PC_TOT,AT,,,2.2,1.7
1608,A,GD,S1_S2,Y_LT1,PC_TOT,BE,,,6.8,5.9
1609,A,GD,S1_S2,Y_LT1,PC_TOT,DE,5.8,5.0,2.1,2.7


In [108]:
data=pd.DataFrame(data)
data.head()

Unnamed: 0,freq,na_item,sector,maturity,unit,geo\TIME_PERIOD,2020,2021,2022,2023
0,A,F22,S1_S2,TOTAL,MIO_EUR,AT,,,0.0,0.0
1,A,F22,S1_S2,TOTAL,MIO_EUR,BE,,,0.0,0.0
2,A,F22,S1_S2,TOTAL,MIO_EUR,DE,0.0,0.0,0.0,0.0
3,A,F22,S1_S2,TOTAL,MIO_EUR,ES,,,0.0,0.0
4,A,F22,S1_S2,TOTAL,MIO_NAC,AT,,,0.0,0.0


In [112]:
import pandas as pd


# Melt the DataFrame to create a single column for years
df_melted = pd.melt(
    data,
    id_vars=['freq', 'na_item', 'sector', 'maturity', 'unit', 'geo\TIME_PERIOD'],  # Keep these columns
    var_name='Year',  # Name for the new "Year" column
    value_name='Value'  # Name for the data values column
)

# Display the transformed DataFrame
print(df_melted)

# Optionally save the reshaped data to a new Excel file
#df_melted.to_excel("reshaped_data.xlsx", index=False)



     freq na_item sector maturity     unit geo\TIME_PERIOD  Year  Value
0       A     F22  S1_S2    TOTAL  MIO_EUR              AT  2020    NaN
1       A     F22  S1_S2    TOTAL  MIO_EUR              BE  2020    NaN
2       A     F22  S1_S2    TOTAL  MIO_EUR              DE  2020    0.0
3       A     F22  S1_S2    TOTAL  MIO_EUR              ES  2020    NaN
4       A     F22  S1_S2    TOTAL  MIO_NAC              AT  2020    NaN
...   ...     ...    ...      ...      ...             ...   ...    ...
6439    A      GD  S1_S2    Y_LT1   PC_GDP              ES  2023    0.2
6440    A      GD  S1_S2    Y_LT1   PC_TOT              AT  2023    1.7
6441    A      GD  S1_S2    Y_LT1   PC_TOT              BE  2023    5.9
6442    A      GD  S1_S2    Y_LT1   PC_TOT              DE  2023    2.7
6443    A      GD  S1_S2    Y_LT1   PC_TOT              ES  2023    0.9

[6444 rows x 8 columns]


In [113]:
df_melted.head()

Unnamed: 0,freq,na_item,sector,maturity,unit,geo\TIME_PERIOD,Year,Value
0,A,F22,S1_S2,TOTAL,MIO_EUR,AT,2020,
1,A,F22,S1_S2,TOTAL,MIO_EUR,BE,2020,
2,A,F22,S1_S2,TOTAL,MIO_EUR,DE,2020,0.0
3,A,F22,S1_S2,TOTAL,MIO_EUR,ES,2020,
4,A,F22,S1_S2,TOTAL,MIO_NAC,AT,2020,


In [118]:
# Change column names
df_melted.rename(columns={'geo\\TIME_PERIOD': 'Geo', 'Value': 'Govt_debt'}, inplace=True)
df_melted.head()

Unnamed: 0,freq,na_item,sector,maturity,unit,Geo,Year,Govt_debt
0,A,F22,S1_S2,TOTAL,MIO_EUR,AT,2020,
1,A,F22,S1_S2,TOTAL,MIO_EUR,BE,2020,
2,A,F22,S1_S2,TOTAL,MIO_EUR,DE,2020,0.0
3,A,F22,S1_S2,TOTAL,MIO_EUR,ES,2020,
4,A,F22,S1_S2,TOTAL,MIO_NAC,AT,2020,


In [119]:
eurostat.get_data_df('demo_gind') # 

Unnamed: 0,freq,indic_de,geo\TIME_PERIOD,1960,1961,1962,1963,1964,1965,1966,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
0,A,AVG,AD,,,,,,,,...,,72419.0,,75486.0,76860.0,,,80562.0,83345.0,
1,A,AVG,AL,1608800.0,1659800.0,1711319.0,1762621.0,1814135.0,1864791.0,1914573.0,...,2880694.0,2876092.0,2873458.0,2866376.0,2854191.0,2837848.0,2811667.0,2777689.0,,
2,A,AVG,AM,,,,,,,,...,3004588.0,2992364.0,2979442.0,2969001.0,2962482.0,2961473.0,,,2984166.0,
3,A,AVG,AT,7047539.0,7086299.0,7129864.0,7175811.0,7223801.0,7270889.0,7322066.0,...,8642699.0,8736668.0,8797566.0,8840521.0,8879920.0,8916864.0,8955797.0,9041851.0,9131761.0,
4,A,AVG,AZ,,,,,,,,...,9649341.0,9757812.0,9854033.0,9939771.0,10024283.0,10093121.0,10137750.0,10141756.0,10153958.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1556,A,POPTRT,SM,,,,,,,,...,,,,,,,-2.6,29.5,,
1557,A,POPTRT,TR,,,,,,,,...,,,,33.1,32.5,,,,,
1558,A,POPTRT,UA,,,,,,,,...,,,,,,42.6,,,,
1559,A,POPTRT,UK,,,,,,,,...,35.7,35.6,35.7,34.5,34.8,,,,,


In [120]:
toc_df = eurostat.get_toc_df()
search_results = eurostat.subset_toc_df(toc_df, 'migration')
search_results

Unnamed: 0,title,code,type,last update of data,last table structure change,data start,data end
373,Persons subject to immigration law enforcement...,MIGR_EILPOP,dataset,2024-09-30T23:00:00+0200,2024-05-30T23:00:00+0200,2008,2023
390,"Emigration by age group, sex and citizenship",MIGR_EMI1CTZ,dataset,2024-11-13T23:00:00+0100,2024-11-29T11:00:00+0100,1998,2022
391,Emigration by age and sex,MIGR_EMI2,dataset,2024-11-13T23:00:00+0100,2024-06-07T23:00:00+0200,1990,2022
392,"Emigration by age group, sex and country of ne...",MIGR_EMI3NXT,dataset,2024-11-13T23:00:00+0100,2024-11-28T11:00:00+0100,1998,2022
393,"Emigration by age group, sex and country of birth",MIGR_EMI4CTB,dataset,2024-11-13T23:00:00+0100,2024-11-29T11:00:00+0100,2008,2022
...,...,...,...,...,...,...,...
2640,"Population by sex, age, migration status, citi...",LFSA_PGANEDM$DV_1806,dataset,2024-09-12T23:00:00+0200,2024-04-24T23:00:00+0200,,
2796,"Unemployment rates by sex, age, migration stat...",LFSA_URGANEDM,dataset,2024-09-12T23:00:00+0200,2024-04-24T23:00:00+0200,2021,2023
2799,"Unemployment rates by sex, age, migration stat...",LFSA_URGANEDM$DV_1763,dataset,2024-09-12T23:00:00+0200,2024-04-24T23:00:00+0200,,
4075,"Assumptions for net migration by age, sex, typ...",URT_PROJ_19RANMIG,dataset,2022-12-20T23:00:00+0100,2022-12-20T23:00:00+0100,2019,2100


In [121]:
eurostat.get_data_df('TGS00099')

Unnamed: 0,freq,indic_de,geo\TIME_PERIOD,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,A,CNMIGRATRT,AL01,,,,-15.9,-10.8,-9.3,-12.6,-10.7,-14.4,-9.3,,-17.6
1,A,CNMIGRATRT,AL02,,,,5.9,0.2,8.8,9.7,2.1,1.2,-0.1,,-3.1
2,A,CNMIGRATRT,AL03,,,,-15.3,-12.6,-12.7,-17.4,-9.8,-14.6,-10.4,,-18.4
3,A,CNMIGRATRT,AT11,7.3,7.2,6.4,6.8,12.8,6.3,6.5,6.3,7.4,10.2,9.9,17.4
4,A,CNMIGRATRT,AT12,4.2,4.2,5.8,8.1,11.9,8.2,4.1,5.7,5.7,6.3,7.3,14.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1009,A,NATGROWRT,UKM6,-1.1,-0.8,-0.9,-1.0,-2.3,-1.7,-2.6,-3.2,,,,
1010,A,NATGROWRT,UKM7,1.7,1.2,,,,,-0.4,-0.7,,,,
1011,A,NATGROWRT,UKM8,1.1,0.7,,,,,-0.5,-0.9,,,,
1012,A,NATGROWRT,UKM9,-0.6,-1.2,,,,,-2.8,-3.3,,,,


In [122]:
df.head()

Unnamed: 0,title,code,type,last update of data,last table structure change,data start,data end
0,"Unemployment rates by sex, age and degree of u...",LFST_R_URGAU,dataset,2024-09-12T23:00:00+0200,2024-04-24T23:00:00+0200,1992,2023
1,Gross weight of goods handled in all ports by ...,MAR_GO_AA,dataset,2024-10-21T23:00:00+0200,2024-10-01T23:00:00+0200,1997,2023
2,Gross weight of goods transported to/from main...,MAR_GO_AM_BE,dataset,2024-07-09T23:00:00+0200,2024-07-09T23:00:00+0200,1997,2023
3,Gross weight of goods transported to/from main...,MAR_GO_AM_BG,dataset,2024-07-09T23:00:00+0200,2024-07-09T23:00:00+0200,2001,2023
4,Gross weight of goods transported to/from main...,MAR_GO_AM_CY,dataset,2024-08-26T23:00:00+0200,2024-08-26T23:00:00+0200,2002,2023


# Import data from yfinance

In [None]:
# pip install yfinance

In [123]:
import pandas as pd
import yfinance as yf

Find the ticker symbols here: https://stockanalysis.com/stocks/

In [124]:
# ticker define
ticker = 'NVDA'

In [125]:
# define the time period
start_date = '2015-01-01'
end_date = '2024-11-21'

In [126]:
# download the data
df_nvda = yf.download(ticker, start=start_date, end=end_date)
df_nvda.head()

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-02,0.50325,0.507,0.49525,0.50325,0.483143,113680000
2015-01-05,0.50325,0.50475,0.4925,0.49475,0.474983,197952000
2015-01-06,0.4955,0.496,0.47925,0.47975,0.460583,197764000
2015-01-07,0.48325,0.4875,0.477,0.4785,0.459382,321808000
2015-01-08,0.484,0.4995,0.48375,0.4965,0.476663,283780000


# Define each column in the dataset

Date: The date of the stock market data\
Open: The opening price of the stock\
High: The highest price of the stock\
Low: The lowest price of the stock\
Close: The closing price of the stock\
Adj Close: The adjusted closing price of the stock\
Volume: The volume of the stock\
Ticker: The ticker symbol of the stock\
Name: The name of the stock\
Sector: The sector of the stock\
Industry: The industry of the stock\
Country: The country of the stock

In [127]:
# save the data in a csv file
df_nvda.to_csv(r'D:\Drive D\One drive folder\OneDrive - Higher Education Commission\Drive G\Data science coding\Data sets\yfinance.csv')

In [128]:
# download the data from multiple tickers
tickers = ['AAPL', 'MSFT', 'NVDA', 'GOOGL', 'AMZN']
start_date = '2010-01-01'
end_date = '2024-11-21'
df = yf.download(tickers, start=start_date, end=end_date)
df.head()


[*********************100%%**********************]  5 of 5 completed


Price,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Close,Close,Close,Close,Close,...,Open,Open,Open,Open,Open,Volume,Volume,Volume,Volume,Volume
Ticker,AAPL,AMZN,GOOGL,MSFT,NVDA,AAPL,AMZN,GOOGL,MSFT,NVDA,...,AAPL,AMZN,GOOGL,MSFT,NVDA,AAPL,AMZN,GOOGL,MSFT,NVDA
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2010-01-04,6.447414,6.695,15.627781,23.30069,0.423923,7.643214,6.695,15.684434,30.950001,0.46225,...,7.6225,6.8125,15.689439,30.620001,0.46275,493729600,151998000,78169752,38409100,800204000
2010-01-05,6.458557,6.7345,15.558962,23.308205,0.430113,7.656429,6.7345,15.615365,30.959999,0.469,...,7.664286,6.6715,15.695195,30.85,0.4605,601904800,177038000,120067812,49749600,728648000
2010-01-06,6.355827,6.6125,15.166741,23.165174,0.432864,7.534643,6.6125,15.221722,30.77,0.472,...,7.656429,6.73,15.662162,30.879999,0.46875,552160000,143576000,158988852,58182400,649168000
2010-01-07,6.344077,6.5,14.813666,22.924255,0.424381,7.520714,6.5,14.867367,30.450001,0.46275,...,7.5625,6.6005,15.25025,30.629999,0.4695,477131200,220604000,256315428,50559700,547792000
2010-01-08,6.386255,6.676,15.011148,23.08235,0.425298,7.570714,6.676,15.065566,30.66,0.46375,...,7.510714,6.528,14.814815,30.280001,0.459,447610800,196610000,188783028,51197400,478168000
