## Importing libraries

In [1]:
import pandas as pd
import requests

## Importing data

### Ireland

#### Public

In [2]:
# Load the THA25 extract (passenger journeys per week and mode of transport) from the working directory
THA25_df = pd.read_csv(filepath_or_buffer='THA25.csv')
THA25_df

Unnamed: 0,STATISTIC,Statistic Label,TLIST(W1),Week,C03935V04687,Mode of Transport,UNIT,VALUE
0,THA25C01,Passenger Journeys,2019W01,2019 Week 01,10,Dublin Metro Bus,Number,1987891.0
1,THA25C01,Passenger Journeys,2019W01,2019 Week 01,20,"Bus, excluding Dublin Metro",Number,497598.0
2,THA25C01,Passenger Journeys,2019W01,2019 Week 01,30,Rail,Number,
3,THA25C01,Passenger Journeys,2019W01,2019 Week 01,40,"All public transport, excluding LUAS",Number,2485489.0
4,THA25C01,Passenger Journeys,2019W02,2019 Week 02,10,Dublin Metro Bus,Number,2709579.0
...,...,...,...,...,...,...,...,...
1035,THA25C01,Passenger Journeys,2023W47,2023 Week 47,40,"All public transport, excluding LUAS",Number,5059620.0
1036,THA25C01,Passenger Journeys,2023W48,2023 Week 48,10,Dublin Metro Bus,Number,3453532.0
1037,THA25C01,Passenger Journeys,2023W48,2023 Week 48,20,"Bus, excluding Dublin Metro",Number,941393.0
1038,THA25C01,Passenger Journeys,2023W48,2023 Week 48,30,Rail,Number,942218.0


In [3]:
# List the distinct units reported in THA25 before the 'UNIT' column is discarded
unique_units = THA25_df.loc[:, 'UNIT'].unique()
print(unique_units)

['Number']


In [4]:
# Columns to discard: statistic code, text week label, mode-of-transport code and unit
columns_to_remove = ['STATISTIC', 'Week', 'C03935V04687', 'UNIT']

# Drop them along the column axis and rebind the name to the slimmer frame
THA25_df = THA25_df.drop(labels=columns_to_remove, axis='columns')

THA25_df

Unnamed: 0,Statistic Label,TLIST(W1),Mode of Transport,VALUE
0,Passenger Journeys,2019W01,Dublin Metro Bus,1987891.0
1,Passenger Journeys,2019W01,"Bus, excluding Dublin Metro",497598.0
2,Passenger Journeys,2019W01,Rail,
3,Passenger Journeys,2019W01,"All public transport, excluding LUAS",2485489.0
4,Passenger Journeys,2019W02,Dublin Metro Bus,2709579.0
...,...,...,...,...
1035,Passenger Journeys,2023W47,"All public transport, excluding LUAS",5059620.0
1036,Passenger Journeys,2023W48,Dublin Metro Bus,3453532.0
1037,Passenger Journeys,2023W48,"Bus, excluding Dublin Metro",941393.0
1038,Passenger Journeys,2023W48,Rail,942218.0


In [5]:
# 'TLIST(W1)' is assumed to hold codes such as '2019W01':
# four year digits, a 'W' separator, then the week number.
# Slice each part out by position and cast it to an integer in a single step.
THA25_df['Year'] = THA25_df['TLIST(W1)'].str.slice(stop=4).astype(int)
THA25_df['Week'] = THA25_df['TLIST(W1)'].str.slice(start=5).astype(int)

# Show the frame, which now carries the numeric 'Year' and 'Week' columns
THA25_df

Unnamed: 0,Statistic Label,TLIST(W1),Mode of Transport,VALUE,Year,Week
0,Passenger Journeys,2019W01,Dublin Metro Bus,1987891.0,2019,1
1,Passenger Journeys,2019W01,"Bus, excluding Dublin Metro",497598.0,2019,1
2,Passenger Journeys,2019W01,Rail,,2019,1
3,Passenger Journeys,2019W01,"All public transport, excluding LUAS",2485489.0,2019,1
4,Passenger Journeys,2019W02,Dublin Metro Bus,2709579.0,2019,2
...,...,...,...,...,...,...
1035,Passenger Journeys,2023W47,"All public transport, excluding LUAS",5059620.0,2023,47
1036,Passenger Journeys,2023W48,Dublin Metro Bus,3453532.0,2023,48
1037,Passenger Journeys,2023W48,"Bus, excluding Dublin Metro",941393.0,2023,48
1038,Passenger Journeys,2023W48,Rail,942218.0,2023,48


In [6]:
# 'TLIST(W1)' has already been split into 'Year' and 'Week'; remove it from the frame in place
THA25_df.drop(labels=['TLIST(W1)'], axis='columns', inplace=True)

In [7]:
# Display the frame after the 'TLIST(W1)' column has been dropped
THA25_df

Unnamed: 0,Statistic Label,Mode of Transport,VALUE,Year,Week
0,Passenger Journeys,Dublin Metro Bus,1987891.0,2019,1
1,Passenger Journeys,"Bus, excluding Dublin Metro",497598.0,2019,1
2,Passenger Journeys,Rail,,2019,1
3,Passenger Journeys,"All public transport, excluding LUAS",2485489.0,2019,1
4,Passenger Journeys,Dublin Metro Bus,2709579.0,2019,2
...,...,...,...,...,...
1035,Passenger Journeys,"All public transport, excluding LUAS",5059620.0,2023,47
1036,Passenger Journeys,Dublin Metro Bus,3453532.0,2023,48
1037,Passenger Journeys,"Bus, excluding Dublin Metro",941393.0,2023,48
1038,Passenger Journeys,Rail,942218.0,2023,48


#### Aviation

In [8]:
# Load the TAM08 extract (passengers per month and airport) from the working directory
TAM08_df = pd.read_csv(filepath_or_buffer='TAM08.csv')
TAM08_df

Unnamed: 0,STATISTIC,Statistic Label,TLIST(M1),Month,C02935V03550,Airport,UNIT,VALUE
0,TAM08C01,Passengers,201901,2019 January,EIDW,Dublin,Number,2054794.0
1,TAM08C01,Passengers,201901,2019 January,EICK,Cork,Number,148939.0
2,TAM08C01,Passengers,201901,2019 January,EINN,Shannon,Number,87016.0
3,TAM08C01,Passengers,201901,2019 January,EIKN,Knock,Number,42413.0
4,TAM08C01,Passengers,201901,2019 January,EIKY,Kerry,Number,20829.0
...,...,...,...,...,...,...,...,...
343,TAM08C01,Passengers,202310,2023 October,EICK,Cork,Number,253108.0
344,TAM08C01,Passengers,202310,2023 October,EINN,Shannon,Number,168861.0
345,TAM08C01,Passengers,202310,2023 October,EIKN,Knock,Number,
346,TAM08C01,Passengers,202310,2023 October,EIKY,Kerry,Number,39404.0


In [9]:
# List the distinct units reported in TAM08
unique_units = TAM08_df.loc[:, 'UNIT'].unique()
print(unique_units)

['Number']


In [10]:
# List the distinct statistic codes reported in TAM08
unique = TAM08_df.loc[:, 'STATISTIC'].unique()
print(unique)

['TAM08C01']


In [11]:
# Columns to discard: statistic code, airport code and unit
columns_to_remove_tam08 = ['STATISTIC', 'C02935V03550', 'UNIT']

# Drop them from TAM08_df in place, along the column axis
TAM08_df.drop(labels=columns_to_remove_tam08, axis='columns', inplace=True)

In [12]:
# Display the frame after the code and unit columns have been dropped
TAM08_df

Unnamed: 0,Statistic Label,TLIST(M1),Month,Airport,VALUE
0,Passengers,201901,2019 January,Dublin,2054794.0
1,Passengers,201901,2019 January,Cork,148939.0
2,Passengers,201901,2019 January,Shannon,87016.0
3,Passengers,201901,2019 January,Knock,42413.0
4,Passengers,201901,2019 January,Kerry,20829.0
...,...,...,...,...,...
343,Passengers,202310,2023 October,Cork,253108.0
344,Passengers,202310,2023 October,Shannon,168861.0
345,Passengers,202310,2023 October,Knock,
346,Passengers,202310,2023 October,Kerry,39404.0


#### Private

In [13]:
# Define the URL for the JSON endpoint (table THA18, JSON-stat 1.0, English)
url = "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/THA18/JSON-stat/1.0/en"

# Make a GET request to the endpoint.
# requests applies no timeout by default, so an unresponsive server would block
# this cell indefinitely; bound the wait explicitly (in seconds). If the limit is
# exceeded, requests raises an exception instead of hanging.
response = requests.get(url, timeout=30)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Flatten the JSON response into a DataFrame using pandas.json_normalize.
    # NOTE(review): the printed result is a single row with one column per nested key,
    # so the response probably needs further reshaping before analysis — confirm.
    data = response.json()
    THA18_df = pd.json_normalize(data)

    # Print the flattened frame for inspection
    print(THA18_df)
else:
    # THA18_df is NOT assigned on this path; any later cell that uses it will
    # fail until the request succeeds.
    print(f"Error: Unable to fetch data. Status code: {response.status_code}")

  dataset.dimension.STATISTIC.label  \
0                         Statistic   

   dataset.dimension.STATISTIC.category.index.THA18C01  \
0                                                  0     

   dataset.dimension.STATISTIC.category.index.THA18C02  \
0                                                  1     

   dataset.dimension.STATISTIC.category.index.THA18C03  \
0                                                  2     

  dataset.dimension.STATISTIC.category.label.THA18C01  \
0                                 Vehicle Population    

  dataset.dimension.STATISTIC.category.label.THA18C02  \
0                               Kilometres Travelled    

  dataset.dimension.STATISTIC.category.label.THA18C03  \
0                       Average Kilometres Travelled    

  dataset.dimension.STATISTIC.category.unit.THA18C01.base  \
0                                             Number        

  dataset.dimension.STATISTIC.category.unit.THA18C02.base  \
0                                        

In [14]:
# Display the frame produced by pd.json_normalize for the THA18 response
THA18_df

Unnamed: 0,dataset.dimension.STATISTIC.label,dataset.dimension.STATISTIC.category.index.THA18C01,dataset.dimension.STATISTIC.category.index.THA18C02,dataset.dimension.STATISTIC.category.index.THA18C03,dataset.dimension.STATISTIC.category.label.THA18C01,dataset.dimension.STATISTIC.category.label.THA18C02,dataset.dimension.STATISTIC.category.label.THA18C03,dataset.dimension.STATISTIC.category.unit.THA18C01.base,dataset.dimension.STATISTIC.category.unit.THA18C02.base,dataset.dimension.STATISTIC.category.unit.THA18C03.base,...,dataset.dimension.C01936V02368.category.label.2022,dataset.dimension.role.geo,dataset.dimension.role.metric,dataset.dimension.role.time,dataset.dimension.id,dataset.dimension.size,dataset.label,dataset.source,dataset.updated,dataset.value
0,Statistic,0,1,2,Vehicle Population,Kilometres Travelled,Average Kilometres Travelled,Number,Million,Kilometres,...,2022,[C03788V04538],[STATISTIC],[TLIST(A1)],"[STATISTIC, TLIST(A1), C02876V03463, C01836V03...","[3, 8, 4, 8, 4, 27, 25]",Road Traffic Volumes of Private Cars,"Central Statistics Office, Ireland",2023-11-22T11:00:00Z,"[1992819.0, 90989.0, 77870.0, 70838.0, 86226.0..."


## Eurostat

#### Air transport of passengers

In [15]:
# Workbook holding the Eurostat air passenger figures; adjust the path if the file lives elsewhere
excel_file_path = 'Air transport of passengers Eurostat.xlsx'

# Load the workbook and relabel its 'TIME' column as 'Country' in one chain
air_df = pd.read_excel(io=excel_file_path).rename(columns={'TIME': 'Country'})

# Display the resulting 'air_df' frame
air_df

Unnamed: 0,Country,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Belgium,25102695.0,25919515.0,26389927.0,28776258.0,30958841.0,30115832.0,33260493.0,34506309.0,35385188.0,9465828.0,13500020.0,27873892.0
1,Bulgaria,6652007.0,6819103.0,7079292.0,7520697.0,7610949.0,9324217.0,11092651.0,12137714.0,11713068.0,3729017.0,5047877.0,8807502.0
2,Czechia,12650532.0,11742352.0,11891812.0,12079873.0,12672004.0,13672362.0,16245554.0,17838221.0,18767088.0,3821372.0,4755160.0,11532650.0
3,Denmark,25808321.0,26532730.0,27459623.0,29015133.0,30095505.0,32763142.0,33261214.0,34701139.0,34780127.0,8658654.0,10817817.0,26649573.0
4,Germany,175316076.0,178591103.0,180783188.0,186445814.0,193936430.0,200687293.0,212389343.0,222422361.0,226764086.0,57795978.0,73597370.0,155302643.0
5,Estonia,1907569.0,2202427.0,1958565.0,2019806.0,2160978.0,2214989.0,2635145.0,2995528.0,3258003.0,857837.0,1292941.0,2731365.0
6,Ireland,23362889.0,23594089.0,24603640.0,26310826.0,29545020.0,32595709.0,34271771.0,36345005.0,37947510.0,8268297.0,9097359.0,32405890.0
7,Greece,33770739.0,32082336.0,34023934.0,39117833.0,42096402.0,45543371.0,50170728.0,54258826.0,56088527.0,17341192.0,32245559.0,57893929.0
8,Spain,165153230.0,159771261.0,157731973.0,165354382.0,174652503.0,193872037.0,209824089.0,220611429.0,228262372.0,57797305.0,91898241.0,199571203.0
9,France,126013257.0,129764462.0,132762875.0,136360671.0,140867569.0,145280602.0,154096485.0,161991179.0,168726788.0,50724011.0,66033809.0,136560938.0


#### Rail transport of passengers

In [16]:
# Workbook holding the Eurostat rail passenger figures; adjust the path if the file lives elsewhere
excel_file_path = 'Rail transport of passengers Eurostat.xlsx'

# Load the workbook and relabel its 'TIME' column as 'Country' in one chain.
# NOTE(review): the displayed frame has 'Unnamed: N' columns between the year columns —
# presumably extra columns from the sheet; confirm whether they should be dropped.
rail_df = pd.read_excel(io=excel_file_path).rename(columns={'TIME': 'Country'})

# Display the resulting 'rail_df' frame
rail_df

Unnamed: 0,Country,2011,Unnamed: 2,2012,Unnamed: 4,2013,Unnamed: 6,2014,Unnamed: 8,2015,...,Unnamed: 14,2018,Unnamed: 16,2019,Unnamed: 18,2020,Unnamed: 20,2021,Unnamed: 22,2022
0,Belgium,10498.0,,,,,,,,,...,,,,,,,,,,
1,Bulgaria,2059.0,,1870.0,,1821.0,,1698.0,,1549.0,...,,1476.0,,1520.0,,1118.0,,1203.0,,1600.0
2,zehia,,,7196.0,,7512.0,,7644.0,,8125.0,...,,10220.0,,10856.0,,6623.0,,6752.0,,9394.0
3,Denmark,6395.0,,6534.0,,6566.0,,6513.0,,6507.0,...,,6182.0,,6174.0,,3940.0,,4181.0,,6376.0
4,Germany,89316.0,,93918.0,,89450.0,,90978.0,,91050.0,...,,98161.0,,100252.0,,57787.0,,57518.0,,92313.0
5,Estonia,243.0,,235.0,,223.0,,280.0,,286.0,...,,417.0,,392.0,,263.0,,290.0,,382.0
6,Ireland,1638.0,,1578.0,,1569.0,,1728.0,,1918.0,...,,2281.0,,2399.0,,834.0,,870.0,,1748.0
7,Greee,958.0,,832.0,,1056.0,,1072.0,,1263.0,...,,1104.0,,1252.0,,640.0,,653.0,,1117.0
8,Spain,22645.0,,22170.0,,23660.0,,24915.0,,26018.0,...,,28317.0,,28703.0,,11987.0,,17002.0,,27489.0
9,Frane,91298.0,,91205.0,,90485.0,,89499.0,,91377.0,...,,91818.0,,96540.0,,56606.0,,75854.0,,102814.0


# World Bank

## Population

In [17]:
# CSV file holding the World Bank population data
file_path = 'Population World Bank.csv'

# Load it into 'population_df'
population_df = pd.read_csv(filepath_or_buffer=file_path)

# Display the loaded frame
population_df

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Aruba,ABW,"Population, total",SP.POP.TOTL,54608.0,55811.0,56682.0,57475.0,58178.0,58782.0,...,102880.0,103594.0,104257.0,104874.0,105439.0,105962.0,106442.0,106585.0,106537.0,106445.0
1,Africa Eastern and Southern,AFE,"Population, total",SP.POP.TOTL,130692579.0,134169237.0,137835590.0,141630546.0,145605995.0,149742351.0,...,567892149.0,583651101.0,600008424.0,616377605.0,632746570.0,649757148.0,667242986.0,685112979.0,702977106.0,720859132.0
2,Afghanistan,AFG,"Population, total",SP.POP.TOTL,8622466.0,8790140.0,8969047.0,9157465.0,9355514.0,9565147.0,...,31541209.0,32716210.0,33753499.0,34636207.0,35643418.0,36686784.0,37769499.0,38972230.0,40099462.0,41128771.0
3,Africa Western and Central,AFW,"Population, total",SP.POP.TOTL,97256290.0,99314028.0,101445032.0,103667517.0,105959979.0,108336203.0,...,387204553.0,397855507.0,408690375.0,419778384.0,431138704.0,442646825.0,454306063.0,466189102.0,478185907.0,490330870.0
4,Angola,AGO,"Population, total",SP.POP.TOTL,5357195.0,5441333.0,5521400.0,5599827.0,5673199.0,5736582.0,...,26147002.0,27128337.0,28127721.0,29154746.0,30208628.0,31273533.0,32353588.0,33428486.0,34503774.0,35588987.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,Kosovo,XKX,"Population, total",SP.POP.TOTL,947000.0,966000.0,994000.0,1022000.0,1050000.0,1078000.0,...,1818117.0,1812771.0,1788196.0,1777557.0,1791003.0,1797085.0,1788878.0,1790133.0,1786038.0,1761985.0
262,"Yemen, Rep.",YEM,"Population, total",SP.POP.TOTL,5542459.0,5646668.0,5753386.0,5860197.0,5973803.0,6097298.0,...,26984002.0,27753304.0,28516545.0,29274002.0,30034389.0,30790513.0,31546691.0,32284046.0,32981641.0,33696614.0
263,South Africa,ZAF,"Population, total",SP.POP.TOTL,16520441.0,16989464.0,17503133.0,18042215.0,18603097.0,19187194.0,...,53873616.0,54729551.0,55876504.0,56422274.0,56641209.0,57339635.0,58087055.0,58801927.0,59392255.0,59893885.0
264,Zambia,ZMB,"Population, total",SP.POP.TOTL,3119430.0,3219451.0,3323427.0,3431381.0,3542764.0,3658024.0,...,15234976.0,15737793.0,16248230.0,16767761.0,17298054.0,17835893.0,18380477.0,18927715.0,19473125.0,20017675.0


## Income

In [18]:
# CSV file holding the World Bank income data; adjust the path if the file lives elsewhere
file_path = 'Average income world bank.csv'

# Load it into 'income_df'
income_df = pd.read_csv(filepath_or_buffer=file_path)

# Display the loaded frame to verify the import
income_df

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Aruba,ABW,Adjusted net national income per capita (curre...,NY.ADJ.NNTY.PC.CD,,,,,,,...,22115.614210,23111.232220,24093.582640,24018.878420,24457.602270,24859.525440,26702.132160,20971.438090,24994.772990,
1,Africa Eastern and Southern,AFE,Adjusted net national income per capita (curre...,NY.ADJ.NNTY.PC.CD,,,,,,,...,1327.021073,1343.347342,1236.498033,1167.651297,1313.117754,1257.046421,1228.514026,1113.051649,1232.351821,
2,Afghanistan,AFG,Adjusted net national income per capita (curre...,NY.ADJ.NNTY.PC.CD,,,,,,,...,598.899585,577.260184,550.241360,485.792229,498.041221,467.286034,469.477760,476.899060,339.682114,
3,Africa Western and Central,AFW,Adjusted net national income per capita (curre...,NY.ADJ.NNTY.PC.CD,,,,,,,...,1679.965667,1838.464813,1589.531108,1397.764260,1316.043520,1412.503047,1440.327860,1399.151980,1415.982509,
4,Angola,AGO,Adjusted net national income per capita (curre...,NY.ADJ.NNTY.PC.CD,,,,,,,...,3187.601761,3490.921523,2374.531996,1207.215215,1501.637531,1507.863453,1328.708143,1100.962834,1232.948233,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,Kosovo,XKX,Adjusted net national income per capita (curre...,NY.ADJ.NNTY.PC.CD,,,,,,,...,,,,,,,,,,
262,"Yemen, Rep.",YEM,Adjusted net national income per capita (curre...,NY.ADJ.NNTY.PC.CD,,,,,,,...,1273.489211,1323.269168,1366.411677,1016.887235,849.030933,665.907750,,,,
263,South Africa,ZAF,Adjusted net national income per capita (curre...,NY.ADJ.NNTY.PC.CD,,,,,,,...,5977.137238,5646.961218,5127.603776,4681.677958,5505.776036,5740.363783,5449.048686,4658.248020,5717.120396,
264,Zambia,ZMB,Adjusted net national income per capita (curre...,NY.ADJ.NNTY.PC.CD,,,,,,,...,1409.081210,1375.312762,1050.129153,988.532254,1138.740164,1142.799848,1010.373209,698.249518,614.647305,
