2.	Data structures: You are required to gather and process data that has been stored in at least two distinct formats. For example, this can be data in a CSV file, from a MySQL database or from a web API in JSON format. [0-20]

In [1]:
import pandas as pd
import requests

# URL of the CSO PxStat API for dataset THA25 in JSON-stat 1.0 format
url = 'https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/THA25/JSON-stat/1.0/en'

# Fetch the dataset. The timeout stops the cell from hanging indefinitely if the
# server never answers, and raise_for_status() surfaces HTTP 4xx/5xx errors here
# instead of letting a later response.json() call fail on an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [2]:
# Parse the HTTP response body as JSON; the following cells walk the result
# by key, starting from json_data['dataset'].
json_data = response.json()

In [3]:
# Look up the display name of statistic THA25C01 ('Passenger Journeys').
# It is wrapped in a one-element list so it can serve as a level when the
# MultiIndex is built from the product of all dimensions.
statistic_dim = json_data['dataset']['dimension']['STATISTIC']
statistic_label = [statistic_dim['category']['label']['THA25C01']]

In [4]:
# Extracting the 'Week' time dimension (positions and display labels)
week_category = json_data['dataset']['dimension']['TLIST(W1)']['category']

# In JSON-stat, 'index' maps each category code to its position in the value
# array, while 'label' is a plain code -> text mapping with no guaranteed order.
# Sort the codes by position so the labels line up with the observations
# instead of relying on the dict order of 'label'.
week_codes = sorted(week_category['index'], key=week_category['index'].get)
week_index = [week_category['index'][code] for code in week_codes]
week_label = [week_category['label'][code] for code in week_codes]


In [5]:
# Extracting the mode-of-transport dimension (codes and display labels)
transmode_category = json_data['dataset']['dimension']['C03935V04687']['category']

# Order the codes by the position 'index' assigns to them, then look each label
# up by code, so codes and labels stay aligned with the value array even if the
# 'label' mapping is serialised in a different order.
transmode_index = sorted(transmode_category['index'], key=transmode_category['index'].get)
transmode_label = [transmode_category['label'][code] for code in transmode_index]

In [6]:
# Observation values for every statistic / week / mode-of-transport combination
values = json_data['dataset']['value']

# Every combination of the three dimensions, one index entry per observation.
# NOTE(review): assumes the values are ordered statistic -> week -> mode of
# transport, i.e. the same nesting from_product generates — confirm against
# the dataset's declared dimension order.
index = pd.MultiIndex.from_product(
    [statistic_label, week_label, transmode_label],
    names=['Statistic', 'Week', 'Mode_of_Transport'],
)

# Attach the values to the index, then flatten the index levels into ordinary columns
df = pd.DataFrame(values, columns=['VALUE'], index=index).reset_index()

In [7]:
# Preview the first rows to check that labels and values were combined as expected
df.head()

Unnamed: 0,Statistic,Week,Mode_of_Transport,VALUE
0,Passenger Journeys,2019 Week 01,Dublin Metro Bus,1987891.0
1,Passenger Journeys,2019 Week 01,"Bus, excluding Dublin Metro",497598.0
2,Passenger Journeys,2019 Week 01,Rail,
3,Passenger Journeys,2019 Week 01,"All public transport, excluding LUAS",2485489.0
4,Passenger Journeys,2019 Week 02,Dublin Metro Bus,2709579.0


In [8]:
# Write the reshaped table to THA25.csv (without the row index) so it can be
# inspected outside the notebook
df.to_csv('THA25.csv',index=False)

In [9]:
import eurostat


In [10]:
# Display the Eurostat table of contents (one row per entry, with its title and
# code) to browse what is available
eurostat.get_toc_df()

Unnamed: 0,title,code,type,last update of data,last table structure change,data start,data end
0,Passengers (excluding cruise passengers) trans...,MAR_MP_AM_CFTT,dataset,2022-12-08T23:00:00+0100,2023-07-28T23:00:00+0200,2004,2021
1,Country level - number and gross tonnage of ve...,MAR_MT_AM_CSVI,dataset,2022-11-15T23:00:00+0100,2022-11-15T23:00:00+0100,1997,2021
2,Passengers embarked and disembarked in all por...,MAR_PA_AA,dataset,2023-11-28T23:00:00+0100,2023-11-28T23:00:00+0100,1997,2022
3,Passengers (excluding cruise passengers) trans...,MAR_PA_QM,dataset,2023-11-28T23:00:00+0100,2023-11-28T23:00:00+0100,1997-Q1,2023-Q2
4,Passengers (excluding cruise passengers) trans...,MAR_PA_QM_BE,dataset,2023-11-28T23:00:00+0100,2023-11-28T23:00:00+0100,1997-Q1,2023-Q2
...,...,...,...,...,...,...,...
7578,Percentage of letters delivered on-time (USP u...,POST_CUBE1_X$POST_QOS_1,dataset,2023-05-23T08:18:37Z,2023-05-10T10:54:33Z,,
7579,Postal services,POST_CUBE1_X,dataset,2023-05-23T08:18:37Z,2023-05-10T10:54:33Z,2012,2021
7580,Number of enterprises providing postal services,POST_CUBE1_X$NUM701,dataset,2023-05-23T08:18:37Z,2023-05-10T10:54:33Z,,
7581,Access points (USP under direct or indirect d...,POST_CUBE1_X$POST_ACC_1,dataset,2023-05-23T08:18:37Z,2023-05-10T10:54:33Z,,


In [11]:
# Keep the table of contents in a variable so the searches below can filter it
toc_df = eurostat.get_toc_df()

In [12]:
# Search the table of contents for the keyword 'ports'.
# NOTE(review): the match appears to be a substring match — the output also lists
# titles such as '... exports and imports' — so the result is broader than port data.
eurostat.subset_toc_df(toc_df, 'ports')

Unnamed: 0,title,code,type,last update of data,last table structure change,data start,data end
0,Passengers (excluding cruise passengers) trans...,MAR_MP_AM_CFTT,dataset,2022-12-08T23:00:00+0100,2023-07-28T23:00:00+0200,2004,2021
1,Country level - number and gross tonnage of ve...,MAR_MT_AM_CSVI,dataset,2022-11-15T23:00:00+0100,2022-11-15T23:00:00+0100,1997,2021
2,Passengers embarked and disembarked in all por...,MAR_PA_AA,dataset,2023-11-28T23:00:00+0100,2023-11-28T23:00:00+0100,1997,2022
3,Passengers (excluding cruise passengers) trans...,MAR_PA_QM,dataset,2023-11-28T23:00:00+0100,2023-11-28T23:00:00+0100,1997-Q1,2023-Q2
4,Passengers (excluding cruise passengers) trans...,MAR_PA_QM_BE,dataset,2023-11-28T23:00:00+0100,2023-11-28T23:00:00+0100,1997-Q1,2023-Q2
...,...,...,...,...,...,...,...
7315,Tropical wood imports to the EU from chapter 4...,FOR_TROP,dataset,2022-12-16T23:00:00+0100,2022-12-16T23:00:00+0100,1999,2021-12
7465,Adjusted extra-EU imports since 2000 by tariff...,DS-059281,dataset,2023-11-15T11:00:00+0100,2023-11-15T11:00:00+0100,,
7469,"Sold production, exports and imports",DS-056120,dataset,2023-11-07T16:10:00+0100,2023-11-07T16:10:00+0100,,
7516,Beneficiaries of labour market policy supports...,LMP_PARTSUMM$TPS00080,dataset,2023-11-20T15:46:44Z,2023-11-20T15:46:44Z,,


In [13]:
# Search by dataset title to find its code; the AVIA_PAOA row in the result is
# the dataset loaded in the next cell
eurostat.subset_toc_df(toc_df, 'Air passenger transport by main airports in each reporting country')

Unnamed: 0,title,code,type,last update of data,last table structure change,data start,data end
6861,International extra-EU air passenger transport...,AVIA_PAEXAC,dataset,2023-11-29T11:00:00+0100,2023-11-28T23:00:00+0100,1993,2023-Q3
6867,International intra-EU air passenger transport...,AVIA_PAINAC,dataset,2023-11-29T11:00:00+0100,2023-11-28T23:00:00+0100,1993,2023-Q3
6872,National air passenger transport by main airpo...,AVIA_PANA,dataset,2023-11-29T11:00:00+0100,2023-11-28T23:00:00+0100,1993,2023-Q3
6881,Air passenger transport by main airports in ea...,AVIA_PAOA,dataset,2023-11-29T11:00:00+0100,2023-11-28T23:00:00+0100,1993,2023-Q3


In [None]:
# Download dataset AVIA_PAOA (the code found in the table-of-contents search).
# flags=True requests the flags alongside the values.
data = eurostat.get_data_df('AVIA_PAOA', flags=True)

In [None]:
# Display the downloaded table.
# NOTE(review): a bare `data` renders the full frame; data.head() would give a lighter preview.
data

In [None]:
# Print a structural summary of the table (columns, dtypes, non-null counts)
data.info()

In [None]:
# Descriptive statistics for the table's columns
data.describe()

In [None]:
# NOTE(review): repeats the data.info() call from two cells earlier; `data` is
# not reassigned in between, so the output should be the same.
data.info()

In [None]:
import eurostat