In [1]:
!pip install requests pandas



In [6]:
import requests
import pandas as pd
from io import StringIO

# Base URL of the ARDECO export endpoint; the variable code is appended to it.
base_url = "https://urban.jrc.ec.europa.eu/ardeco-api-v2/rest/export/"

# Variable to download. Codes used in this notebook:
#   SNPTD:  Average annual population (Unit: Persons)
#   SUVGDP: GDP per capita at current prices (Unit: EUR, PPS)
#   SNETDP: Employment per capita (Unit: Persons)
variable_code = "SNETDP"

# Output file per variable code, so the file written and the message printed
# can never disagree. The names are the ones read back in the merge section;
# the SNPTD/SUVGDP pairings are presumed from those file names -- confirm.
# This cell has to be run once per code to produce all three input files.
output_files = {
    "SNPTD": "ardeco_population.csv",
    "SUVGDP": "ardeco_gdpcapita.csv",
    "SNETDP": "ardeco_employmentcapita.csv",
}
output_path = output_files.get(variable_code, f"ardeco_{variable_code}.csv")

# First and last year to request (both inclusive).
first_year = 1980
last_year = 2021

# Seconds to wait for the server before giving up on a single request.
request_timeout = 60

# The endpoint URL does not depend on the year, so build it once.
url = base_url + variable_code

# One DataFrame per successfully downloaded year.
all_data = []

for year in range(first_year, last_year + 1):
    # Query-string filters for this year
    params = {
        "version": "2021",     # Data version (presumably the ARDECO release)
        "level_id": "2",       # NUTS level (optional)
        "year": str(year),     # Year (required for each iteration)
        # "territory_id": "UKC"  # NUTS code for the territory (optional)
    }

    try:
        response = requests.get(url, params=params, timeout=request_timeout)
    except requests.RequestException as exc:
        # A network problem for one year should not abort the remaining years.
        print(f"Failed to retrieve data for year {year}. Error: {exc}")
        continue

    if response.status_code == 200:
        # The endpoint answers with CSV text; parse it straight from memory.
        data = pd.read_csv(StringIO(response.text))

        # Add a 'year' column to the DataFrame for tracking
        data['year'] = year

        all_data.append(data)
        print(f"Successfully retrieved data for year {year}")
    else:
        print(f"Failed to retrieve data for year {year}. Status code: {response.status_code}")

# pd.concat raises an unhelpful error on an empty list; fail with a clear message instead.
if not all_data:
    raise ValueError(
        f"No data could be retrieved for variable '{variable_code}' "
        f"({first_year}-{last_year}); nothing to save."
    )

# Stack the yearly frames into one table with a fresh 0..n-1 index.
final_data = pd.concat(all_data, ignore_index=True)

# Display the first few rows of the combined data
print(final_data.head())

# Save the combined data and report the file that was actually written.
final_data.to_csv(output_path, index=False)
print(f"Saved all data to '{output_path}'")

Successfully retrieved data for year 1980
Successfully retrieved data for year 1981
Successfully retrieved data for year 1982
Successfully retrieved data for year 1983
Successfully retrieved data for year 1984
Successfully retrieved data for year 1985
Successfully retrieved data for year 1986
Successfully retrieved data for year 1987
Successfully retrieved data for year 1988
Successfully retrieved data for year 1989
Successfully retrieved data for year 1990
Successfully retrieved data for year 1991
Successfully retrieved data for year 1992
Successfully retrieved data for year 1993
Successfully retrieved data for year 1994
Successfully retrieved data for year 1995
Successfully retrieved data for year 1996
Successfully retrieved data for year 1997
Successfully retrieved data for year 1998
Successfully retrieved data for year 1999
Successfully retrieved data for year 2000
Successfully retrieved data for year 2001
Successfully retrieved data for year 2002
Successfully retrieved data for ye

# **Merge ARDECO datasets**

In [7]:
import pandas as pd 
# Load the population extract, keep the region id, year and value columns,
# and give them the names used as merge keys / variable name below.
population = (
    pd.read_csv('ardeco_population.csv')
    .loc[:, ['TERRITORY_ID', 'YEAR', 'VALUE']]
    .rename(columns={
        'TERRITORY_ID': 'nuts_id',
        'YEAR': 'year',
        'VALUE': 'population',
    })
)
population.head()

Unnamed: 0,nuts_id,year,population
0,AL01,1980,874412.0
1,AL02,1980,677338.0
2,AL03,1980,1120249.0
3,AT11,1980,269984.0
4,AT12,1980,1427049.0


In [8]:
# Load GDP per capita and keep only the rows whose 'UNIT' is 'EUR'
# (rows reported in any other unit are dropped), restricted to the
# region id, year and value columns.
gdpcapita = (
    pd.read_csv('ardeco_gdpcapita.csv')
    .loc[lambda frame: frame['UNIT'] == 'EUR', ['TERRITORY_ID', 'YEAR', 'VALUE']]
    .rename(columns={
        'TERRITORY_ID': 'nuts_id',
        'YEAR': 'year',
        'VALUE': 'gdpcapita',
    })
)
gdpcapita.head()

Unnamed: 0,nuts_id,year,gdpcapita
0,AT11,1980,4665.0
1,AT12,1980,5876.0
2,AT13,1980,11057.0
3,AT21,1980,6729.0
4,AT22,1980,6526.0


In [9]:
# Load the employment-per-capita extract and reduce it to the three columns
# needed for the merge, renamed to match the other datasets.
employment_columns = {
    'TERRITORY_ID': 'nuts_id',
    'YEAR': 'year',
    'VALUE': 'employment',
}
employment = pd.read_csv('ardeco_employmentcapita.csv')
employment = employment[list(employment_columns)].rename(columns=employment_columns)
employment.head()

Unnamed: 0,nuts_id,year,employment
0,AT11,1980,0.316
1,AT12,1980,0.408
2,AT13,1980,0.524
3,AT21,1980,0.421
4,AT22,1980,0.439


In [10]:
# Combine population, GDP per capita and employment into one table.
# Both joins are inner joins on (nuts_id, year), so only region/year pairs
# present in all three datasets are kept.
merged_data = (
    population
    .merge(gdpcapita, how='inner', on=['nuts_id', 'year'])
    .merge(employment, how='inner', on=['nuts_id', 'year'])
)
merged_data.head()

Unnamed: 0,nuts_id,year,population,gdpcapita,employment
0,AT11,1980,269984.0,4665.0,0.316
1,AT12,1980,1427049.0,5876.0,0.408
2,AT13,1980,1538886.0,11057.0,0.524
3,AT21,1980,535335.0,6729.0,0.421
4,AT22,1980,1187133.0,6526.0,0.439


In [11]:
# Persist the merged yearly data without the DataFrame index.
# NOTE(review): the last cell of this notebook writes the period-aggregated
# table to this same path, replacing what is saved here.
merged_data.to_csv('ardeco_merged_data.csv', index=False)

In [12]:
import pandas as pd
# Reload the merged data from disk as the starting point for the period aggregation.
# NOTE(review): the final cell overwrites 'ardeco_merged_data.csv' with the aggregated
# table, so re-running from this cell without re-running the merge cells first loads
# already-aggregated data -- confirm whether a separate output file is wanted.
ardeco_data = pd.read_csv('ardeco_merged_data.csv')
ardeco_data.head()

Unnamed: 0,nuts_id,year,population,gdpcapita,employment
0,AT11,1980,269984.0,4665.0,0.316
1,AT12,1980,1427049.0,5876.0,0.408
2,AT13,1980,1538886.0,11057.0,0.524
3,AT21,1980,535335.0,6729.0,0.421
4,AT22,1980,1187133.0,6526.0,0.439


In [13]:
# Keep only the rows whose year lies between 1980 and 2021 (both bounds inclusive),
# then list the years that remain.
ardeco_data = ardeco_data[ardeco_data['year'].between(1980, 2021)]
ardeco_data['year'].unique()

array([1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990,
       1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
       2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
       2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021], dtype=int64)

In [14]:
# create 'period' column 
def assign_period(pubyear):
    """Return the five-year period label for a year.

    Periods are consecutive five-year windows starting at 1981: 1981-1985 is
    period 1, 1986-1990 is period 2, and so on up to 2016-2020, which is
    period 8. A value that falls in none of the windows returns 0, so years
    on either side of the covered range (for example 1980 and 2021) share
    the same label 0.
    """
    # Window starts are 1981, 1986, ..., 2016; each window covers start..start+4.
    for label, start in enumerate(range(1981, 2021, 5), start=1):
        if start <= pubyear <= start + 4:
            return label
    return 0

# Label every row with its five-year period. assign() returns a new frame rather
# than writing a column into the boolean-filtered frame from the previous cell,
# which is the pattern that triggers pandas' SettingWithCopyWarning.
ardeco_data = ardeco_data.assign(period=ardeco_data['year'].apply(assign_period))
ardeco_data.head()

Unnamed: 0,nuts_id,year,population,gdpcapita,employment,period
0,AT11,1980,269984.0,4665.0,0.316,0
1,AT12,1980,1427049.0,5876.0,0.408,0
2,AT13,1980,1538886.0,11057.0,0.524,0
3,AT21,1980,535335.0,6729.0,0.421,0
4,AT22,1980,1187133.0,6526.0,0.439,0


In [15]:
# Average every remaining column within each (nuts_id, period) pair and keep the
# two grouping keys as ordinary columns. Note that 'year' is averaged as well.
ardeco_data = ardeco_data.groupby(['nuts_id', 'period'], as_index=False).mean()
ardeco_data.head(10)

Unnamed: 0,nuts_id,period,year,population,gdpcapita,employment
0,AL01,0,2021.0,790960.0,4300.0,0.441
1,AL01,3,1995.0,1009677.0,510.0,0.327
2,AL01,4,1998.0,979699.0,656.6,0.319
3,AL01,5,2003.0,939710.0,1319.2,0.2768
4,AL01,6,2008.0,891280.0,2236.2,0.344
5,AL01,7,2013.0,849054.0,2720.0,0.3778
6,AL01,8,2018.0,815928.0,3520.0,0.423
7,AL02,0,2021.0,1178530.0,6500.0,0.441
8,AL02,3,1995.0,902079.0,1155.0,0.327
9,AL02,4,1998.0,930865.4,1390.2,0.319


In [16]:
# Rename the region key column from 'nuts_id' to 'eu_nuts_id'.
ardeco_data = ardeco_data.rename(columns={'nuts_id': 'eu_nuts_id'})
ardeco_data.head()

Unnamed: 0,eu_nuts_id,period,year,population,gdpcapita,employment
0,AL01,0,2021.0,790960.0,4300.0,0.441
1,AL01,3,1995.0,1009677.0,510.0,0.327
2,AL01,4,1998.0,979699.0,656.6,0.319
3,AL01,5,2003.0,939710.0,1319.2,0.2768
4,AL01,6,2008.0,891280.0,2236.2,0.344


In [17]:
# Count the missing values in each column
ardeco_data.isna().sum()

eu_nuts_id     0
period         0
year           0
population     0
gdpcapita     47
employment    47
dtype: int64

In [18]:
# Show every row that has a missing value in at least one column
has_missing = ardeco_data.isna().any(axis='columns')
ardeco_data.loc[has_missing]

Unnamed: 0,eu_nuts_id,period,year,population,gdpcapita,employment
102,ATZZ,0,2021.0,0.0,,
103,ATZZ,4,2000.0,0.0,,
104,ATZZ,5,2003.0,0.0,,
105,ATZZ,6,2008.0,0.0,,
106,ATZZ,7,2013.0,0.0,,
107,ATZZ,8,2018.0,0.0,,
746,DKZZ,0,2021.0,0.0,,
747,DKZZ,4,2000.0,0.0,,
748,DKZZ,5,2003.0,0.0,,
749,DKZZ,6,2008.0,0.0,,


In [19]:
# Save the period-aggregated table without the DataFrame index.
# NOTE(review): this is the same path the yearly merged data was written to and
# read back from earlier in the notebook, so that file is overwritten here --
# confirm this is intended rather than a separate output file.
ardeco_data.to_csv('ardeco_merged_data.csv', index=False)