## Required Libraries

In [1]:
import numpy as np
import pandas as pd
import re

## Helper Functions

In [2]:
def load_eurostat_tsv(path):
    """
    Load and clean a Eurostat-formatted TSV dataset.

    This function handles the specific structure used by Eurostat in `.tsv` files,
    where multiple metadata fields (e.g. freq, unit, geo) are embedded inside the
    first column separated by commas, and year values are spread across multiple
    wide-format columns.

    The function performs the following steps:
    - Reads the TSV file using tab separation.
    - Automatically detects and splits the composite metadata column into
      individual fields (e.g. 'freq', 'unit', 'c_resid', 'nace_r2', 'geo').
    - Renames Eurostat's encoded "geo\\TIME_PERIOD" field to a clean "geo".
    - Identifies year columns (four-digit names) and cleans their values:
        * Converts ":" into NaN.
        * Removes Eurostat suffixes ("e", "b", "p") indicating estimated,
          provisional, or series breaks.
        * Converts cleaned values to numeric.
    - Reorders columns so metadata fields appear first (geo, freq, c_resid,
      unit, nace_r2), followed by year columns.
    - Returns a tidy, analysis-ready DataFrame in wide format.

    Parameters
    ----------
    path : str
        Path to the Eurostat TSV file.

    Returns
    -------
    pandas.DataFrame
        A cleaned DataFrame where:
        - Metadata fields are split into separate columns.
        - The 'geo' column is properly extracted and placed first.
        - Year columns contain numeric values or NaN.
        - Non-numeric Eurostat artifacts and flags are removed.

    Notes
    -----
    - This function works with ANY Eurostat `.tsv` dataset based on the standard
      multi-dimension TSV export format (tourism, GDP, population density, etc.).
    - The output is kept in wide format for flexibility; use a melt/reshape
      helper for long-format needs.
    - Eurostat datasets often vary in the number of metadata fields included in
      the first column; this function dynamically adapts to those differences.
    """

    df = pd.read_csv(path, sep="\t")

    # 1. Detect and split the composite first column
    first_col = df.columns[0]
    meta_keys = first_col.split(",")

    df[meta_keys] = df[first_col].str.split(",", expand=True)
    df.drop(columns=[first_col], inplace=True)

    # 2. Rename geo\TIME_PERIOD ‚Üí geo
    for col in df.columns:
        if col.startswith("geo"):
            df.rename(columns={col: "geo"}, inplace=True)
            break

    # 3. Clean year columns
    year_cols = [c for c in df.columns if re.match(r"^\d{4}$", str(c))]

    for col in year_cols:
        df[col] = (
            df[col]
            .astype(str)
            .str.replace(":", np.nan)
            .str.replace(" e", "", regex=False)
            .str.replace(" b", "", regex=False)
            .str.replace(" p", "", regex=False)
        )

        df[col] = pd.to_numeric(df[col], errors="coerce")

    df.rename(columns={c: int(c) for c in year_cols}, inplace=True)

    # 4. Reorder: geo first
    meta_present = [c for c in ["geo", "freq", "c_resid", "unit", "nace_r2"] if c in df.columns]
    other_cols = [c for c in df.columns if c not in meta_present]

    df = df[meta_present + other_cols]

    return df

In [3]:
def eurostat_to_long(df, value_name):
    """
    Convert a cleaned Eurostat wide-format dataframe into long format.

    Parameters
    ----------
    df : pandas.DataFrame
        Output of load_eurostat_tsv()
    value_name : str
        Name of the metric column (e.g. 'nights_spent', 'gdp', 'pop_density')

    Returns
    -------
    pandas.DataFrame
        Long-format dataframe with columns: geo, year, <value_name>
    """

    # Identify year columns
    year_cols = [c for c in df.columns if isinstance(c, int)]

    # Melt
    long_df = df.melt(
        id_vars=["geo"],
        value_vars=year_cols,
        var_name="year",
        value_name=value_name
    )

    # Clean year column
    long_df["year"] = long_df["year"].astype(int)

    return long_df


In [4]:
def clean_year_columns(df):
    """
    Standardize Eurostat year column names.

    Cleans column names by removing spaces and Eurostat flags ('e', 'b', 'p'),
    and converts any 4-digit year strings (e.g. '2012', '2012 e') into integer
    column names. This ensures year columns are correctly detected when
    converting to long format.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame returned by load_eurostat_tsv().

    Returns
    -------
    pandas.DataFrame
        Same DataFrame with cleaned and integer-typed year columns.
    """
    fixed_cols = {}
    for col in df.columns:
        col_str = str(col).strip()              # remove spaces
        col_str = col_str.replace(" e", "")      # remove flags
        col_str = col_str.replace(" b", "")
        col_str = col_str.replace(" p", "")
        if col_str.isdigit() and len(col_str) == 4:
            fixed_cols[col] = int(col_str)
    df.rename(columns=fixed_cols, inplace=True)
    return df


In [5]:
def clean_nights(x):
    """
    Clean Eurostat tourism values by removing special flags and converting to numeric.

    Eurostat encodes missing or flagged values in strings such as:
    - ":"  ‚Üí missing value
    - "e"  ‚Üí estimated
    - "p"  ‚Üí provisional
    - "b"  ‚Üí break in series
    - "804181 e" ‚Üí numeric value with an estimation flag

    This function:
    1. Removes spaces from strings.
    2. Converts ":" to None.
    3. Strips Eurostat flags ("e", "p", "b") from numeric strings.
    4. Attempts to convert cleaned values to float.
    5. Returns None if conversion is not possible.

    Parameters
    ----------
    x : str or number
        Raw Eurostat value from the tourism dataset.

    Returns
    -------
    float or None
        Clean numeric value, or None if the entry is missing or invalid.
    """
    if isinstance(x, str):
        x = x.replace(" ", "")
        if x == ":":
            return None
        x = x.replace("e", "").replace("p", "").replace("b", "")  # remove flags
    try:
        return float(x)
    except:
        return None


## Loading the Tourism Dataset (Target Variable)

### Nights spent at tourist accommodation establishments dataset.


---

### üìò **Eurostat Variable Legend**

### **freq ‚Äî Frequency**

* **A** ‚Üí Annual
* **Q** ‚Üí Quarterly
* **M** ‚Üí Monthly

---

### **c_resid ‚Äî Tourist Residency**

* **DOM** ‚Üí Domestic tourists
* **FOR** ‚Üí Foreign tourists
* **TOTAL** ‚Üí All tourists combined

---

### **unit ‚Äî Measurement Unit**

* **NR** ‚Üí Number of nights
* **PC** ‚Üí Percentage
* **PCH_PRE** ‚Üí Percentage change compared to previous period
* **P_THAB** ‚Üí Number of nights spent per thousand inhabitants
* **NRM** ‚Üí Number in millions

---

### **nace_r2 ‚Äî Accommodation Type**

* **I551** ‚Üí Hotels
* **I552** ‚Üí Holiday / short-stay accommodation
* **I553** ‚Üí Camping grounds
* **I551_I552** ‚Üí Hotels + holiday rentals
* **I552_I553** ‚Üí Holiday rentals + camping
* **I551_I553** ‚Üí All accommodation types
* **TOTAL** ‚Üí Entire accommodation sector

---

### **geo ‚Äî Geographic Identifier**

* **Country codes** ‚Üí `AT`, `BE`, `FR`, `ES`, ‚Ä¶
* **NUTS2 / NUTS3 regional codes** ‚Üí `AT11`, `BE21`, `TR22`, `SI03`, `SK04`, ‚Ä¶

---

### **TIME_PERIOD** (Year)

* Years from **1990‚Äì2024**
* Value represents:

  * Number of nights spent
  * Or (% change) depending on `unit`

---

### **Special Symbols**

* **`:`** ‚Üí Missing value
* **`e`** ‚Üí Estimated
* **`p`** ‚Üí Provisional
* **`b`** ‚Üí Break in time series

---

In [6]:
tourism_df = load_eurostat_tsv("raw_data/estat_tour_occ_nin2.tsv")
tourism_df

Unnamed: 0,geo,freq,c_resid,unit,nace_r2,1990,1991,1992,1993,1994,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
0,AL,A,DOM,NR,I551,:,:,:,:,:,...,:,:,:,804181 e,1015037 e,729659 e,1076608 e,1237862 e,1943039 e,2039675
1,AL0,A,DOM,NR,I551,:,:,:,:,:,...,:,:,:,804181 e,1015037 e,729659 e,1076608 e,1237862 e,1943039 e,2039675
2,AL01,A,DOM,NR,I551,:,:,:,:,:,...,:,:,:,199081 e,259910 e,148755 e,206227 e,261012 e,467356 e,501278
3,AL02,A,DOM,NR,I551,:,:,:,:,:,...,:,:,:,269005 e,329702 e,164554 e,245447 e,372722 e,514057 e,598752
4,AL03,A,DOM,NR,I551,:,:,:,:,:,...,:,:,:,336095 e,425425 e,416350 e,624934 e,604128 e,961626 e,939645
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32259,UKM3,A,TOTAL,P_THAB,I551-I553,:,:,:,:,:,...,:,5324.41,:,:,:,:,:,:,:,:
32260,UKM5,A,TOTAL,P_THAB,I551-I553,:,:,:,:,:,...,8546.93,8436.02,:,:,:,:,:,:,:,:
32261,UKM6,A,TOTAL,P_THAB,I551-I553,:,:,:,:,:,...,10986.14,19337.05,:,:,:,:,:,:,:,:
32262,UKN,A,TOTAL,P_THAB,I551-I553,1443.35 e,:,:,:,1880,...,7341.56,7397.07,:,:,:,:,:,:,:,:


## Loading the GDP Dataset (Feature Variable)

---

## üìò **GDP Dataset Legend (Eurostat ‚Äî nama_10r_2gdp)**

### **freq ‚Äî Frequency**

* **A** ‚Üí Annual

---

### **unit ‚Äî Measurement Unit**

* **MIO_EUR** ‚Üí Millions of euros
* **EUR_HAB** ‚Üí Euros per inhabitant
* **EUR** ‚Üí Euros

---

### **nace_r2 ‚Äî Indicator Code**

* **B1GQ** ‚Üí GDP at market prices

---

### **geo ‚Äî Geographic Identifier**

* **NUTS2 regional codes** (e.g., `ES51`, `DE21`, `ITC4`)

These represent the second administrative level in the EU statistical system.

---

### **TIME_PERIOD** (Year)

* Years available: **2000‚Äì2023**
* Value represents GDP in the specified measurement unit from `unit`.

---

### **Special Symbols**

* **`:`** ‚Üí Missing value
* **`e`** ‚Üí Estimated
* **`p`** ‚Üí Provisional

---


In [7]:
gdp_df = load_eurostat_tsv("raw_data/estat_nama_10r_2gdp.tsv")
gdp_df

Unnamed: 0,geo,freq,unit,2000,2001,2002,2003,2004,2005,2006,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,AL,A,EUR_HAB,:,:,:,:,:,:,:,...,3500,3600,3700,4000,4500,4800,4700,5400 p,:,:
1,AL0,A,EUR_HAB,:,:,:,:,:,:,:,...,3500,3600,3700,4000,4500,4800,4700,5400 p,:,:
2,AL01,A,EUR_HAB,:,:,:,:,:,:,:,...,2700,2900,3100,3300,3600,3900,3700,4300 p,:,:
3,AL02,A,EUR_HAB,:,:,:,:,:,:,:,...,4100,4300,4400,4900,5400,5800,5600,6500 p,:,:
4,AL03,A,EUR_HAB,:,:,:,:,:,:,:,...,3400,3300,3400,3600,4000,4400,4300,4900 p,:,:
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3160,TRB2,A,PPS_HAB_EU27_2020,:,:,:,:,15,16,17,...,25,26,26,26,25,24,25,22,25,28
3161,TRC,A,PPS_HAB_EU27_2020,:,:,:,:,21,21,22,...,33,34,33,33,31,29,32,31,36,38
3162,TRC1,A,PPS_HAB_EU27_2020,:,:,:,:,25,26,27,...,41,44,42,42,40,38,43,44,50,50
3163,TRC2,A,PPS_HAB_EU27_2020,:,:,:,:,20,20,21,...,27,28,27,27,24,23,24,22,26,29


## Population Density Dataset (feature variable)

---

## üìò **Population Density Dataset Legend (Eurostat ‚Äî tgs00024)**

### **freq ‚Äî Frequency**

* **A** ‚Üí Annual data

---

### **unit ‚Äî Measurement Unit**

* **PER_KM2** ‚Üí Persons per square kilometer

---

### **geo ‚Äî Geographic Identifier**

* **NUTS-2 regional code**, e.g.:

  * `AT12` (Austria)
  * `BE21` (Belgium)
  * `BE33` (Belgium)
  * `AL01` (Albania)

These codes represent the second administrative level in the EU‚Äôs NUTS classification.

---

### **TIME_PERIOD** (Year)

* Years available: **2012‚Äì2023**
* Value represents **population density (inhabitants per km¬≤)**.

---

### **Special Symbols**

* **`:`** ‚Üí Missing value
* **`e`** ‚Üí Estimated value
* **`b`** ‚Üí Break in time series (e.g., boundary or classification change)

---


In [8]:
pop_df = load_eurostat_tsv("raw_data/estat_tgs00024.tsv")
pop_df

Unnamed: 0,geo,freq,unit,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,AL01,A,PER_KM2,79.2,78.5,77.7,77.0,76.6,76.1,75.5,74.8,77.0 e,76.3 e,74.0 e,:
1,AL02,A,PER_KM2,219.6,222.3,225.1,227.0,229.2,232.4,234.8,236.2,237.8 e,238.6 e,238.8 e,:
2,AL03,A,PER_KM2,73.3,72.4,71.6,70.7,69.9,68.9,67.9,67.1,68.8 e,67.9 e,64.9 e,:
3,AT11,A,PER_KM2,78.0,78.2,78.5,79.0,79.4 b,79.7,77.5,77.8,77.8,78.1,79.2,79.7
4,AT12,A,PER_KM2,85.5,85.7,86.2,87.0,87.7 b,88.2,88.5,88.9,88.9,89.3,90.4,91.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
333,UKM6,A,PER_KM2,11.5,11.6,11.6,11.6,11.6 b,11.6,11.7,:,:,:,:,:
334,UKM7,A,PER_KM2,145.2,145.9,146.8,147.8,149.0 b,150.0,150.8,:,:,:,:,:
335,UKM8,A,PER_KM2,778.5,779.7,782.1,786.4,793.1 b,797.2,800.0,:,:,:,:,:
336,UKM9,A,PER_KM2,59.6,59.6,59.5,59.6,59.7 b,59.7,59.7,:,:,:,:,:


## Cleaning year columns

In [9]:
tourism_df = clean_year_columns(tourism_df)
gdp_df = clean_year_columns(gdp_df)
pop_df = clean_year_columns(pop_df)


## Converting Dataframes to Long Format

In [10]:
tourism_long = eurostat_to_long(tourism_df, value_name="nights_spent")
gdp_long = eurostat_to_long(gdp_df, value_name="gdp")
pop_long = eurostat_to_long(pop_df, value_name="pop_density")


### üîç Filtering the Tourism Dataset (Selecting the Correct Indicator)

The tourism dataset contains many variables (percentages, km¬≤-normalized values, foreign-only values, etc.).  
Here, we extract **only the data we need** for the target variable:

- `unit == "NR"` ‚Üí we keep *absolute number of nights spent*  
- `c_resid == "TOTAL"` ‚Üí includes both domestic and foreign tourists  
- `nace_r2` containing `"I551"`, `"I552"`, `"I553"` ‚Üí selects valid accommodation types  
  (hotels, holiday/short-stay, camping, or combined categories)

This ensures we keep only rows representing **real nights spent in tourist accommodations**,  
not ratios, percentages, or incomplete subsets.


In [11]:
tourism_df_filtered = tourism_df[
    (tourism_df["unit"] == "NR") &              # Nights spent
    (tourism_df["c_resid"] == "TOTAL") &        # Total residents (domestic + foreign)
    (
        tourism_df["nace_r2"].str.contains("I551") |
        tourism_df["nace_r2"].str.contains("I552") |
        tourism_df["nace_r2"].str.contains("I553")
    )
].copy()


In [12]:
# Remove rows where all years are missing
year_cols = [c for c in tourism_df_filtered.columns if isinstance(c, int)]
tourism_df_filtered = tourism_df_filtered.dropna(subset=year_cols, how="all")


In [13]:
# Converting the dataset into Long format
tourism_long = eurostat_to_long(tourism_df_filtered, "nights_spent")


In [14]:
# Removing missing target values
tourism_long = tourism_long.dropna(subset=["nights_spent"])

In [29]:
tourism_long.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144165 entries, 0 to 144164
Data columns (total 3 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   geo           144165 non-null  object 
 1   year          144165 non-null  int64  
 2   nights_spent  66274 non-null   float64
dtypes: float64(1), int64(1), object(1)
memory usage: 3.3+ MB


In [15]:
print("Remaining units:", tourism_df_filtered["unit"].unique())
print("Remaining resid:", tourism_df_filtered["c_resid"].unique())
print("Remaining accom:", tourism_df_filtered["nace_r2"].unique())
print("Rows:", len(tourism_df_filtered))


Remaining units: ['NR']
Remaining resid: ['TOTAL']
Remaining accom: ['I551' 'I551-I553' 'I552' 'I552_I553' 'I553']
Rows: 4119


### üí∂ Filtering the GDP Dataset

The GDP dataset contains multiple units and indicators.  
Here we extract only the variable we want to use as a feature:

- `unit == "EUR_HAB"` ‚Üí GDP per inhabitant (comparable across regions)

Steps:
1. Clean Eurostat‚Äôs year-column names (`clean_year_columns`).
2. Keep only rows corresponding to **GDP per inhabitant**.
3. Remove regions where *all* year values are missing.
4. Convert the dataset from **wide format** (one column per year)  
   to **long format** (`geo`, `year`, `gdp`).
5. Remove rows where GDP is still missing.

In [17]:
gdp_df = clean_year_columns(gdp_df)

In [32]:
gdp_df_filtered = gdp_df[gdp_df["unit"] == "EUR_HAB"].copy()

year_cols = [c for c in gdp_df_filtered.columns if isinstance(c, int)]
gdp_df_filtered = gdp_df_filtered.dropna(subset=year_cols, how="all")

gdp_long = eurostat_to_long(gdp_df_filtered, "gdp")
gdp_long = gdp_long.dropna(subset=["gdp"])

gdp_long


Unnamed: 0,geo,year,gdp
0,AL,2000,:
1,AL0,2000,:
2,AL01,2000,:
3,AL02,2000,:
4,AL03,2000,:
...,...,...,...
10507,TRB2,2023,4800
10508,TRC,2023,6300
10509,TRC1,2023,8300
10510,TRC2,2023,4900


### üèôÔ∏è Filtering the Population Density Dataset

This dataset reports population density (inhabitants per km¬≤) for each NUTS-2 region.

We apply the following steps:

- Keep only `unit == "PER_KM2"` ‚Üí actual population density values.
- Remove rows where all year columns are empty (regions with no data).
- Convert from wide format to long format (`geo`, `year`, `pop_density`).
- Drop rows with missing population density.


In [34]:
pop_df = clean_year_columns(pop_df)

pop_df_filtered = pop_df[pop_df["unit"] == "PER_KM2"].copy()

year_cols = [c for c in pop_df_filtered.columns if isinstance(c, int)]
pop_df_filtered = pop_df_filtered.dropna(subset=year_cols, how="all")

pop_long = eurostat_to_long(pop_df_filtered, "pop_density")
pop_long = pop_long.dropna(subset=["pop_density"])

pop_long


Unnamed: 0,geo,year,pop_density
0,AL01,2012,79.2
1,AL02,2012,219.6
2,AL03,2012,73.3
3,AT11,2012,78.0
4,AT12,2012,85.5
...,...,...,...
4051,UKM6,2023,:
4052,UKM7,2023,:
4053,UKM8,2023,:
4054,UKM9,2023,:


### üîó Merging Tourism, GDP, and Population Density Data

We now combine the three cleaned long-format datasets:

- `tourism_long` ‚Üí target variable: **nights spent**
- `gdp_long` ‚Üí feature: **GDP per inhabitant**
- `pop_long` ‚Üí feature: **population density**

We merge them on the common keys:
- `geo` ‚Üí NUTS-2 regional code
- `year` ‚Üí reference year

We use *inner joins* to keep only the region-year pairs  
where **all datasets overlap**.  
This ensures the final dataset contains:

| geo | year | nights_spent | gdp | pop_density |

The resulting table is a consistent multi-feature dataset  
suitable for exploratory analysis and machine learning.

In [20]:
df = tourism_long.merge(gdp_long, on=["geo", "year"], how="inner")
df = df.merge(pop_long, on=["geo", "year"], how="inner")

df

Unnamed: 0,geo,year,nights_spent,gdp,pop_density
0,AL01,2012,:,2700,79.2
1,AL02,2012,:,3900,219.6
2,AL03,2012,:,3100,73.3
3,AT11,2012,2009106,25400,78.0
4,AT12,2012,4518191,30800,85.5
...,...,...,...,...,...
17095,TRB1,2023,1108,7200,48.2
17096,TRB2,2023,:,4800,53.6
17097,TRC1,2023,:,8300,193.2
17098,TRC2,2023,:,4900,117.9


### üìÖ Restricting the Tourism Dataset to Years With Valid Data

Eurostat reports nights-spent data inconsistently across earlier decades.  
To avoid carrying large portions of missing data, we determine the **earliest year**  
that contains at least one valid `nights_spent` entry.

We then filter the dataset to keep only years **>= this minimum valid year**.

This step reduces noise and ensures all remaining rows correspond to years  
where the variable actually exists.

In [22]:
min_valid_year = tourism_long[tourism_long["nights_spent"].notna()]["year"].min()
tourism_long = tourism_long[tourism_long["year"] >= min_valid_year]

tourism_long

Unnamed: 0,geo,year,nights_spent
0,AL,1990,:
1,AL0,1990,:
2,AL01,1990,:
3,AL02,1990,:
4,AL03,1990,:
...,...,...,...
144160,UKM3,2024,:
144161,UKM5,2024,:
144162,UKM6,2024,:
144163,UKN,2024,:


### üßº Cleaning `nights_spent` (Removing Eurostat Flags and Missing Symbols)

We use helper function `clean_nights()` to clean our target variable `nights_spent`


In [23]:
tourism_long["nights_spent"] = tourism_long["nights_spent"].apply(clean_nights)

### üîó Final Merge: Tourism √ó GDP √ó Population Density

We now merge the three cleaned long-format datasets:

- `tourism_long` ‚Üí Target variable `nights_spent`
- `gdp_long` ‚Üí GDP per inhabitant
- `pop_long` ‚Üí Population density

We merge on:
- `geo` (NUTS-2 region code)
- `year`

We use **inner joins** to ensure we keep only region-year pairs  
that exist in *all* datasets.

The resulting table contains:

| geo | year | nights_spent | gdp | pop_density |

In [24]:
df = tourism_long.merge(gdp_long, on=["geo", "year"], how="inner")
df = df.merge(pop_long, on=["geo", "year"], how="inner")

df

Unnamed: 0,geo,year,nights_spent,gdp,pop_density
0,AL01,2012,,2700,79.2
1,AL02,2012,,3900,219.6
2,AL03,2012,,3100,73.3
3,AT11,2012,2009106.0,25400,78.0
4,AT12,2012,4518191.0,30800,85.5
...,...,...,...,...,...
17095,TRB1,2023,1108.0,7200,48.2
17096,TRB2,2023,,4800,53.6
17097,TRC1,2023,,8300,193.2
17098,TRC2,2023,,4900,117.9


In [35]:
# Removing Rows Without Tourism Data (Missing Target Variable)
df_clean = df.dropna(subset=["nights_spent"])

# Move target variable to the last column
cols = [c for c in df_clean.columns if c != "nights_spent"] + ["nights_spent"]
df_clean = df_clean[cols]

df_clean

Unnamed: 0,geo,year,gdp,pop_density,nights_spent
3,AT11,2012,25400,78.0,2009106.0
4,AT12,2012,30800,85.5,4518191.0
5,AT13,2012,46700,4377.6,11208975.0
6,AT21,2012,32100,59.4,6919782.0
7,AT22,2012,34200,74.4,7238491.0
...,...,...,...,...,...
17084,TR61,2023,12600,96.0,25495.0
17085,TR62,2023,10100,145.1,3582.0
17086,TR63,2023,7800,143.7,1007.0
17087,TR71,2023,8900,52.2,348.0
