In [1]:
import pandas as pd

In [7]:
# Read the raw Eurostat export: tab-delimited, first row used as the header.
# Only the year columns are tab-separated; the dimension codes stay packed in
# a single comma-joined first column.
df = pd.read_csv(filepath_or_buffer="estat_isoc_r_eb_ain2.tsv", sep="\t", header=0)

In [11]:
# Show the first rows and the column labels of the raw frame, one after the other.
print(df.head(), df.columns, sep="\n")

  freq,nace_r2,size_emp,indic_is,unit,geo\TIME_PERIOD   2023  2024    2025 
0                         A,C,GE10,E_AI_CC,PC_ENT,AT    9.95     :   27.04 
1                        A,C,GE10,E_AI_CC,PC_ENT,AT1    9.35     :       : 
2                       A,C,GE10,E_AI_CC,PC_ENT,AT11    5.81     :       : 
3                       A,C,GE10,E_AI_CC,PC_ENT,AT12    6.38     :       : 
4                       A,C,GE10,E_AI_CC,PC_ENT,AT13   17.93     :       : 
Index(['freq,nace_r2,size_emp,indic_is,unit,geo\TIME_PERIOD', '2023 ', '2024 ',
       '2025 '],
      dtype='object')


In [13]:
# The first column holds all dimension codes joined by commas; break it apart
# into one column per dimension.
first_col = df.columns[0]
split_cols = df[first_col].str.split(",", expand=True)

# Name the pieces in the order they appear in the packed header.
split_cols.columns = ["freq", "nace_r2", "size_emp", "indic_is", "unit", "geo"]

# Swap the packed column for the split ones, keeping the dimensions first and
# the year columns after them.
df = pd.concat([split_cols, df.drop(columns=[first_col])], axis=1)

print(df.head())


  freq nace_r2 size_emp indic_is    unit   geo   2023  2024    2025 
0    A       C     GE10  E_AI_CC  PC_ENT    AT   9.95     :   27.04 
1    A       C     GE10  E_AI_CC  PC_ENT   AT1   9.35     :       : 
2    A       C     GE10  E_AI_CC  PC_ENT  AT11   5.81     :       : 
3    A       C     GE10  E_AI_CC  PC_ENT  AT12   6.38     :       : 
4    A       C     GE10  E_AI_CC  PC_ENT  AT13  17.93     :       : 


In [15]:
# Reshape wide -> long: every non-dimension column (the year columns) becomes
# a row, with its label in "Year" and its cell value in "Adoption_Rate".
df_long = pd.melt(
    df,
    id_vars=["freq", "nace_r2", "size_emp", "indic_is", "unit", "geo"],
    var_name="Year",
    value_name="Adoption_Rate",
)

print(df_long.head())


  freq nace_r2 size_emp indic_is    unit   geo   Year Adoption_Rate
0    A       C     GE10  E_AI_CC  PC_ENT    AT  2023          9.95 
1    A       C     GE10  E_AI_CC  PC_ENT   AT1  2023          9.35 
2    A       C     GE10  E_AI_CC  PC_ENT  AT11  2023          5.81 
3    A       C     GE10  E_AI_CC  PC_ENT  AT12  2023          6.38 
4    A       C     GE10  E_AI_CC  PC_ENT  AT13  2023         17.93 


In [17]:
import pandas as pd

# Convert the melted observations to numbers.
#
# The raw cells are strings that appear to carry trailing whitespace
# (e.g. "9.95 ") and, in the Eurostat TSV format, may carry a flag after the
# number (e.g. "9.95 b", ": c"). An exact-match replace of ":" misses ": ",
# and coercing the whole cell would silently turn a flagged figure into NaN.
# Keeping only the first whitespace-delimited token preserves those figures.
# NOTE(review): the flag itself is discarded here - keep it in a separate
# column if downstream analysis needs it.
df_long["Adoption_Rate"] = pd.to_numeric(
    df_long["Adoption_Rate"]
    .astype(str)      # tolerate cells pandas already parsed as numbers
    .str.split(n=1)   # whitespace split: [value, optional flags]
    .str[0],          # empty cells yield NaN here
    # ":" (no value available) and any other non-numeric token become NaN.
    errors="coerce"
)


In [19]:
# Year labels came from the column headers as text; store them as integers.
df_long = df_long.assign(Year=df_long["Year"].astype(int))


In [23]:
# Inspect the first ten cleaned rows.
print(df_long.iloc[:10])

  freq nace_r2 size_emp indic_is    unit   geo  Year  Adoption_Rate
0    A       C     GE10  E_AI_CC  PC_ENT    AT  2023           9.95
1    A       C     GE10  E_AI_CC  PC_ENT   AT1  2023           9.35
2    A       C     GE10  E_AI_CC  PC_ENT  AT11  2023           5.81
3    A       C     GE10  E_AI_CC  PC_ENT  AT12  2023           6.38
4    A       C     GE10  E_AI_CC  PC_ENT  AT13  2023          17.93
5    A       C     GE10  E_AI_CC  PC_ENT   AT2  2023           7.98
6    A       C     GE10  E_AI_CC  PC_ENT  AT21  2023           6.20
7    A       C     GE10  E_AI_CC  PC_ENT  AT22  2023           8.73
8    A       C     GE10  E_AI_CC  PC_ENT   AT3  2023          11.24
9    A       C     GE10  E_AI_CC  PC_ENT  AT31  2023          11.59


In [25]:
# Persist the tidy long-format table; the positional index is not written.
df_long.to_csv(path_or_buf="eurostat_clean_long.csv", index=False)