In [None]:
import io
import os
import zipfile

import pandas as pd
import requests

In [None]:
# Connection settings for the Destatis GENESIS-Online REST API.
BASE_URL = 'https://www-genesis.destatis.de/genesisWS/rest/2020/'

# Credentials are read from the environment so that no secret is stored in the
# notebook itself. Set GENESIS_USERNAME (and GENESIS_PASSWORD, if your account
# needs one) before starting the kernel.
# NOTE(review): this cell previously held a hardcoded, token-like username with
# an empty password. That value should be treated as exposed and rotated.
USERNAME = os.environ.get("GENESIS_USERNAME", "")
PASSWORD = os.environ.get("GENESIS_PASSWORD", "")

if not USERNAME:
    # Warn early instead of failing later with a less obvious API error.
    print("Warning: GENESIS_USERNAME is not set; API requests will be sent without credentials.")

# Headers sent with every request; the credentials travel as header fields.
headers = {
    'Content-Type': 'application/x-www-form-urlencoded',
    'username': USERNAME,
    'password': PASSWORD
}

In [None]:
# Download table 71211-0002 as a flat-file CSV ('ffcsv') for 2014-2026 and
# load it into `df`.
responseTable = requests.post(
    BASE_URL + 'data/tablefile',
    headers=headers,
    data={
        'name': '71211-0002',
        'area': 'all',
        'compress': 'false',
        'transpose': 'false',
        'startyear': '2014',
        'endyear': '2026',
        'format': 'ffcsv',
        'job': 'false',
        'stand': '',
        'language': 'de'
    },
    # requests has no default timeout; without one a stalled connection would
    # block the notebook indefinitely.
    timeout=60,
)

# Fail loudly: if the download did not succeed, later cells must not run
# against a missing (or stale) `df`.
if responseTable.status_code != 200:
    raise RuntimeError(f"Error: Status code {responseTable.status_code}")

print("File downloaded successfully")

# The response body is handled as a ZIP archive (the recorded run of this cell
# succeeded that way even though 'compress' is 'false'). Check this up front so
# that an error document returned by the API yields a readable message instead
# of an opaque BadZipFile.
payload = io.BytesIO(responseTable.content)
if not zipfile.is_zipfile(payload):
    raise ValueError(
        "Expected a ZIP archive from the API but received something else: "
        + responseTable.text[:500]
    )
payload.seek(0)

# Extract and read the data
with zipfile.ZipFile(payload) as z:
    members = z.namelist()
    if not members:
        raise ValueError("The downloaded ZIP archive is empty")

    # Prefer an actual CSV member; fall back to the first entry as before.
    csv_members = [name for name in members if name.lower().endswith('.csv')]
    csv_filename = csv_members[0] if csv_members else members[0]

    with z.open(csv_filename) as csv_file:
        df = pd.read_csv(csv_file,
                         sep=';',
                         encoding='utf-8',
                         low_memory=False)

print(f"Data shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")



File downloaded successfully
Data shape: (429, 17)
Columns: ['statistics_code', 'statistics_label', 'time_code', 'time_label', 'time', '1_variable_code', '1_variable_label', '1_variable_attribute_code', '1_variable_attribute_label', '2_variable_code', '2_variable_label', '2_variable_attribute_code', '2_variable_attribute_label', 'value', 'value_unit', 'value_variable_code', 'value_variable_label']


In [None]:
# Keep only the columns needed for the chart.
df_clean = df.loc[:, ['time', '2_variable_attribute_label', 'value', 'value_unit']].copy()

# Select the rows labelled 'Steuereinnahmen des Bundes', discard the label
# column, order the rows by 'time', and renumber the index from zero.
df_federal_tax_rev = (
    df_clean
    .loc[lambda frame: frame['2_variable_attribute_label'] == 'Steuereinnahmen des Bundes']
    .drop(columns=['2_variable_attribute_label'])
    .sort_values('time')
    .reset_index(drop=True)
)

print(f"\nFiltered data shape: {df_federal_tax_rev.shape}")
print(df_federal_tax_rev)


Filtered data shape: (11, 3)
    time      value value_unit
0   2014  270746283   Tsd. EUR
1   2015  281607698   Tsd. EUR
2   2016  289017935   Tsd. EUR
3   2017  309361188   Tsd. EUR
4   2018  322358667   Tsd. EUR
5   2019  329052167   Tsd. EUR
6   2020  283114831   Tsd. EUR
7   2021  313667447   Tsd. EUR
8   2022  337209479   Tsd. EUR
9   2023  356041655   Tsd. EUR
10  2024  374948728   Tsd. EUR


In [None]:
# Write the filtered table to a CSV file in the working directory.
# The row index is omitted and the file is encoded as 'utf-8-sig'.
output_file = 'project_chart4_federal_tax_revenue.csv'
df_federal_tax_rev.to_csv(
    path_or_buf=output_file,
    encoding='utf-8-sig',
    index=False,
)
print(f"\n✓ Saved to: {output_file}")


✓ Saved to: project_chart4_federal_tax_revenue.csv
