In [None]:
# This installs the packages so Jupyter Notebook can execute the code. 
# Version info: python 3.9.7
# If you run this in a mybinder.org environment, you do not need to execute this block of code. 
import sys
!conda install --yes --prefix {sys.prefix} requests 
!conda install --yes --prefix {sys.prefix} jsonschema 
!conda install --yes --prefix {sys.prefix} pandas 
!conda install --yes --prefix {sys.prefix} numpy 
!conda install --yes --prefix {sys.prefix} matplotlib

In [None]:
# The package pyDataverse handles everything you need to connect to a Dataverse installation and download files from it.
# Check out the documentation: 
# https://pydataverse.readthedocs.io/en/latest/user/basic-usage.html#download-and-save-a-dataset-to-disk

from pyDataverse.api import NativeApi, DataAccessApi
import io
import pandas as pd

In [None]:
# Step 1: Set the base URL of the repository and create the Native API client for it
repository_url = "https://data.aussda.at/"
native_api = NativeApi(repository_url)

In [None]:
# Step 2: Name the dataset by its persistent identifier (note the "doi:" prefix)
# and request it through the Native API client
DOI = "doi:10.11587/EHJHFJ"
dataset = native_api.get_dataset(DOI)

In [None]:
# Create a list of files that are available in the latest version of the dataset
files_list = dataset.json()['data']['latestVersion']['files']

# Collect the ids of all tab files, in the order in which the dataset lists them
ident = [
    file["dataFile"]["id"]
    for file in files_list
    if file["dataFile"]["filename"].endswith('.tab')
]

# Stop with a readable message instead of a bare IndexError when the dataset
# does not contain any tab file
if not ident:
    raise ValueError("No .tab file found in dataset {}".format(DOI))

# Save the ID of the first tab file that occurs in the list
datafile_id = ident[0]

# Step 3: Connect to API for data access at the repository
data_access_api = DataAccessApi(repository_url)

# Step 4: Download data file using its id
response = data_access_api.get_datafile(datafile_id)

In [None]:
## Working with the datafile Pt. 1

# The file is stored tab-delimited in the repository, i.e. the separator between
# columns (variables) is a tab, and the first line (the header) holds the variable names.
# Decode the downloaded bytes as UTF-8 and parse them into a Pandas data frame,
# using the first column as the row index.
data = pd.read_csv(
    io.StringIO(response.content.decode('utf-8')),
    sep="\t",
    index_col=0,
)

In [None]:
## Working with the datafile Pt. 2

# Preview the data frame by displaying its first five rows
data.head(n=5)

In [None]:
# If you want to use the survey data of the Corona Panel Project (Kittel et al., 2020), change the DOI in Step 2 of the code above to "doi:10.11587/P5YJ0O". Look at the codebook or the questionnaire to determine which variable contains information on which question.

In [None]:
# Manual process: pick the file yourself instead of taking the first tab file.
# First list every file of the dataset together with its id.
for file in files_list:
    filename = file["dataFile"]["filename"]
    file_id = file["dataFile"]["id"]
    print(f"File name {filename} has id {file_id}")

# Enter the id of the file you want to retrieve (take it from the list printed above).
datafile_id = 188
# Connect to the API for data access at the repository
data_access_api = DataAccessApi(repository_url)
# Retrieve the data file with the chosen id
response = data_access_api.get_datafile(datafile_id)