# Script for Uploading Files Automatically

If you have questions about the code, contact rdr-contacte@csuc.cat

## Script Objective
The main objective of this script is to automatically upload files to a dataset, together with their metadata, which is read from an Excel file.

## Script Observation

Place the script files and the files to be uploaded to the dataset in the same folder on your computer.

### In COLAB
Upload the Excel file using the Upload files icon. The files to be uploaded to the dataset must also be available in the Colab working folder.

### To generate the Excel and make the script work correctly, you must follow these requirements:

- The first row is the header and must contain the names of the columns in this order:
    - File Name
    - Description
    - File Path
    - Tag
- Each row corresponds to a file.
- The file name (File Name) is the only mandatory metadata.
    - It must be written correctly and include its extension.
- If a cell has no information, leave it blank.
- If a metadata value is a number, write it within quotes.
- To assign several tags to a file, write them in the Tag column separated by commas.


In [None]:
# @title Install or Update Libraries. Click the Run button &#x25B6;


import ipywidgets as widgets
from IPython.display import display, HTML, clear_output

def install_packages(b):
    """
    Function to install or update required packages.

    Args:
    b : ipywidgets.Button : Button widget for installation.

    Returns:
    None
    """
    clear_output(wait=True)
    !pip install --upgrade pip -q
    !pip install pyDataverse -q
    !pip install numpy -q
    !pip install numpy pandas -q
    !pip install openpyxl -q
    print("Libraries downloaded or updated.")

# Tell the user what the button is for before showing it
install_prompt = HTML(
    "<p style='font-size:14px;'><b>Click the button below to install the required libraries.</b></p>"
)
display(install_prompt)

# Build the button and connect the installer to its click event
install_button = widgets.Button(description='Install Libraries')
install_button.on_click(install_packages)

# Render the button in the cell output
display(install_button)


In [None]:
# @title Enter the DOI (doi:10.34810/dataXXX), the token, the name of the Excel file (including the .xlsx extension) and the repository URL. Then click the Run button &#x25B6;
import os
import pandas as pd
from pyDataverse.api import NativeApi, DataAccessApi
from pyDataverse.models import Dataverse, Datafile
from pathlib import Path

# Provide input values
# Each "# @param" marker makes Colab render the assignment as a form field,
# so these lines must keep the shape `name = value  # @param {...}`.
doi = ""  # @param {type:"string"}
token = ""  # @param {type:"string"}
excel_file_name = "" # @param {type:"string"}
base_url = 'https://dataverse.csuc.cat/'  # @param {type:"string"}

# Initialize API
# Clients for the Native API and the Data Access API, both built from the
# repository URL and token entered above.
api = NativeApi(base_url, token)
data_api = DataAccessApi(base_url, token)

def _cell_text(row, index):
    """Return the text of ``row[index]``, or None when the cell is missing or blank."""
    if index >= len(row):
        return None
    value = row[index]
    if value is None:
        return None
    if isinstance(value, str):
        return value.strip() or None
    # pandas reads empty Excel cells as NaN.
    if pd.isna(value):
        return None
    # Numeric cells: render whole numbers without a trailing ".0".
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return str(value)


def _request_failed(resp):
    """Return True when ``resp`` carries an HTTP error status (>= 300)."""
    # A response object without a status code is treated as success.
    status = getattr(resp, "status_code", None)
    return status is not None and status >= 300


def upload_files(base_url, token, doi, excel_file_name):
    """
    Upload the files listed in an Excel sheet to a dataset, with their metadata.

    The sheet is read by column position: File Name, Description, File Path,
    Tag. Every listed file must exist (relative to the working directory)
    before anything is uploaded; otherwise the problems are reported and no
    file is sent. A file that fails to upload is reported and the remaining
    files are still attempted.

    Args:
    base_url (str): Base URL of the repository.
    token (str): API token for authentication.
    doi (str): DOI of the dataset.
    excel_file_name (str): Name of the Excel file with metadata.

    Returns:
    None
    """
    try:
        rows = pd.read_excel(excel_file_name).to_numpy().tolist()
    except FileNotFoundError:
        print(f'Metadata file not found: {excel_file_name}')
        return

    # First pass: validate every row so that nothing is uploaded when the
    # sheet contains a mistake.
    entries = []
    all_valid = True
    # Excel row numbers: row 1 is the header, data starts at row 2.
    for row_number, row in enumerate(rows, start=2):
        file_name = _cell_text(row, 0)
        if file_name is None:
            print(f'Missing file name in Excel row {row_number}')
            all_valid = False
            continue
        if not Path(file_name).is_file():
            print('File not found: ' + file_name)
            all_valid = False
            continue
        entries.append(
            (file_name, _cell_text(row, 1), _cell_text(row, 2), _cell_text(row, 3))
        )

    if not all_valid:
        print('No files uploaded. Please modify the file names that are incorrect.')
        return
    if not entries:
        print(f'No files listed in: {excel_file_name}')
        return

    # Build the client from the arguments so the function honours the
    # base_url and token it was given.
    native_api = NativeApi(base_url, token)

    # Check that the dataset is reachable with this token before uploading.
    try:
        dataset_resp = native_api.get_dataset(doi)
    except Exception as err:  # top-level boundary of the notebook cell
        print(f'Incorrect token or DOI not found: {doi}')
        print(f'Details: {err}')
        return
    if _request_failed(dataset_resp):
        print(f'Incorrect token or DOI not found: {doi}')
        return

    # Second pass: upload each file with the metadata that is present.
    for file_name, description, directory, tags in entries:
        metadata = {'pid': doi, 'filename': file_name}
        if description is not None:
            metadata['description'] = description
        if directory is not None:
            metadata['directoryLabel'] = directory
        if tags is not None:
            # Ignore spaces around commas and empty items ("a, b," -> a, b).
            categories = [tag.strip() for tag in tags.split(',') if tag.strip()]
            if categories:
                metadata['categories'] = categories
        try:
            datafile = Datafile()
            datafile.set(metadata)
            resp = native_api.upload_datafile(doi, file_name, datafile.json())
        except Exception as err:  # report this file and keep going
            print(f'Upload failed: {file_name} ({err})')
            continue
        if _request_failed(resp):
            print(f'Upload failed: {file_name} (HTTP {resp.status_code})')
            continue
        print('File uploaded: ' + file_name)

# Run the upload with the values entered in the form above
upload_files(
    base_url=base_url,
    token=token,
    doi=doi,
    excel_file_name=excel_file_name,
)