# Upload a dataset to Zenodo

Install required modules listed in requirements.txt with pip:
```
$ pip install -r requirements.txt
```

Create a `.env` file in the folder with the following content:
```
ACCESS_TOKEN=myzenodotoken
```

### Load required modules

In [25]:
# Import the zenodo_helper module from the parent directory.
import sys
sys.path.insert(0, '..')
from zenodo_helper import *

# Explicit imports for every module this notebook uses directly, so the cells
# do not depend on whatever names the star import above happens to re-export.
import hashlib
import os
import time

import pandas as pd
from dotenv import load_dotenv

# Load the variables from the .env file into the process environment *before*
# looking up the token; otherwise ACCESS_TOKEN is None on a fresh kernel
# unless the variable was already exported in the shell.
load_dotenv()
ACCESS_TOKEN = os.getenv('ACCESS_TOKEN')

Load dotenv in IPython Notebook

### Load geoflow entities table with pandas

In [26]:
# Folder holding the geoflow entities CSV and one sub-folder per dataset.
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# because the upload cell of this notebook reads it.
input = '/home/sylvain/Documents/IRD/DATA4'
file_entities = '20231018-164240_iso19115-metadata.csv'

# Read the entities table straight into a DataFrame.
df = pd.read_csv(os.path.join(input, file_entities))
df.head()

Unnamed: 0,Identifier,Title,Description,Subject,Creator,Date,Type,Language,SpatialCoverage,TemporalCoverage,Relation,Rights,Provenance,Format,Data
0,20230524_REU-ermitage_UAV-02_1,"title:Images UAV du projet TELEMAC, Ermitage, ...","abstract:""This dataset is made of 324 images c...","theme[General]:TELEMAC,Réunion,Hermitage,drone...","author:sylvain.poulain@ird.fr,pascal.mouquet@i...",publication:2023-10-18_\nedition:2023-10-18,dataset,fra,,2023-05-24 07-35-31 - 2023-05-24 07-58-33,thumbnail:telemac@https://www.osureunion.fr/wp...,useLimitation:Utilisation libre sous réserve d...,"statement:""- Camera model and parameters:\n Ma...",resource:image/jpg_\ndistribution:application/...,source:SurveyMetadata.gpkg@/home/sylvain/Docum...


### Upload to Zenodo

In [27]:
### Upload to Zenodo
# For every row of `df`:
#   1. extract the source file names from the 'Data' column,
#   2. reuse the existing deposit when the Identifier already carries a DOI,
#      otherwise initialise a deposit, write the DOI back into `df` and push
#      the metadata,
#   3. upload each source file and retry until its MD5 shows up in the
#      server-side file listing.


def _md5_hexdigest(path, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at `path`.

    The file is read in chunks of `chunk_size` bytes so that large archives
    do not have to fit in memory at once.
    """
    md5 = hashlib.md5()
    with open(path, "rb") as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            md5.update(chunk)
    return md5.hexdigest()


print("#### Upload zip files to Zenodo")
base_url = "https://zenodo.org/api/"
for zipul in range(len(df)):
    print('\nDataset:', zipul+1, "/" ,len(df), "\n    ",df.iloc[zipul]['Identifier'])
    zenodo_baseurl = base_url

    #### Extract source from Data in dataframe
    # The 'source:' entry is a comma-separated list of "name@location" items;
    # only the file name (the part before '@') is kept.
    sources = df.iloc[zipul]['Data'].split('source:')[1].split('_\n')[0]
    data_zip = [entry.split('@')[0] for entry in sources.split(',')]
    print(data_zip)

    ### Put Metadata or verify if doi exists (Metadata not updated if doi exists)
    identifier = df.iloc[zipul]['Identifier']
    if 'doi:' in identifier:
        # Last dot-separated component of the DOI is used to look the record up.
        doi_raw = identifier.split('_\ndoi:')[1].split('\n')[0].split('.')[-1]
        print("DOI already present: ",doi_raw)
        getrecid = zenlist_single(zenodo_baseurl, ACCESS_TOKEN, str(doi_raw))
        zenval = zenvar(getrecid)
    else:
        print("Initialize deposit")
        r = check_token(zenodo_baseurl, ACCESS_TOKEN)
        zenval = zenvar(r)
        print("prereserved doi:"+zenval[1])
        # Derive doi_raw from the freshly reserved DOI exactly as the branch
        # above does, so the reconnect in the retry loop works for new
        # deposits too (it was previously undefined or stale on this path).
        doi_raw = zenval[1].split('.')[-1]
        print("Write DOI to dataframe")
        dfzen = df  # alias, not a copy: the writes below land in `df`
        if 'id:' not in identifier:
            dfzen.iloc[zipul, dfzen.columns.get_loc('Identifier')] = "id:" + identifier + "_\ndoi:" + zenval[1]
            dfzen.iloc[zipul, dfzen.columns.get_loc("Provenance")] = dfzen.iloc[zipul]["Provenance"] + "_\nprocess:Raw dataset uploaded to " + base_url.split('api')[0] + "record/" + str(zenval[2])

        print("Enrich upload with metadata")
        zen_metadata = zenmdt(zenodo_baseurl, ACCESS_TOKEN, zenval[2], df, zipul)
        # >= 400 so that a plain "400 Bad Request" is reported as well.
        if zen_metadata.status_code >= 400:
            print("error in metadata, please check there is no double keywords: \n" + zen_metadata.text)
            break

    print("upload data")
    print("Trying upload number: 1")

    # Local folder of this dataset: <input>/<id part of the Identifier>.
    # The Identifier is not modified below, so compute the folder once.
    dataset_dir = os.path.join(input, df.iloc[zipul]['Identifier'].split('_\n')[0].split(':')[1])

    for file in data_zip:
        ul_count = 1
        ### Control md5 checksum:
        ful = os.path.join(dataset_dir, file)
        print(ful)
        md5_hex = _md5_hexdigest(ful)
        print("    md5:", md5_hex)

        # Retry until the local checksum appears in the server-side listing.
        # NOTE(review): there is no upper bound on the number of retries.
        while md5_hex not in zenlist_single_files(zenodo_baseurl, ACCESS_TOKEN, str(zenval[2])).text:
            # Reset connection every 15 tries
            if ul_count % 15 == 0:
                getrecid = zenlist_single(zenodo_baseurl, ACCESS_TOKEN, str(doi_raw))
                zenval = zenvar(getrecid)

            file_on_server = zenlist_single_files(zenodo_baseurl, ACCESS_TOKEN, str(zenval[2]))
            ### Clean wrong checksum
            # A file with the same name but a different checksum is deleted
            # on the server before the upload is attempted again.
            if file in file_on_server.text:
                print("    clean wrong files")
                for remote in file_on_server.json():
                    if file == remote['filename']:
                        print("    checksum on server", remote['checksum'])
                        zen_del_file(remote["links"]["self"], ACCESS_TOKEN)

            zen_upload = zenul(zenval[0], ACCESS_TOKEN, dataset_dir, [file])
            print("        ", zen_upload.text)
            if zen_upload.status_code == 404:
                print("Version doesn't exists ! Please check your record_id")
                break

            ul_count += 1
            print("    Retry number: " + str(ul_count))
            if zen_upload.status_code == 403:
                print("        Friday 13th 2023 nightmare => permission denied!")
                time.sleep(5)


#### Upload zip files to Zenodo

Dataset: 1 / 1 
     20230524_REU-ermitage_UAV-02_1
['SurveyMetadata.gpkg']
Initialize deposit
Allowed to deposit some files
prereserved doi:10.5281/zenodo.10072349
Write DOI to dataframe
Enrich upload with metadata
upload data
Trying upload number: 1
/home/sylvain/Documents/IRD/DATA4/20230524_REU-ermitage_UAV-02_1/SurveyMetadata.gpkg
    md5: 9d635f5c3e30a2453e8fc9826fb2d202
    Sleep 5 seconds before new upload
upload: SurveyMetadata.gpkg
         {"created": "2023-11-05T06:34:41.313453+00:00", "updated": "2023-11-05T06:34:42.913152+00:00", "version_id": "f25e784c-d9dc-4d65-bae2-c34ba439555c", "key": "SurveyMetadata.gpkg", "size": 10149888, "mimetype": "application/octet-stream", "checksum": "md5:9d635f5c3e30a2453e8fc9826fb2d202", "is_head": true, "delete_marker": false, "links": {"self": "https://zenodo.org/api/files/84ee1d92-4614-41f2-b614-f329c8e94d92/SurveyMetadata.gpkg", "version": "https://zenodo.org/api/files/84ee1d92-4614-41f2-b614-f329c8e94d9

### Display DOI

In [31]:
# Show each identifier with its "_\n" field separators rendered as plain
# line breaks, one dataset after another.
for identifier in df['Identifier']:
    print(identifier.replace('_\n', '\n'))

id:20230524_REU-ermitage_UAV-02_1
doi:10.5281/zenodo.10072349


In [33]:
### Remove ACCESS_TOKEN
# Overwrite the notebook-level variable with an empty string so the token is
# no longer held in it once the upload is done. The process environment the
# token was originally read from is not modified here.
ACCESS_TOKEN = ''