# Template RGB Plot

### Importing modules for the Jupyter Notebook

In [4]:
import importlib
import json
import os
import re
import shutil
import subprocess
import sys
import textwrap
import urllib.request as urllib
import zipfile

import numpy as np
import pandas as pd
from PIL import Image

## Ensure that you are in the correct directory:

In [5]:
# If README.md is in the current directory, change into the project folder
# (the folder name is filled in by the cookiecutter template).
if os.path.isfile("README.md"):
    os.chdir("./{{cookiecutter._project_name}}")

## Top-level Docstring. Change this to reflect your algorithm

#### Imports into the python file. Please add any additional import statements that will be needed for your algorithm below

#### Define the version number of your algorithm. Consider using [Semantic Versioning](https://semver.org/)

#### Provide information on the creator and contributors of this algorithm

#### Name and describe your algorithm

#### Provide citation information for algorithm publication. This includes the citation author, the citation title, and the citation year

#### Include the name(s) of the variable(s) used in the algorithm, separated by commas. Note that variable names cannot contain commas: use a different separator instead. Also, all whitespace is kept intact; don't add any extra whitespace since it may cause name comparisons to fail

#### Include the units and labels of the variables, matching the order of VARIABLE_NAMES, also separated by commas. VARIABLE_LABELS is an optional field and can be left empty.

#### Optional override for the generation of a BETYdb compatible csv file. Set to False to suppress the creation of a compatible file

#### Optional override for the generation of a TERRA REF Geostreams compatible csv file. Set the variable to False to suppress the creation of a compatible file

## Change the URL below to get images from a different source. This assumes that the image files are contained within a .zip file. The code below downloads the archive and extracts its contents into the sample_plot_images folder

In [6]:
# Source archive for the sample images; point this at a different .zip to use other data.
url = "https://de.cyverse.org/dl/d/4108BB75-AAA3-48E1-BBD4-E10B06CADF54/sample_plot_images.zip"

# Start from an empty sample_plot_images folder so files from earlier runs are not mixed in.
if os.path.isdir("sample_plot_images"):
    shutil.rmtree("sample_plot_images")
os.mkdir("sample_plot_images")

# Download only when no archive is already present in the working directory.
if not os.path.isfile("sample_plot_images.zip"):
    urllib.urlretrieve(url, "sample_plot_images.zip")

# The context manager closes the archive before it is deleted: removing a file
# that is still open fails on Windows and leaks the handle on other platforms.
with zipfile.ZipFile("sample_plot_images.zip", 'r') as archive:
    archive.extractall("sample_plot_images")
os.remove("sample_plot_images.zip")

#### Define your calculate() function

## Update algorithm_rgb.py with your changes

In [23]:
def write_algorithm_rgb_file():
    """Regenerate algorithm_rgb.py from the cells of ../template-rgb-plot.ipynb.

    Cells are processed in notebook order:

    * markdown cells: every source line that starts with "####" has that
      marker removed, is wrapped into "# " comment lines by format_string()
      and is written preceded by two newlines and followed by one;
    * raw cells: the source lines are concatenated and written verbatim;
    * every other cell type is ignored.

    A final newline is written after the last cell.
    """
    # Notebook files are UTF-8 encoded JSON. The context manager closes the
    # handle that a bare json.load(open(...)) would leave open.
    with open("../template-rgb-plot.ipynb", encoding="utf-8") as notebook:
        cells = json.load(notebook)["cells"]

    # Python source is UTF-8 by default, so write the module with that encoding.
    with open("algorithm_rgb.py", "w", encoding="utf-8") as outfile:
        for cell in cells:
            if cell["cell_type"] == "markdown":
                for entry in cell["source"]:
                    if entry.startswith("####"):
                        # Format each heading on its own: carrying text over
                        # from one heading to the next would repeat earlier
                        # headings when a cell holds more than one.
                        heading = re.sub('####', '', entry).lstrip()
                        outfile.write("\n\n" + format_string(heading) + "\n")
            elif cell["cell_type"] == "raw":
                outfile.write("".join(cell["source"]))
        outfile.write("\n")
                
def format_string(toWrite):
    """Turn a piece of text into a block of "# "-prefixed comment lines.

    The text is wrapped at 115 characters without splitting long words, and
    every wrapped line is prefixed with "# ". Lines are joined with newlines
    and no trailing newline is added; text that wraps to nothing yields an
    empty string.
    """
    wrapped = textwrap.wrap(toWrite, width=115, break_long_words=False)
    return "\n".join("# " + piece for piece in wrapped)

# Rewrite algorithm_rgb.py from the notebook's raw cells and "####" markdown lines.
write_algorithm_rgb_file()

## Test the calculate() function on the sample plot images located in the sample_plot_images folder

In [8]:
# algorithm_rgb is imported here rather than with the other imports because
# algorithm_rgb.py is written by write_algorithm_rgb_file() in this notebook.
import algorithm_rgb

# A plain import is a no-op once the module is loaded, so reload it to pick up
# a file that was regenerated after the first import in this kernel session.
importlib.reload(algorithm_rgb)

for filename in os.listdir("sample_plot_images"):
    # The context manager releases the image file handle as soon as the pixel
    # data has been copied into the array.
    with Image.open(os.path.join("sample_plot_images", filename)) as img:
        img_arr = np.array(img)
    print(algorithm_rgb.calculate(img_arr))

35000
35000
35000
35000
35000
35000


## Next, generate your Dockerfile by running the generate.py script

In [9]:
# Run generate.py with the interpreter backing this kernel (sys.executable)
# rather than whichever "python" is first on PATH. The command is an argument
# list, so no shell is involved.
cmd0 = [sys.executable, "generate.py"]
# The exit code is the cell's displayed value.
subprocess.run(cmd0).returncode

0

## If there are leftover files from previous runs, delete them

In [10]:
# Output files a previous run may have left in the working directory.
filelist = ["result.json", "rgb_plot.csv", "rgb_plot_betydb.csv", "rgb_plot_geo.csv"]
# filter() is lazy, so each path is checked right before it is removed.
for stale_path in filter(os.path.isfile, filelist):
    os.remove(stale_path)

## Now build the Docker image from the Dockerfile (currently this will have a default project name and project version)

In [11]:
# Tag the image as <ALGORITHM_NAME>:<VERSION>, both read from the generated module.
image_tag = algorithm_rgb.ALGORITHM_NAME + ":" + algorithm_rgb.VERSION
# Passing an argument list (no shell) keeps characters in the tag from being
# interpreted by a shell.
cmd = ["docker", "build", "-t", image_tag, "."]
# The exit code is the cell's displayed value.
subprocess.run(cmd).returncode

0

## Next, run the Docker image for testing

In [12]:
# Run the image tagged <ALGORITHM_NAME>:<VERSION> with the current directory
# bind-mounted at /mnt, which is also passed as the working space.
image_tag = algorithm_rgb.ALGORITHM_NAME + ":" + algorithm_rgb.VERSION
cmd = [
    "docker", "run", "--rm",
    # os.getcwd() replaces the shell-only `pwd` substitution.
    "--mount", "src=" + os.getcwd() + ",target=/mnt,type=bind",
    image_tag,
    "--working_space", "/mnt",
]
# Each sample image is passed as its path inside the container. As separate
# list items, file names from the downloaded archive cannot inject shell syntax.
for filename in os.listdir("sample_plot_images"):
    cmd.append("/mnt/sample_plot_images/" + filename)
# The exit code is the cell's displayed value.
subprocess.run(cmd).returncode

0

## Make sure that the correct files are generated and contain appropriate results

In [13]:
# Files the container run is expected to leave in the working directory.
filelist = ["result.json", "rgb_plot.csv", "rgb_plot_betydb.csv", "rgb_plot_geo.csv"]
# The counters in result.json are compared as strings, so build the expected
# value once from the number of sample images.
expected_count = str(len(os.listdir("sample_plot_images")))

for file in filelist:
    assert os.path.isfile(file), "expected output file is missing: " + file
    if file == "result.json":
        # Close the file as soon as it has been parsed.
        with open(file) as result_file:
            result = json.load(result_file)[algorithm_rgb.ALGORITHM_NAME]

        assert result['version'] == algorithm_rgb.VERSION, "result.json: unexpected version"
        assert result['traits'] == algorithm_rgb.VARIABLE_NAMES, "result.json: unexpected traits"
        assert result['units'] == algorithm_rgb.VARIABLE_UNITS, "result.json: unexpected units"
        assert result['labels'] == algorithm_rgb.VARIABLE_LABELS, "result.json: unexpected labels"
        assert result['files_processed'] == expected_count, "result.json: unexpected files_processed"
        assert result['lines_written'] == expected_count, "result.json: unexpected lines_written"

        # The explicit comparison with True is kept on purpose: a value that is
        # only truthy (but not equal to True) is still expected to report "No".
        expected_geostreams = "Yes" if algorithm_rgb.WRITE_GEOSTREAMS_CSV == True else "No"
        assert result['wrote_geostreams'] == expected_geostreams, "result.json: unexpected wrote_geostreams"
        expected_betydb = "Yes" if algorithm_rgb.WRITE_BETYDB_CSV == True else "No"
        assert result['wrote_betydb'] == expected_betydb, "result.json: unexpected wrote_betydb"

## View the output files. They will be displayed in the following order: 
## 1.) result.json
## 2.) rgb_plot.csv
## 3.) rgb_plot_betydb.csv
## 4.) rgb_plot_geo.csv

In [14]:
# Parse result.json inside a context manager so the file handle is closed,
# then print the decoded content.
with open("result.json") as result_file:
    print(json.load(result_file))

{'code': 0, 'file': [{'path': '/mnt/rgb_plot.csv', 'key': 'csv'}, {'path': '/mnt/rgb_plot_geo.csv', 'key': 'csv'}, {'path': '/mnt/rgb_plot_betydb.csv', 'key': 'csv'}], 'algorithm': {'version': '1.0', 'traits': 'size of image channels', 'units': 'pixels', 'labels': '', 'files_processed': '6', 'lines_written': '6', 'wrote_geostreams': 'Yes', 'wrote_betydb': 'Yes'}}


In [15]:
# Load rgb_plot.csv into a DataFrame and print it for inspection.
rgb_plot = pd.read_csv("rgb_plot.csv")
print(rgb_plot)

   species  site   timestamp        lat         lon      citation_author  \
0      NaN   NaN  2021-01-05  33.075194 -111.974953  add citation author   
1      NaN   NaN  2021-01-05  33.075949 -111.974888  add citation author   
2      NaN   NaN  2021-01-05  33.074727 -111.975043  add citation author   
3      NaN   NaN  2021-01-05  33.074547 -111.975027  add citation author   
4      NaN   NaN  2021-01-05  33.075697 -111.974937  add citation author   
5      NaN   NaN  2021-01-05  33.074691 -111.974888  add citation author   

   citation_year      citation_title  size of image channels (pixels)  
0           2020  add citation title                          35000.0  
1           2020  add citation title                          35000.0  
2           2020  add citation title                          35000.0  
3           2020  add citation title                          35000.0  
4           2020  add citation title                          35000.0  
5           2020  add citation titl

In [16]:
# Load rgb_plot_betydb.csv into a DataFrame and print it for inspection.
rgb_plot_betydb = pd.read_csv("rgb_plot_betydb.csv")
print(rgb_plot_betydb)

        local_datetime  access_level  species  site      citation_author  \
0  2021-01-05T21:58:41             2      NaN   NaN  add citation author   
1  2021-01-05T21:58:41             2      NaN   NaN  add citation author   
2  2021-01-05T21:58:41             2      NaN   NaN  add citation author   
3  2021-01-05T21:58:41             2      NaN   NaN  add citation author   
4  2021-01-05T21:58:41             2      NaN   NaN  add citation author   
5  2021-01-05T21:58:41             2      NaN   NaN  add citation author   

   citation_year      citation_title   method  size of image channels  
0           2020  add citation title  Unknown                 35000.0  
1           2020  add citation title  Unknown                 35000.0  
2           2020  add citation title  Unknown                 35000.0  
3           2020  add citation title  Unknown                 35000.0  
4           2020  add citation title  Unknown                 35000.0  
5           2020  add citation titl

In [17]:
# Load rgb_plot_geo.csv into a DataFrame and print it for inspection.
rgb_plot_geo = pd.read_csv("rgb_plot_geo.csv")
print(rgb_plot_geo)

   site                   trait        lat         lon              dp_time  \
0   NaN  size of image channels  33.075194 -111.974953  2021-01-05T21:58:41   
1   NaN  size of image channels  33.075949 -111.974888  2021-01-05T21:58:41   
2   NaN  size of image channels  33.074727 -111.975043  2021-01-05T21:58:41   
3   NaN  size of image channels  33.074547 -111.975027  2021-01-05T21:58:41   
4   NaN  size of image channels  33.075697 -111.974937  2021-01-05T21:58:41   
5   NaN  size of image channels  33.074691 -111.974888  2021-01-05T21:58:41   

                                    source    value   timestamp  
0   /mnt/sample_plot_images/rgb_17_7_W.tif  35000.0  2021-01-05  
1  /mnt/sample_plot_images/rgb_40_11_W.tif  35000.0  2021-01-05  
2    /mnt/sample_plot_images/rgb_6_1_E.tif  35000.0  2021-01-05  
3    /mnt/sample_plot_images/rgb_1_2_E.tif  35000.0  2021-01-05  
4   /mnt/sample_plot_images/rgb_33_8_W.tif  35000.0  2021-01-05  
5   /mnt/sample_plot_images/rgb_5_11_W.tif  35000.