# Template RGB Plot

### Importing modules for the Jupyter Notebook

In [115]:
import sys
!{sys.executable} -m pip install numpy
!{sys.executable} -m pip install pandas
!{sys.executable} -m pip install pillow



In [116]:
import json
import numpy as np
import os
import pandas as pd
import re
import shutil
import sys
import textwrap
import urllib.request as urllib
import zipfile

from datetime import datetime
from pathlib import Path
from PIL import Image
from tkinter.filedialog import askdirectory

## Ensure that you are in the correct directory:

In [117]:
# When README.md is present in the current working directory, switch into the
# project template sub-directory before running the remaining cells.
in_readme_dir = os.path.isfile("README.md")
if in_readme_dir:
    os.chdir("./{{cookiecutter._project_name}}")

## Top-level Docstring. Change this to reflect your algorithm

#### Imports into the python file. Please add any additional import statements that will be needed for your algorithm below

#### Define the version number of your algorithm. Consider using [Semantic Versioning](https://semver.org/)

#### Provide information on the creator and contributors of this algorithm

#### Name and describe your algorithm

#### Provide citation information for algorithm publication. This includes the citation author, the citation title, and the citation year

#### Include the name(s) of the variable(s) used in the algorithm, separated by commas. Note that variable names cannot have commas in them: use a different separator instead. Also, all white space is kept intact; don't add any extra whitespace since it may cause name comparisons to fail

#### Include the units and labels of the variables, matching the order of VARIABLE_NAMES, also separated by commas. VARIABLE_LABELS is an optional field and can be left empty.

#### Optional override for the generation of a BETYdb compatible csv file. Set to False to suppress the creation of a compatible file

#### Optional override for the generation of a TERRA REF Geostreams compatible csv file. Set the variable to False to suppress the creation of a compatible file

## Change the url below to get images from a different source. This assumes that the image files are contained within a .zip file. The code below downloads the archive and extracts its contents into the sample_plot_images folder

In [118]:
# Location of the zipped sample images; change this to pull images from another source.
url = "https://de.cyverse.org/dl/d/4108BB75-AAA3-48E1-BBD4-E10B06CADF54/sample_plot_images.zip"

# Start from an empty folder so images from earlier runs do not linger.
if os.path.isdir("sample_plot_images"):
    shutil.rmtree("sample_plot_images")
os.mkdir("sample_plot_images")

# Reuse an archive that is already on disk; otherwise download it.
if not os.path.isfile("sample_plot_images.zip"):
    urllib.urlretrieve(url, "sample_plot_images.zip")

# The context manager closes the archive handle before the zip file is deleted.
with zipfile.ZipFile("sample_plot_images.zip", 'r') as archive:
    archive.extractall("sample_plot_images")
os.remove("sample_plot_images.zip")

#### Define your calculate() function

## Update algorithm_rgb.py with your changes

In [119]:
def write_algorithm_rgb_file():
    """Regenerate algorithm_rgb.py from the cells of ../template-rgb-plot.ipynb.

    Markdown source lines that start with "####" are written as wrapped "# "
    comments (via format_string), and the source of raw cells is copied verbatim.
    All other cells and lines are ignored.
    """
    # Read the notebook inside a context manager so its handle is always closed.
    with open("../template-rgb-plot.ipynb") as notebook:
        cells = json.load(notebook)["cells"]
    with open("algorithm_rgb.py", "w") as outfile:
        for key in cells:
            if key["cell_type"] == "markdown":
                for entry in key["source"]:
                    if entry.startswith("####"):
                        # Format every heading on its own so that a cell holding
                        # several "####" lines does not repeat earlier headings.
                        heading = re.sub('####', '', entry).lstrip()
                        outfile.write("\n\n" + format_string(heading) + "\n")
            elif key["cell_type"] == "raw":
                outfile.write("".join(key["source"]))
        outfile.write("\n")
                
def format_string(toWrite):
    """Turn a block of text into "# "-prefixed comment lines.

    The text is wrapped at a width of 115 characters (measured before the prefix
    is added) without splitting long words. The resulting lines are joined with
    newlines and carry no trailing newline; text that wraps to nothing yields "".
    """
    wrapped = textwrap.wrap(toWrite, width=115, break_long_words=False)
    return "\n".join("# " + piece for piece in wrapped)

# Rewrite algorithm_rgb.py from the notebook's "####" markdown lines and raw cells.
write_algorithm_rgb_file()

## Test the calculate() function on the sample plot images located in the sample_plot_images folder

In [120]:
import algorithm_rgb

# Run calculate() on every sample image and print each result.
for filename in os.listdir("sample_plot_images"):
    # The context manager releases the image file handle once the pixels have
    # been copied into the array.
    with Image.open(os.path.join("sample_plot_images", filename)) as img:
        img_arr = np.array(img)
    print(algorithm_rgb.calculate(img_arr))

35000
35000
35000
35000
35000
35000


## Next, generate your Dockerfile by running the generate.py script

In [121]:
# Run generate.py with the interpreter backing this kernel; a bare "python" may
# resolve to a different installation (or to none at all) on PATH.
cmd0 = '"' + sys.executable + '" generate.py'
os.system(cmd0)

0

## If there are leftover files from previous runs, delete them

In [122]:
# Output files a previous run may have left in the working directory.
filelist = ["result.json", "rgb_plot.csv", "rgb_plot_betydb.csv", "rgb_plot_geo.csv"]
for file in filelist:
    # Nothing to clean up when the file is not there.
    if not os.path.isfile(file):
        continue
    os.remove(file)

## Now build the Docker image from the Dockerfile (Currently this will have a default project name and project version)

In [123]:
# Tag the image as <ALGORITHM_NAME>:<VERSION> and build it from the current directory.
image_tag = algorithm_rgb.ALGORITHM_NAME + ":" + algorithm_rgb.VERSION
cmd = " ".join(["docker", "build", "-t", image_tag, "."])
os.system(cmd)

0

## Next run the Docker image for testing

In [124]:
# Bind-mount the current directory at /mnt and pass every sample image, by its
# path inside the container, as a quoted argument.
image_tag = algorithm_rgb.ALGORITHM_NAME + ":" + algorithm_rgb.VERSION
image_args = ['"/mnt/sample_plot_images/' + filename + '"'
              for filename in os.listdir("sample_plot_images")]
cmd = " ".join(
    ['docker run --rm --mount "src=`pwd`,target=/mnt,type=bind"',
     image_tag,
     '--working_space "/mnt"']
    + image_args
)
os.system(cmd)

0

## Make sure that the correct files are generated and contain appropriate results

In [125]:
filelist = ["result.json", "rgb_plot.csv", "rgb_plot_betydb.csv", "rgb_plot_geo.csv"]

# Collect this run's outputs in a timestamped folder under the working directory.
saveDir = Path.cwd() / ("outputs_" + str(datetime.now()).replace(" ", "").replace(":", "."))
saveDir.mkdir()

for filename in filelist:
    assert os.path.isfile(filename), filename + " was not generated"
    # Test this loop's own variable: the name `file` left over from the cleanup
    # cell never equals "result.json", which would skip every check below.
    if filename == "result.json":
        with open(filename) as result_file:
            result = json.load(result_file)[algorithm_rgb.ALGORITHM_NAME]
        # Counts are stored as strings in result.json.
        expected_count = str(len(os.listdir("sample_plot_images")))
        assert result['version'] == algorithm_rgb.VERSION
        assert result['traits'] == algorithm_rgb.VARIABLE_NAMES
        assert result['units'] == algorithm_rgb.VARIABLE_UNITS
        assert result['labels'] == algorithm_rgb.VARIABLE_LABELS
        assert result['files_processed'] == expected_count
        assert result['lines_written'] == expected_count
        # The CSV override flags are reported as "Yes"/"No".
        if (algorithm_rgb.WRITE_GEOSTREAMS_CSV == True):
            assert result['wrote_geostreams'] == "Yes"
        else:
            assert result['wrote_geostreams'] == "No"
        if (algorithm_rgb.WRITE_BETYDB_CSV == True):
            assert result['wrote_betydb'] == "Yes"
        else:
            assert result['wrote_betydb'] == "No"
    # shutil.move avoids the shell, so paths containing spaces are handled.
    shutil.move(filename, str(saveDir / Path(filename).name))

## View the output files. They will be displayed in the following order: 
## 1.) result.json
## 2.) rgb_plot.csv
## 3.) rgb_plot_betydb.csv
## 4.) rgb_plot_geo.csv

In [126]:
# Read result.json through a context manager so the file handle is closed.
with open(saveDir / "result.json") as result_file:
    print(json.load(result_file))

{'code': 0, 'file': [{'path': '/mnt/rgb_plot.csv', 'key': 'csv'}, {'path': '/mnt/rgb_plot_geo.csv', 'key': 'csv'}, {'path': '/mnt/rgb_plot_betydb.csv', 'key': 'csv'}], 'algorithm': {'version': '1.0', 'traits': 'size of image channels', 'units': 'pixels', 'labels': '', 'files_processed': '6', 'lines_written': '6', 'wrote_geostreams': 'Yes', 'wrote_betydb': 'Yes'}}


In [127]:
# Load the general CSV output from the saved run folder and print it.
rgb_plot_path = saveDir / "rgb_plot.csv"
rgb_plot = pd.read_csv(rgb_plot_path)
print(rgb_plot)

   species                site   timestamp        lat         lon  \
0  Unknown  sample_plot_images  2021-04-28  33.075194 -111.974953   
1  Unknown  sample_plot_images  2021-04-28  33.075949 -111.974888   
2  Unknown  sample_plot_images  2021-04-28  33.074727 -111.975043   
3  Unknown  sample_plot_images  2021-04-28  33.074547 -111.975027   
4  Unknown  sample_plot_images  2021-04-28  33.075697 -111.974937   
5  Unknown  sample_plot_images  2021-04-28  33.074691 -111.974888   

       citation_author  citation_year      citation_title  \
0  add citation author           2020  add citation title   
1  add citation author           2020  add citation title   
2  add citation author           2020  add citation title   
3  add citation author           2020  add citation title   
4  add citation author           2020  add citation title   
5  add citation author           2020  add citation title   

   size of image channels (pixels)  
0                          35000.0  
1             

In [128]:
# Load the BETYdb CSV output from the saved run folder and print it.
rgb_plot_betydb_path = saveDir / "rgb_plot_betydb.csv"
rgb_plot_betydb = pd.read_csv(rgb_plot_betydb_path)
print(rgb_plot_betydb)

        local_datetime  access_level  species                site  \
0  2021-04-28T17:23:34             2  Unknown  sample_plot_images   
1  2021-04-28T17:23:34             2  Unknown  sample_plot_images   
2  2021-04-28T17:23:34             2  Unknown  sample_plot_images   
3  2021-04-28T17:23:34             2  Unknown  sample_plot_images   
4  2021-04-28T17:23:34             2  Unknown  sample_plot_images   
5  2021-04-28T17:23:34             2  Unknown  sample_plot_images   

       citation_author  citation_year      citation_title   method  \
0  add citation author           2020  add citation title  Unknown   
1  add citation author           2020  add citation title  Unknown   
2  add citation author           2020  add citation title  Unknown   
3  add citation author           2020  add citation title  Unknown   
4  add citation author           2020  add citation title  Unknown   
5  add citation author           2020  add citation title  Unknown   

   size of image channels

In [129]:
# Load the Geostreams CSV output from the saved run folder and print it.
rgb_plot_geo_path = saveDir / "rgb_plot_geo.csv"
rgb_plot_geo = pd.read_csv(rgb_plot_geo_path)
print(rgb_plot_geo)

                 site                   trait        lat         lon  \
0  sample_plot_images  size of image channels  33.075194 -111.974953   
1  sample_plot_images  size of image channels  33.075949 -111.974888   
2  sample_plot_images  size of image channels  33.074727 -111.975043   
3  sample_plot_images  size of image channels  33.074547 -111.975027   
4  sample_plot_images  size of image channels  33.075697 -111.974937   
5  sample_plot_images  size of image channels  33.074691 -111.974888   

               dp_time                                   source    value  \
0  2021-04-28T17:23:34   /mnt/sample_plot_images/rgb_17_7_W.tif  35000.0   
1  2021-04-28T17:23:34  /mnt/sample_plot_images/rgb_40_11_W.tif  35000.0   
2  2021-04-28T17:23:34    /mnt/sample_plot_images/rgb_6_1_E.tif  35000.0   
3  2021-04-28T17:23:34    /mnt/sample_plot_images/rgb_1_2_E.tif  35000.0   
4  2021-04-28T17:23:34   /mnt/sample_plot_images/rgb_33_8_W.tif  35000.0   
5  2021-04-28T17:23:34   /mnt/sample_pl

## You can now generate your repo using the cookiecutter utility, documented at https://github.com/cookiecutter/cookiecutter/blob/master/README.md

In [130]:
# Load the cookiecutter template settings from the parent directory; the context
# manager closes the file even if the JSON cannot be parsed.
with open(Path.cwd().parent / "template_cookiecutter.json", "r") as template:
    contents = json.load(template)
print(contents)

{'_project_name': 'Name your Project', '_url': 'https://github.com/AgPipeline/template-rgb-plot', '_version': 'Replace this text with the algorithm version', '_author(s)': ['Replace this text with the algorithm author(s), separated by commas'], '_author_email(s)': ['Replace this text with the algorithm author email(s), separated by commas'], '_contributors': ['Replace this text with the algorithm contributors, separated by commas'], '_algorithm_name': 'Replace this text with the name of the algorithm', '_algorithm_description': 'Replace this text with a description of the algorithm', '_citation_author': 'Replace this text with the citation author for publication', '_citation_title': 'Replace this text with the citation title for publication', '_citation_year': 'Replace this text with the citation year for publication', '_variable_names': ["Replace this text with your variable names. Note that variable names cannot have comma's in them. Additionally, don't add any whitespace since it ma

## Please update the fields below to reflect what is needed in your repository:

In [131]:
# Overwrite the template fields in one step; edit the values below to describe
# your own repository.
contents.update({
    "_project_name": "NameyourProject",
    "_url": "https://github.com/AgPipeline/template-rgb-plot",
    "_version": "Replace this text with the algorithm version",
    "_author(s)": ["Replace this text with the algorithm author(s), separated by commas"],
    "_author_email(s)": ["Replace this text with the algorithm author email(s), separated by commas"],
    "_contributors": ["Replace this text with the algorithm contributors, separated by commas"],
    "_algorithm_name": "Replace this text with the name of the algorithm",
    "_algorithm_description": "Replace this text with a description of the algorithm",
    "_citation_author": "Replace this text with the citation author for publication",
    "_citation_title": "Replace this text with the citation title for publication",
    "_citation_year": "Replace this text with the citation year for publication",
    "_variable_names": ["Replace this text with your variable names. Note that variable names cannot have comma's in them. Additionally, don't add any whitespace since it may cause name comparisons to fail."],
    "_variable_units": ["Replace this text with your variable unit(s) and comma-separate values"],
    "_variable_labels": ["Variable labels matching the order of variable names, also comma-separated"],
    "_methods": [
        "add algorithm_rgb.py methods here",
        "_get_variables_header_fields",
        "print_usage",
        "check_arguments",
        "check_configuration",
        "run_test",
        "process_files",
    ],
})

In [132]:
# Save the updated settings as cookiecutter.json in the parent directory; the
# context manager flushes and closes the file even if serialisation fails.
with open((Path.cwd().parent / "cookiecutter.json"), "w") as cookiecutter_file:
    json.dump(contents, cookiecutter_file)

## Please specify the directory in which you would like your repository to be located:

In [144]:
# Output directory handed to cookiecutter's -o option; defaults to the parent of
# the current working directory.
directory_to_use = str(Path.cwd().parent)

## Now you can generate your repository using cookiecutter. Make sure that you have cookiecutter installed. Installations can be found at: https://command-not-found.com/cookiecutter

In [145]:
# Step up one level when README.md is not in the current working directory.
if not os.path.isfile("README.md"):
    os.chdir("..")
# Quote both paths so directories containing spaces reach cookiecutter as single
# arguments.
os.system('cookiecutter -f "' + str(Path.cwd()) + '" -o "' + directory_to_use + '"')

0