# Welcome!
This notebook is a worked example of how to use the code from the publication that accompanies this [GitHub repository](https://github.com/Grimblewald/Neural-networks-for-microplastic-fourier-transform-infrared-spectroscopy-data).

To begin, simply run the code block below. You can do this by clicking the "play button" that appears next to the code block, at the upper left corner.

In [None]:
# @markdown Example code block. If you run this successfully, you should see a message appear below this block of text.
print("Perfect! simply do this for any of the relevant blocks, whenever prompted to run a code block")

In [None]:
# @markdown First we need to setup our environment. This begins with downloading the project files. Running this block will accomplish this. This should only take a few seconds.
!git clone https://github.com/Grimblewald/Neural-networks-for-microplastic-fourier-transform-infrared-spectroscopy-data.git

print("\n     Download of repository should be complete")

In [None]:
# @markdown Next, let us set up our environment. This means downloading the relevant Python libraries. This will take about 2 minutes, and will be followed by a prompt to restart the kernel so that the changes take effect. It is recommended that you accept this and restart the kernel.
# @markdown After the restart, please run this block a second time. It should then finish successfully without issues.
import os
import ipywidgets as widgets
from IPython.display import display, clear_output

if 'setup_ran' not in locals():
  os.chdir("Neural-networks-for-microplastic-fourier-transform-infrared-spectroscopy-data/")
  !pip install -r pip_requirements.txt
  print("finished setting up")
  setup_ran = True
elif setup_ran:
  print("it appears you have already run this block, it does not need to be run again")

In [None]:
# @markdown Lets create a new model object. this may take 7-10 seconds the first time you run this block, but should be near instant after the first time.
from functions.DNNModel import base_model
import os
os.chdir("/content/Neural-networks-for-microplastic-fourier-transform-infrared-spectroscopy-data")

# Create the model object that the later cells use as `my_model`.
# Catch Exception (not a bare except) so that interrupts still propagate, and
# report what actually went wrong instead of "reason unknown".
try:
  my_model = base_model()
  print("model object created succesfully")
except Exception as error:
  print(f"failed to create model object, reason: {type(error).__name__}: {error}")

In [None]:
# @markdown Now that foundational things are set up, run this block to load the configuration file.
# Load ./config.yaml through the model object. The actual error is reported
# (rather than swallowed by a bare except) so a failure can be diagnosed.
try:
  my_model.load_config("./config.yaml")
  print("loaded config succesfully")
except Exception as error:
  print(f"an error occurred while loading the config: {type(error).__name__}: {error}")

In [None]:
# @title # Editing Model Name
# @markdown We can use features built into interactive python notebook frameworks to create interactive elements to help ease use of tools. Running this code block will create a form which you can use to update the model name.

import ipywidgets as widgets
from IPython.display import display, clear_output
import pprint

# Remember the model name currently stored in the loaded configuration
model_name = my_model.config["model_name"]

# Button callback: store the text-box content as the model name
def update_name(b):
  """Write the text-box value into ``my_model.config["model_name"]`` and report it.

  Args:
    b: the clicked button, as passed by ipywidgets (unused).
  """
  typed_name = name_input.value
  with output:
    # Start from an empty output area so only the latest message is visible
    output.clear_output()
    my_model.config["model_name"] = typed_name

    # Echo the value as it is now stored in the config
    print(f"Updated name to \"{my_model.config['model_name']}\"")

def get_current_name(b):
  """Print the model name currently stored in ``my_model.config``.

  Args:
    b: the clicked button, as passed by ipywidgets (unused).
  """
  with output:
    output.clear_output()
    stored_name = my_model.config['model_name']
    print(f"Current name is: \"{stored_name}\"")

# Text box for the model name, pre-filled with the name read from the config.
# The variable is passed (not the literal text "model_name"); str() guards
# against a non-string name, which a Text widget does not accept.
name_input = widgets.Text(description="Name:",
                          value=str(model_name))

# Buttons to store the typed name and to show the stored name
add_button = widgets.Button(description="Update Name")
add_button.on_click(update_name)

view_button = widgets.Button(description="Check Name")
view_button.on_click(get_current_name)

# Place the buttons side by side in a horizontal box
button_box = widgets.HBox([add_button, view_button])

# Output widget the button callbacks print into
output = widgets.Output()

# Display widgets and output
display(name_input, button_box, output)

In [None]:
# @title # Setting up a dataset
# @markdown Next let us make sure we are setup with a dataset.
# @markdown Running this codeblock will load a form that lets us view existing configurations as well as add datasets or make changes.
# @markdown The defaults are for the example dataset present.
# @markdown If you wish to update a specific dataset, simply use the same name and it will over-write the previous configuration for this dataset.
import ipywidgets as widgets
from IPython.display import display, clear_output
import pprint
import pandas as pd

# Staging area for entries created with the form; they are kept under the
# 'datasets' key
datasets = {'datasets': {}}

# Choices offered by the "Type" and "Format" dropdowns
type_options = ["file", "folder"]
format_options = [".csv"]

def is_float(value):
  """Return True when ``value`` can be converted with ``float()``, else False.

  Values that ``float()`` rejects with a TypeError (for example ``None``) give
  False instead of raising, just like strings that are not numbers.
  """
  try:
    float(value)
  except (TypeError, ValueError):
    return False
  return True

# Function to add a new dataset entry to the dictionary
def add_dataset_entry(b):
  """Validate the form values and stage them as an entry in ``datasets['datasets']``.

  For type "file" the path must be an existing file whose CSV header contains
  the label column. For type "folder" the path must be an existing directory;
  the label column is not checked because no single file is named.
  Problems are printed to the output area and nothing is staged.
  An existing entry with the same name is overwritten.

  Args:
    b: the clicked button, as passed by ipywidgets (unused).
  """
  import os  # local import: this cell does not import os itself

  with output:
    output.clear_output()  # Clear output area
    # Get values from the widgets
    name = name_input.value
    format_value = format_dropdown.value
    label_column = label_column_input.value
    path = path_input.value
    trainable = trainable_checkbox.value
    type_value = type_dropdown.value

    if type_value == "folder":
      # A folder path can never pass an isfile() check, so validate it as a
      # directory; otherwise the offered "folder" type could never be added.
      # NOTE(review): files inside the folder are not inspected here - confirm
      # that later processing validates their label column.
      if not os.path.isdir(path):
        print(f"Error: Folder at path '{path}' not found. Please provide a valid folder path.")
        return
    else:
      if not os.path.isfile(path):
        print(f"Error: File at path '{path}' not found. Please provide a valid file path.")
        return
      try:
        # Attempt to read the first row of the CSV to check headers
        df = pd.read_csv(path, nrows=1)
        if label_column not in df.columns:
          # Suggest only headers that do not look like numbers
          col_options = [str(i) for i in df.columns if not is_float(i)]
          print(f"\nError: Column '{label_column}' not found in file header.\nAvailable columns: {col_options}\n")
          return
      except Exception as e:
        print(f"Error reading the file at '{path}': {e}")
        return

    # Add the new dataset entry
    datasets['datasets'][name] = {
        'format': format_value,
        'label_column': label_column,
        'name': name,
        'path': path,
        'trainable': trainable,
        'type': type_value
    }

    # Display the updated datasets
    print("Added your datasets to temporary config:")
    pprint.pprint(datasets)

def get_current_datasets(b):
  """Pretty-print the datasets stored in ``my_model.config``.

  Args:
    b: the clicked button, as passed by ipywidgets (unused).
  """
  with output:
    output.clear_output()
    print("Current dataset list is:")
    committed = my_model.config['datasets']
    pprint.pprint(committed)

def commit_changes_to_config(b):
  """Copy the staged dataset entries into ``my_model.config['datasets']``.

  An update is only performed and reported when at least one staged entry is
  new, or differs from the entry with the same name already in the config.
  An empty staging area therefore counts as "no changes".

  Args:
    b: the clicked button, as passed by ipywidgets (unused).
  """
  with output:
    output.clear_output()
    staged = datasets['datasets']
    current = my_model.config['datasets']
    # Compare entry by entry: the config may hold datasets that were never
    # staged, so comparing the two dictionaries as a whole is not meaningful.
    has_changes = any(
      name not in current or current[name] != entry
      for name, entry in staged.items()
    )
    if has_changes:
      current.update(staged)
      print("updated model config with your changes")
    else:
      print("It does not look like you made any changes, nothing was updated")

# Output area the button callbacks print into.
# NOTE(review): `output` is a notebook-global name that other cells assign as
# well, and the callbacks look it up when clicked - confirm this is intended.
output = widgets.Output()

# Form fields; the defaults describe the example dataset
name_input = widgets.Text(value="Example", description="Name:")
format_dropdown = widgets.Dropdown(
  options=format_options,
  value=".csv",
  description="Format:",
)
label_column_input = widgets.Text(value="class", description="Label Column:")
path_input = widgets.Text(value="./data/Example_Data.csv", description="Path:")
trainable_checkbox = widgets.Checkbox(value=True, description="Use in training?")
type_dropdown = widgets.Dropdown(
  options=type_options,
  value="file",
  description="Type:",
)

# One button per action: stage an entry, commit staged entries, list the config
add_button = widgets.Button(description="Add Dataset")
commit_button = widgets.Button(description="Commit Changes")
view_button = widgets.Button(description="List Datasets")

add_button.on_click(add_dataset_entry)
commit_button.on_click(commit_changes_to_config)
view_button.on_click(get_current_datasets)

# Buttons sit side by side in a single row
button_box = widgets.HBox([add_button, commit_button, view_button])

# Render the form: fields first, then the button row, then the output area
display(
  name_input,
  format_dropdown,
  label_column_input,
  path_input,
  trainable_checkbox,
  type_dropdown,
  button_box,
  output,
)

In [None]:
# @markdown Next, let us build the datasets. This will run code that will use the parameters set for datasets to gather files, then process these in preparation for training.
# NOTE: the message printed by the plotting cell states that a new run is set
# each time datasets are generated, so finish training and evaluating before
# running this cell again.
my_model.build_datasets()

In [None]:
# @markdown Next we can define our network architecture, changing it if we want to
import ipywidgets as widgets
from IPython.display import display
from copy import deepcopy as copy

# Widgets for the currently selected layer type, keyed by parameter name
parameter_inputs = dict()

# Layer types offered in the "Layer" dropdown
layer_options = ["input", "output", "dense", "dropout"]

# Activation functions offered for the dense-style layers
activation_options = ["relu", "softmax", "sigmoid", "tanh"]

# Working copy of the configured layers; edited by the buttons and only
# written back to the run config on request
layer_list = my_model.run_config["network_layers"].copy()

# Function to dynamically generate parameter inputs
def update_parameter_inputs():
  # Clear current parameter inputs
  for widget in parameter_inputs.values():
    try:
      widget.close()
    except:
      pass

  layer_name = layer_dropdown.value
  parameter_inputs.clear()

  match layer_name:
    case "dense":
      # Parameters for Dense layer
      parameter_inputs["neurons"] = widgets.IntText(value=32, description="Number of Neurons")
      parameter_inputs["layer_activation"] = widgets.Dropdown(options=activation_options, description="Activation Fucntion")
      parameter_inputs["use_bias"] = widgets.Checkbox(value=True, description="Use Bias")
      # Regularization options
      parameter_inputs["L1"] = widgets.Checkbox(value=False, description="Use L1")
      parameter_inputs["L2"] = widgets.Checkbox(value=False, description="Use L2")

      layer_option_widget = widgets.HBox([parameter_inputs["L1"], parameter_inputs["L2"]])

    case "dropout":
      # Parameters for Dropout layer
      parameter_inputs["rate"] = widgets.FloatSlider(value=0.05, min=0, max=1, step=0.01, description="Rate")

    case "input":
      # Parameters for Dense layer
      parameter_inputs["layer_activation"] = widgets.Dropdown(options=activation_options, description="Activation Fucntion")
      parameter_inputs["use_bias"] = widgets.Checkbox(value=True, description="Use Bias")
      # Regularization options
      parameter_inputs["L1"] = widgets.Checkbox(value=True, description="Use L1")
      parameter_inputs["L2"] = widgets.Checkbox(value=True, description="Use L2")

      layer_option_widget = widgets.HBox([parameter_inputs["L1"], parameter_inputs["L2"]])

    case "output":
      # Parameters for Dense layer
      parameter_inputs["layer_activation"] = widgets.Dropdown(value=activation_options[1], options=activation_options, description="Activation Fucntion")
      parameter_inputs["use_bias"] = widgets.Checkbox(value=True, description="Use Bias")
      # Regularization options
      parameter_inputs["L1"] = widgets.Checkbox(value=False, description="Use L1")
      parameter_inputs["L2"] = widgets.Checkbox(value=False, description="Use L2")

      layer_option_widget = widgets.HBox([parameter_inputs["L1"], parameter_inputs["L2"]])

  # Display updated parameter inputs
  for widget in parameter_inputs.values():
    display(widget)

# Function to add layer
def add_layer(change):
  """Append a layer description built from the current widget values.

  The description is a dictionary whose keys depend on the layer type chosen
  in ``layer_dropdown``; it is appended to ``layer_list`` and the summary is
  refreshed. Nothing happens when no layer type is selected.

  Args:
    change: the clicked button, as passed by ipywidgets (unused).
  """
  layer_name = layer_dropdown.value
  if not layer_name:
    return

  params = parameter_inputs

  def dense_style(group, neurons):
    # Shared description for dense, input and output layers
    use_l1 = params["L1"].value
    use_l2 = params["L2"].value
    if use_l1 and use_l2:
      regularization = "l1_l2"
    elif use_l1:
      regularization = "l1"
    elif use_l2:
      regularization = "l2"
    else:
      regularization = None

    return {
      "layer_group": group,
      "layertype": "dense",
      "neurons": neurons,
      "layer_activation": params["layer_activation"].value,
      "layer_bias": "bias" if params["use_bias"].value else "nobias",
      "regularization": regularization
    }

  # Handle layer type and parameters
  if layer_name == "dense":
    layer_dict = dense_style("hidden", params["neurons"].value)
  elif layer_name == "dropout":
    layer_dict = {
      "layer_group": "hidden",
      "layertype": "dropout",
      "rate": params["rate"].value
    }
  elif layer_name in ("input", "output"):
    # For these two the layer group carries the same name as the layer type
    layer_dict = dense_style(layer_name, "auto")
  else:
    # Unknown type: an empty description is appended
    layer_dict = {}

  layer_list.append(layer_dict)
  update_layer_display()

# Function to remove layer
def remove_layer(change):
  """Drop the most recently added layer, if any, and refresh the summary.

  Args:
    change: the clicked button, as passed by ipywidgets (unused).
  """
  if not layer_list:
    return
  del layer_list[-1]
  update_layer_display()

# Function to clear configuration
def clear_layers(change):
  """Rebind ``layer_list`` to a new empty list and refresh the summary.

  Args:
    change: the clicked button, as passed by ipywidgets (unused).
  """
  global layer_list
  # A new list is bound; the previous list object is left untouched
  layer_list = list()
  update_layer_display()

# Function to update display of current layers
def update_layer_display():
  """Show one layer dictionary per line in the summary text area."""
  layer_summary.value = '\n'.join(map(str, layer_list))

def update_network_config(b):
  """Store a copy of the working layer list in ``my_model.run_config`` and print it.

  The copy is shallow: the list is new, the layer dictionaries are shared.

  Args:
    b: the clicked button, as passed by ipywidgets (unused).
  """
  import pprint  # local import: this cell does not import pprint itself

  my_model.run_config["network_layers"] = layer_list.copy()
  pprint.pprint(my_model.run_config["network_layers"])

# Layer-type selector; choosing another type rebuilds the parameter widgets
layer_dropdown = widgets.Dropdown(options=layer_options, description="Layer")
layer_dropdown.observe(lambda _change: update_parameter_inputs(), names='value')

# Action buttons
add_button = widgets.Button(description="Add Layer")
remove_button = widgets.Button(description="Remove Last Layer")
clear_button = widgets.Button(description="Clear Config")
commit_network = widgets.Button(description="Commit Network Config")

# Connect each button to its callback
add_button.on_click(add_layer)
remove_button.on_click(remove_layer)
clear_button.on_click(clear_layers)
commit_network.on_click(update_network_config)

# Button row in display order: add, remove, commit, clear
button_widget = widgets.HBox([add_button, remove_button, commit_network, clear_button])

# Text area that lists the layers currently in the working copy
layer_summary = widgets.Textarea(
  value="",
  description="Summary",
  layout=widgets.Layout(width="600px", height="200px"),
)

# Selector and buttons on one row, summary underneath
display(widgets.HBox([layer_dropdown, button_widget]), layer_summary)

# Fill the summary and create the parameter widgets for the initial selection
update_layer_display()
update_parameter_inputs()


In [None]:
# @markdown Next let us construct our model. The buttons will let you construct a model, view a summary of the model, or graph it.

import tensorflow as tf
import ipywidgets as widgets
from IPython.display import display, clear_output

def compile_model(b):
  """Call ``my_model.build_model()`` with its output shown in the output area.

  Args:
    b: the clicked button, as passed by ipywidgets (unused).
  """
  with output:
    # Remove the output of any earlier button press first
    output.clear_output()
    my_model.build_model()

def model_summary(b):
  """Call ``summary()`` on the model stored in ``my_model.run_config["model"]``.

  Args:
    b: the clicked button, as passed by ipywidgets (unused).
  """
  with output:
    output.clear_output()
    built_model = my_model.run_config["model"]
    built_model.summary()

def plot_model(b):
  """Display the result of ``tf.keras.utils.plot_model`` for the stored model.

  Args:
    b: the clicked button, as passed by ipywidgets (unused).
  """
  with output:
    output.clear_output()
    built_model = my_model.run_config["model"]
    diagram = tf.keras.utils.plot_model(built_model)
    display(diagram)

# Output area the three model callbacks print into
output = widgets.Output()

# Buttons for building the model, plotting it and printing its summary
add_button = widgets.Button(description="Compile Model")
commit_button = widgets.Button(description="Plot Model")
view_button = widgets.Button(description="Model Summary")

# Connect each button to its callback
add_button.on_click(compile_model)
commit_button.on_click(plot_model)
view_button.on_click(model_summary)

# Button row in display order: compile, plot, summary
button_box = widgets.HBox([add_button, commit_button, view_button])

# Show the button row with the output area underneath
display(button_box, output)

In [None]:
# @markdown We are now in a position where we can train the model. The buttons will let you train the model for the specified number of epochs, using the specified batchsize.
import ipywidgets as widgets
from IPython.display import display

# Output area for the training log
output = widgets.Output()

# Training settings: epochs (1-100), batch size (1-128) and the save-best flag
epochs_slider = widgets.IntSlider(description='Epochs:', value=1, min=1, max=100, step=1)
batch_size_slider = widgets.IntSlider(description='Batch Size:', value=16, min=1, max=128, step=1)
save_best_checkbox = widgets.Checkbox(description='Save Best', value=True)

# Button that starts the training
run_button = widgets.Button(description='Run Training', button_style='success')

# Button callback: train with the settings chosen in the form
def on_button_click(b):
  """Print the chosen settings, then call ``my_model.train`` with them.

  Args:
    b: the clicked button, as passed by ipywidgets (unused).
  """
  with output:
    output.clear_output()  # Clear previous output
    settings = {
      "epochs": epochs_slider.value,
      "batch_size": batch_size_slider.value,
      "save_best": save_best_checkbox.value,
    }
    print("Starting training with:")
    print(f"  Epochs: {settings['epochs']}")
    print(f"  Batch Size: {settings['batch_size']}")
    print(f"  Save Best (slows things a bit): {settings['save_best']}")
    my_model.train(**settings)
    print("Training completed.")

# Start training when the button is pressed
run_button.on_click(on_button_click)

# Stack the settings, the button and the output area vertically
display(widgets.VBox(children=[
  epochs_slider,
  batch_size_slider,
  save_best_checkbox,
  run_button,
  output,
]))

In [None]:
# @markdown Now we evaluate the model on the datasets we've defined, both those marked for training and those not marked for training.
my_model.evaluate()

In [None]:
# @markdown  Finally, let's generate some plots to look at model performance
# Plot the evaluation results. On failure the usual cause is explained and
# the actual error is printed as well, so other problems are not hidden.
try:
  my_model.graph_evaluations()
except Exception as error:
  print("Oop! that failed.\nThe reason is probably that you didn't fully train/evaluate an earlier run\nand a new run is set each time you generate datasets.\nCode to catch these exceptions in a more elegant way is on the to-do list.\nUntil it's fixed, do a full round of \n     build data > build model > train model > evaluate model\nbefore building datasets anew to avoid this issue.")
  print(f"\nThe reported error was: {type(error).__name__}: {error}")

In [None]:
# @markdown Finally, we can repeat the whole process of building the datasets and model, training, and evaluating, using the configuration file and the details specified within it.

# @markdown The button below starts this process for the number of models chosen with the slider.
import ipywidgets as widgets
from IPython.display import display

# Output area for the log of the runs
output = widgets.Output()

# How many models to train (1-10)
Number_of_models_slider = widgets.IntSlider(description='Number of Models:', value=3, min=1, max=10, step=1)

# Button that starts the runs
run_button = widgets.Button(description='Run Training', button_style='success')

# Button callback: run the chosen number of models
def on_button_click(b):
  """Print the chosen model count, then pass it to ``my_model.do_runs``.

  Args:
    b: the clicked button, as passed by ipywidgets (unused).
  """
  with output:
    output.clear_output()  # Clear previous output
    model_count = Number_of_models_slider.value
    print("Starting training with:")
    print(f"  Number of Models: {model_count}")
    my_model.do_runs(model_count)
    print("Training completed.")

# Start the runs when the button is pressed
run_button.on_click(on_button_click)

# Stack the slider, the button and the output area vertically
display(widgets.VBox(children=[
  Number_of_models_slider,
  run_button,
  output,
]))