In [2]:
import matplotlib.pyplot as plt
import os
import seaborn as sns
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

from tensorflow_examples.lite.model_maker.core.export_format import ExportFormat
from tensorflow_examples.lite.model_maker.core.task import image_preprocessing

from tflite_model_maker import image_classifier
from tflite_model_maker import ImageClassifierDataLoader
from tflite_model_maker.image_classifier import ModelSpec

2023-06-12 04:13:41.943103: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-06-12 04:13:41.943144: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
  from .autonotebook import tqdm as notebook_tqdm
 The versions of TensorFlow you are currently using is 2.8.4 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [14]:
# Retrieve the cassava plant disease dataset and splits into training, validation and test datasets

tfds_name = 'cassava'
# tfdsload function is used to load the dataset cassava as well as the splits to load 
(ds_train, ds_validation, ds_test), ds_info = tfds.load(
    name=tfds_name,
    split=['train', 'validation', 'test'],
    with_info=True,
    # loads the dataset in a format that can be used for supervised learning 
    as_supervised=True)
TFLITE_NAME_PREFIX = tfds_name

In [1]:
# Construct the list of labels and loads the training and validation datasets

label_names = ds_info.features['label'].names

train_data = ImageClassifierDataLoader(ds_train,
                                       ds_train.cardinality(),
                                       label_names)
validation_data = ImageClassifierDataLoader(ds_validation,
                                            ds_validation.cardinality(),
                                            label_names)

# This selects the name of the model to use (mobilenet v3)

model_name = 'mobilenet_v3_large_100_224' 

# this maps specific model name to the URLs for their crresponding pre-trained weights to the tensorflow hub 
map_model_name = {
    'cropnet_cassava':
        'https://tfhub.dev/google/cropnet/feature_vector/cassava_disease_V1/1',
    'cropnet_concat':
        'https://tfhub.dev/google/cropnet/feature_vector/concat/1',
    'cropnet_imagenet':
        'https://tfhub.dev/google/cropnet/feature_vector/imagenet/1',
    'mobilenet_v3_large_100_224':
        'https://tfhub.dev/google/imagenet/mobilenet_v3_large_100_224/feature_vector/5',
}

model_handle = map_model_name[model_name]

image_model_spec = ModelSpec(uri=model_handle)

NameError: name 'ds_info' is not defined

In [16]:
# Constants
MODEL_WEIGHTS_FILE = 'trained_model_weights.h5'

# Check if the model weights file exists
if not os.path.exists(MODEL_WEIGHTS_FILE):
    # Build the model by training the model with the training dataset
    model = image_classifier.create(
        train_data,
        model_spec=image_model_spec,
        batch_size=128,
        learning_rate=0.03,
        epochs=5,
        shuffle=True,
        train_whole_model=True,
        validation_data=validation_data)

    # Save the trained model's weights
    model.model.save_weights(MODEL_WEIGHTS_FILE)
else:
    # Create a new model with the same architecture
    new_model = image_classifier.create(
        train_data,
        model_spec=image_model_spec,
        batch_size=128,
        learning_rate=0.03,
        epochs=0,  # Set epochs to 0 to avoid training
        shuffle=True,
        train_whole_model=True,
        validation_data=validation_data)
    
    # Load the saved model's weights
    new_model.model.load_weights(MODEL_WEIGHTS_FILE)
    model = new_model

INFO:tensorflow:Retraining the models...


INFO:tensorflow:Retraining the models...


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 hub_keras_layer_v1v2_2 (Hub  (None, 1280)             4226432   
 KerasLayerV1V2)                                                 
                                                                 
 dropout_2 (Dropout)         (None, 1280)              0         
                                                                 
 dense_2 (Dense)             (None, 5)                 6405      
                                                                 
Total params: 4,232,837
Trainable params: 4,208,437
Non-trainable params: 24,400
_________________________________________________________________
None


In [23]:
# This is a mapping dictionary of the disease codes to names

name_map = dict(
    cmd='Mosaic Disease',
    cbb='Bacterial Blight',
    cgm='Green Mite',
    cbsd='Brown Streak Disease',
    healthy='Healthy',
    unknown='Unknown')

[(name_map[x],x) for x in label_names]

[('Bacterial Blight', 'cbb'),
 ('Brown Streak Disease', 'cbsd'),
 ('Green Mite', 'cgm'),
 ('Mosaic Disease', 'cmd'),
 ('Healthy', 'healthy')]

In [18]:
# Use the trained model to make predictions off the standard test dataset split from cassava 

test_data = ImageClassifierDataLoader(ds_test, ds_test.cardinality(),
                                      label_names)
model.predict_top_k(test_data)

[[('healthy', 0.56150264)],
 [('cmd', 0.89032584)],
 [('cgm', 0.991747)],
 [('cbsd', 0.9461923)],
 [('cmd', 0.8463677)],
 [('cmd', 0.7813167)],
 [('cmd', 0.84853363)],
 [('cmd', 0.6516916)],
 [('cgm', 0.9877293)],
 [('cgm', 0.75671536)],
 [('cmd', 0.78785694)],
 [('cmd', 0.5241862)],
 [('cmd', 0.49590406)],
 [('cbsd', 0.9810425)],
 [('cmd', 0.8674433)],
 [('cbsd', 0.6540499)],
 [('cgm', 0.8659014)],
 [('cmd', 0.54186386)],
 [('cmd', 0.8820886)],
 [('cmd', 0.44836128)],
 [('cbsd', 0.9626703)],
 [('cbsd', 0.6549386)],
 [('healthy', 0.58226454)],
 [('cbsd', 0.9905009)],
 [('cbsd', 0.9435163)],
 [('cgm', 0.8404975)],
 [('cbsd', 0.9927408)],
 [('cgm', 0.9582349)],
 [('cbsd', 0.8172173)],
 [('cbsd', 0.82031125)],
 [('cgm', 0.7932843)],
 [('cmd', 0.94161373)],
 [('cbsd', 0.9045541)],
 [('cmd', 0.84492373)],
 [('cmd', 0.76550806)],
 [('cbsd', 0.9948813)],
 [('cmd', 0.63947004)],
 [('cbsd', 0.97441804)],
 [('cbsd', 0.46145955)],
 [('cmd', 0.61152357)],
 [('cbsd', 0.5854273)],
 [('cmd', 0.660892

In [3]:
# Retrieve the image files from Amazon S3
import boto3
import io
# loading the the downloaded images as Numpy arrays 
import matplotlib.image as mpimg

# Read the bucket name from input.log, used to create or update a table with same name
with open('/home/ubuntu/ML/input.log', 'r') as file:
    bucket_name = file.read().replace('\n', '')

# boto3 resource using default credentials in .aws
s3 = boto3.resource('s3', region_name='us-west-2')

# the bucket that triggered the lambda based on the timestamp upload event
bucket = s3.Bucket(bucket_name)

s3_client = boto3.client('s3', region_name='us-west-2')
# Get the contents of the time_stamp.txt object in the bucket
timestamp_obj = s3_client.get_object(Bucket=bucket_name, Key='time_stamp.txt')
timestamp = timestamp_obj['Body'].read().decode('utf-8').strip()

# ses setup
email_client = boto3.client("ses", region_name="us-west-2")

# dict for storing images
images = {}

# list for storing each plant_id
plant_ids = []

# get all objects from the bucket
summaries = bucket.objects.all()

# loop through all objects in the bucket
for f in summaries:
    # BytesIO object holds image data
    bstream = io.BytesIO()

    # check if the current object is a JPEG
    if str(f.key).upper().endswith(".JPG") or str(f.key).upper().endswith(".JPEG"):
        # download image into the BytesIO object
        bucket.Object(f.key).download_fileobj(bstream)
        image = tf.image.decode_jpeg(bstream.getvalue(), channels=3)
        
        # check if the image is the correct size
        if image.shape[0] != 224 or image.shape[1] != 224:
            # Resize the image to be 224x224
            image = tf.image.resize(image, [224, 224])
        
        
        # read image data and store it in the images dict
        images[f.key] = image.numpy()
        # extract the number from the image file name, file name will always be formatted as "Plant_{N}", where {N} is any number
        plant_id = f.key.split('_')[1].split('.')[0] # split the string on the underscore, split the remaining string on the dot, select the string before the dot 
        plant_ids.append(plant_id)
        print(f"{f.key}, id = {plant_id}, current # of id's = {len(plant_ids)}")

Plant_0.jpg, id = 0, current # of id's = 1
Plant_1.jpg, id = 1, current # of id's = 2
Plant_2.jpg, id = 2, current # of id's = 3
Plant_3.jpg, id = 3, current # of id's = 4
Plant_4.jpg, id = 4, current # of id's = 5
Plant_5.jpg, id = 5, current # of id's = 6
Plant_6.jpg, id = 6, current # of id's = 7
Plant_7.jpg, id = 7, current # of id's = 8


In [5]:
# Construct a new dataset using my image files 
v = [x for x in images.values() if x.shape[0] == 224 and x.shape[1] == 224 and x.shape[2] == 3]

# Convert the list of images to a numpy array and normalize it to 0-1 range
input_1 = np.stack(v, axis=0)  # Stack images along a new dimension to form a 4D tensor
input_1 = input_1 / 255.0  # Normalize to 0-1 range

# Get top-k predictions
predicted_prob = model.model.predict(input_1)

# Get the top-k predictions
topk_prob, topk_id = tf.math.top_k(predicted_prob, k=1)

# Map the label ids back to labels
topk_label = np.array(label_names)[topk_id.numpy()]

index_name_map = {i: name_map[name] for i, name in enumerate(label_names)}

# Convert tensors to numpy arrays before using them as dictionary keys
labels = [(index_name_map[x[0]], x[1]) for x in zip(topk_id.numpy().flatten(), topk_prob.numpy().flatten())]


NameError: name 'np' is not defined

In [4]:
from decimal import Decimal
from boto3.dynamodb.conditions import Key
from botocore.exceptions import ClientError

# dynamodb setup
dynamodb = boto3.resource('dynamodb', region_name='us-west-2')
table_name = bucket_name  # use the bucket_name as table_name

# check if table exists
try: 
    table = dynamodb.create_table(
        TableName=table_name,
        KeySchema=[
            {
                'AttributeName': 'plant_id',
                'KeyType': 'HASH' # this makes plant_id essentially the primary key
            }
        ],
        AttributeDefinitions=[
            {
                'AttributeName': 'plant_id',
                'AttributeType': 'S'
            }
        ],
        ProvisionedThroughput={
            'ReadCapacityUnits': 10,
            'WriteCapacityUnits': 10
        }
    )
    
    # wait until the table is created
    table.meta.client.get_waiter('table_exists').wait(TableName=table_name)
    print(f"Created {table_name} table")
except ClientError as ce:
    if ce.response['Error']['Code'] == "ResourceInUseException":
        print(f"Table {table_name} already exists")
        table = dynamodb.Table(table_name)
    else:
        print(f"Unknown exception occurred while querying for {table_name} table")
        print(ce)
        
# Get the labels for the test data using the trained model
disease = True

for plant_id, (label, probabilities) in zip(plant_ids, labels):
    print(label)
    if label == "Healthy":
        diseased = False
        
    # check if the item exists in the table
    response = table.query(
        KeyConditionExpression=Key('plant_id').eq(plant_id)
    )
    items = response['Items']
    
    # table has matching items already
    if items:
        # update existing item
        item = items[0]
        item["metadata"].append(
            {
                "disease": label,
                "probability": Decimal(str(probabilities)),  
                "time_stamp": timestamp
            }
        )
        table.put_item(Item=item)
    else:
        # no existing item found, create one with new plant_id
        item = {
            "plant_id": str(f"Plant_{plant_id}"),
            "metadata": [
                {
                    "name": str(f"Plant_{plant_id}"),
                    "disease": label,
                    "probability": Decimal(str(probabilities)),
                    "time_stamp": timestamp 
                }
            ]
        }
        table.put_item(Item=item)
    
    # body_data = str(garden_id) + " - " + label + " detected"
    # body_page = """
    #     <html>
    #     <head></head>
    #     <body>
    #     <h2>Disease Detected - """ + label + """</h2>""" + """
    #     <p>Found in GardenId = """ + str(garden_id) + """
    #     </body>
    #     </html> """
    # if diseased:
    #     try:
    #         email_client.send_email(Source="ttsega03@gmail.com", 
    #                                Destination={
    #                                    "ToAddresses": [
    #                                        "mesteddy14@gmail.com"
    #                                    ]
    #                                }, Message={
    #                                    "Subject": {
    #                                        "Data": "Disease Detected",
    #                                        "Charset": "UTF-8"
    #                                    },
    #                                    "Body": {
    #                                        "Text": {
    #                                            "Data": body_data,
    #                                            "Charset": "UTF-8"
    #                                        },
    #                                        "Html": {
    #                                            "Data": body_page,
    #                                            "Charset": "UTF-8"
    #                                        }
    #                                    }
    #                                }
    #                                )
    #     except Exception as e:
    #         print(e)
    # garden_id += 1
    print("done!")

Table ec2-test-garden already exists


NameError: name 'labels' is not defined