# SAKD Accuracy Test

This test measures the accuracy of the decision engine using images from the validation set of the image classifier.
For each image, the ontology is fed with simulated sensor data that lies within the range specified by the expert rules for the ground-truth label with a probability $\geq 60\%$.

In [None]:
import datetime
import pathlib
import time

import pandas
import pandas as pd
import requests
from tqdm.notebook import tqdm
from numpy.random import default_rng
import numpy as np

Define the sensor value ranges so that they can be used as the boundaries of a uniform distribution.
The boundaries are chosen such that the expert rule is fulfilled with a probability $p$, where $0.6 \leq p \leq 1$.
This simulates that a disease is more likely to be encountered in an environment that is favorable for it.

In [None]:
# Sampling bounds for the simulated sensor values, keyed by disease label and
# then by observed property. Each tuple is (low, high) and is unpacked into the
# random generator by generate_observation().
# Properties that a disease does not list are looked up under 'healthy'
# instead (see get_range()).
# NOTE(review): for 'cassava_bacterial_blight' the SoilPH bounds (6.3, 7.5)
# cover the rule range (6.5, 7.2) with probability 0.7 / 1.2 ~= 0.58 under a
# uniform draw, which is slightly below the 0.6 stated in the text - confirm.
SENSOR_RANGES = {
    'cassava_bacterial_blight': {
        'SoilMoisture': (0.1, 1),
        'SoilPH': (6.3, 7.5),
        'SoilTemperature': (23, 31),
    },
    'cassava_brown_streak_disease': {
        'RelativeHumidity': (0.65, 0.88),
        'SoilMoisture': (0.0, 1.0),
        'SoilTemperature': (5, 33),
    },
    'cassava_green_mottle': {
        'SoilMoisture': (0.55, 1.0),
        'RelativeHumidity': (0.6, 1.0),
        'SoilTemperature': (24, 40)
    },
    'cassava_mosaic_disease': {
        'Temperature': (20, 50),
        'SoilMoisture': (0.1, 1),
        'SoilTemperature': (18, 34),
        'RelativeHumidity': (0.7, 1)
    },
    # Fallback bounds; the only entry that defines all five properties.
    'healthy': {
        'Temperature': (0, 50),
        'SoilMoisture': (0.2, 0.8),
        'SoilTemperature': (0, 40),
        'RelativeHumidity': (0.2, 0.8),
        'SoilPH': (3.0, 10.0),
    }
}

# Narrower (low, high) bounds with the same layout as SENSOR_RANGES.
# NOTE(review): not referenced by any code visible in this notebook.
# Presumably these are the exact expert-rule ranges and were swapped in for
# SENSOR_RANGES to produce the "only in rule range" results - confirm.
SENSOR_RULE_RANGES = {
    'cassava_bacterial_blight': {
        'SoilMoisture': (0.3, 1),
        'SoilPH': (6.5, 7.2),
        'SoilTemperature': (25, 30),
    },
    'cassava_brown_streak_disease': {
        'RelativeHumidity': (0.7, 0.85),
        'SoilMoisture': (0.1, 1.0),
        'SoilTemperature': (10, 32),
    },
    'cassava_green_mottle': {
        'SoilMoisture': (0.7, 1.0),
        'RelativeHumidity': (0.7, 1.0),
        'SoilTemperature': (27, 40)
    },
    'cassava_mosaic_disease': {
        'Temperature': (30, 50),
        'SoilMoisture': (0.3, 1),
        'SoilTemperature': (20, 32),
        'RelativeHumidity': (0.8, 1)
    },
    # Identical to the 'healthy' entry of SENSOR_RANGES.
    'healthy': {
        'Temperature': (0, 50),
        'SoilMoisture': (0.2, 0.8),
        'SoilTemperature': (0, 40),
        'RelativeHumidity': (0.2, 0.8),
        'SoilPH': (3.0, 10.0),
    }
}

# Maps each disease label to the numeric id that is inserted into the service
# URLs (/fields/{id}, /soils/{id} and /plants/{id}) when testing that disease.
PLANTS_FOR_DISEASES = {
    'cassava_bacterial_blight': 1,
    'cassava_brown_streak_disease': 2,
    'cassava_green_mottle': 3,
    'cassava_mosaic_disease': 4,
    'healthy': 5
}

# Properties whose observations are posted to the soil endpoint.
SOIL_PROPERTIES = [
    'SoilMoisture',
    'SoilTemperature',
    'SoilPH'
]

# Properties whose observations are posted to the field endpoint.
FIELD_PROPERTIES = [
    'Temperature',
    'RelativeHumidity'
]

Configure service endpoints.

In [None]:
# Base URL of the service that receives the simulated sensor observations.
ONTO_URL = 'http://localhost:8001'
# Base URL of the service that is asked for disease predictions.
DECISION_URL = 'http://localhost:8000'
# Directory containing one sub-directory of images per disease label.
DATA_PATH = '../data/val_images/'

# Seeded generator so that the simulated sensor values are reproducible.
rng = default_rng(12345)

# Sampling distribution for sensor values: "uniform" or "normal".
DISTRIBUTION = "uniform"
# When False, only the first 100 images per disease are evaluated.
FULL_TEST = True

In [None]:
def get_range(_property: str, disease: str):
    """Return the (low, high) sampling bounds of a property for a disease.

    If the disease is unknown or does not define the property, the bounds
    of the 'healthy' entry are used instead.

    Raises:
        KeyError: if the 'healthy' entry does not define the property either.
    """
    disease_bounds = SENSOR_RANGES.get(disease, {})
    if _property in disease_bounds:
        return disease_bounds[_property]
    # Fall back to the generic bounds shared by all diseases.
    return SENSOR_RANGES['healthy'][_property]

def generate_observation(_property: str, disease: str, distribution='uniform'):
    """Create one simulated sensor observation for a property.

    Args:
        _property: Name of the observed property (e.g. 'SoilMoisture').
        disease: Disease label whose value range is sampled.
        distribution: 'uniform' draws uniformly between the range bounds;
            'normal' draws from a normal distribution centred on the middle
            of the range with a standard deviation of one eighth of the
            range width.

    Returns:
        A dict with the keys 'timestamp', 'value' and 'observed_property'.

    Raises:
        ValueError: if ``distribution`` is neither 'uniform' nor 'normal'.
        KeyError: if no range is known for the property.
    """
    _range = get_range(_property, disease)
    low, high = _range
    if distribution == "uniform":
        value = rng.uniform(low, high)
    elif distribution == "normal":
        # The whole width is divided by 8; the previous expression
        # `high - low/8` only divided the lower bound due to precedence.
        value = rng.normal(np.mean(_range), (high - low) / 8)
    else:
        # Previously an unknown name fell through and failed later with an
        # UnboundLocalError on `value`.
        raise ValueError(f"Unsupported distribution: {distribution!r}")
    return {
        'timestamp': str(datetime.datetime.now()),
        'value': value,
        'observed_property': _property
    }

def simulate_sensors(disease: str):
    """Post one simulated observation per sensor property for a disease.

    Soil properties are sent to the soil endpoint first, followed by the
    field properties to the field endpoint. Values are generated with the
    module-level ``DISTRIBUTION`` setting.

    Args:
        disease: Disease label; selects both the value ranges and the id
            used in the endpoint URLs.

    Raises:
        KeyError: if the disease has no entry in ``PLANTS_FOR_DISEASES``.
        requests.HTTPError: if the service rejects an observation.
    """
    # Named `plant_id` rather than `id` to avoid shadowing the builtin.
    plant_id = PLANTS_FOR_DISEASES[disease]
    field_url = f"{ONTO_URL}/fields/{plant_id}/observations"
    soil_url = f"{ONTO_URL}/soils/{plant_id}/observations"

    headers = {'content-type': 'application/json',
               'accept': 'application/json'}

    targets = (
        (soil_url, SOIL_PROPERTIES),
        (field_url, FIELD_PROPERTIES),
    )
    for url, properties in targets:
        for _property in properties:
            observation = generate_observation(_property, disease, DISTRIBUTION)
            response = requests.post(url, json=observation, headers=headers)
            # A rejected observation would silently distort the accuracy
            # measurement, so fail loudly instead of ignoring the response.
            response.raise_for_status()

In [None]:
def test_decision_engine(diseases=None) -> pandas.DataFrame:
    """Run the decision engine on the validation images and collect results.

    For every image, fresh sensor observations are simulated for the
    ground-truth disease and the image is then posted to the prediction
    endpoint. Only the prediction request is timed.

    Args:
        diseases: Optional iterable of disease labels to evaluate. When
            omitted, every sub-directory of ``DATA_PATH`` whose name is a key
            of ``PLANTS_FOR_DISEASES`` is evaluated.

    Returns:
        A DataFrame with one row per evaluated image containing the ground
        truth, the prediction, the certainties and the timings reported by
        the service, the measured request time and the image path.

    Raises:
        requests.HTTPError: if a request to one of the services fails.
    """
    results = []

    headers = {
               'accept': 'application/json'}

    data_dir = pathlib.Path(DATA_PATH)
    if diseases is None:
        # Previously the discovered list was overwritten by a hard-coded
        # single disease. Stray files and unknown directories are skipped
        # because they have no plant id.
        diseases = sorted(
            entry.name for entry in data_dir.iterdir()
            if entry.is_dir() and entry.name in PLANTS_FOR_DISEASES
        )

    for disease in tqdm(diseases):
        plant_id = PLANTS_FOR_DISEASES[disease]
        # Sorted so that the reduced sample is the same on every run;
        # a list also lets tqdm display the total.
        images = sorted(p for p in data_dir.joinpath(disease).glob('*') if p.is_file())
        if not FULL_TEST:
            # test fewer samples for tuning
            images = images[:100]

        for image in tqdm(images):
            simulate_sensors(disease)

            start_time = time.time()
            with open(image, 'rb') as f:
                files = {'image': (image.name, f, 'image/jpg')}
                response = requests.post(DECISION_URL + f"/plants/{plant_id}/predict-disease",
                                         files=files, headers=headers)
            end_time = time.time()
            decision_time = end_time - start_time
            response.raise_for_status()

            pred = response.json()

            results.append({
                'ground_truth': disease,
                'prediction': pred['disease'],
                'visual_certainty': pred['visual_certainty'],
                'knowledge_certainty': pred['knowledge_certainty'],
                'image_classification_time': pred['image_classification_time'],
                'reasoner_time': pred['reasoner_time'],
                'decision_time': decision_time,
                'image': str(image)
            })

    return pd.DataFrame.from_records(results)

In [None]:
# Run the evaluation; `result` holds one row per evaluated image.
result = test_decision_engine()

  0%|          | 0/1 [00:00<?, ?it/s]

0it [00:00, ?it/s]

KeyboardInterrupt: 

In [None]:
# Share of images whose predicted label equals the ground-truth label,
# followed by the rows that were classified incorrectly.
if result.empty:
    # An empty frame has no columns and no rows, so the comparison would
    # raise a KeyError and the division a ZeroDivisionError.
    accuracy = float('nan')
    errors = result
    print("No results collected; accuracy is undefined.")
else:
    is_correct = result['ground_truth'] == result['prediction']
    matches = result[is_correct]
    accuracy = len(matches)/len(result)
    print(f"Accuracy is {accuracy}")
    errors = result[~is_correct]
    print(errors)


Testing with uniform distribution.

| Image Weight | Knowledge Weight | Accuracy 100 | Accuracy Full | classifier |
|--------------|------------------|--------------|---------------|------------|
| 0.5 | 0.2 | 0.702 | - | 0.8403 |
| 1.0 | 1.0 | 0.746 | 0.8480304150385468 | 0.8403 |
| 1.0 | 0.8 | 0.73 | - | 0.8403 |
| 1.0 | 1.2 | 0.748 | 0.8488752772204035 | 0.8403 |
| 1.0 | 1.4 | 0.744 | - | 0.8403 |


Testing with normal distribution (`std_deviation = (range[1] - range[0]) / 2`).

| Image Weight | Knowledge Weight | Accuracy 100 | Accuracy Full | classifier |
|--------------|-----------|------------|-------------|------------------------|
| 1.0 | 1.0 | 0.706|- |0.8403|
| 1.0 | 1.2 | 0.682|- |0.8403|
| 1.0 | 0.8 | 0.7|- |0.8403|
| 1.0 | 0.5 | 0.704|- |0.8403|

Testing with normal distribution (`std_deviation = (range[1] - range[0]) / 4`).

| Image Weight | Knowledge Weight | Accuracy 100 | Accuracy Full | classifier |
|--------------|-----------|------------|-------------|------------------------|
| 1.0 | 1.0 | 0.696|- |0.8403|

Testing with normal distribution (`std_deviation = (range[1] - range[0]) / 8`).

| Image Weight | Knowledge Weight | Accuracy 100 | Accuracy Full | classifier |
|--------------|-----------|------------|-------------|------------------------|
| 1.0 | 1.0 |0.698 |- |0.8403|


Testing with uniform distribution (values sampled only within the rule range).

| Image Weight | Knowledge Weight | Accuracy 100 | Accuracy Full | classifier |
|--------------|-----------|------------|-------------|------------------------|
| 1.0 | 1.0 | 0.808|0.8852043510402365 |0.8403|
| 1.0 | 1.2 | 0.83|- |0.8403|
| 1.0 | 1.4 | 0.858|- |0.8403 |
| 1.0 | 0.5 | 0.726|- |0.8403|

In [None]:
# Persist the three timing columns and report their averages.
timing_columns = ['decision_time',
                  'image_classification_time',
                  'reasoner_time']
result.to_csv("./times.csv", columns=timing_columns)

avg_decision = result['decision_time'].mean()
avg_classification = result['image_classification_time'].mean()
avg_reasoner = result['reasoner_time'].mean()

print(f"avg_decision_time: {avg_decision}")
print(f"avg_classification_time: {avg_classification}")
print(f"avg_reasoner_time: {avg_reasoner}")

```
avg_decision_time: 3.6626797550796444
avg_classification_time: 0.21957266697071337
avg_reasoner_time: 3.4327719566600225
```