# Examine Deployment Inferences

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix

## Set up the plotting functions

- plot_order_vs_species_confidence: to determine whether the order confidence and the species confidence are correlated
- plot_order_binary_consistency: to determine whether the binary classification is consistent with the order classification

In [92]:
def plot_order_vs_species_confidence(df, species_conf_column='top_1_confidence'):
    """Scatter order confidence against species confidence for Lepidoptera rows.

    Only rows whose ``order_name`` contains 'Lepidoptera' are used. A
    least-squares straight line is drawn over the scatter in red when there
    are enough points to fit one. Draws on the current matplotlib axes and
    does not call ``plt.show()``.

    Args:
        df: DataFrame with 'order_name', 'order_confidence' and the column
            named by ``species_conf_column``.
        species_conf_column: Name of the species-confidence column.
    """
    is_moth = df['order_name'].str.contains('Lepidoptera', na=False)
    # np.polyfit does not skip NaN, so drop rows missing either confidence.
    df_moth = df.loc[is_moth].dropna(
        subset=['order_confidence', species_conf_column]
    )

    x = df_moth['order_confidence']
    y = df_moth[species_conf_column]
    plt.scatter(x, y)

    # A straight-line fit needs at least two distinct x values.
    x_unique = np.unique(x)
    if len(x_unique) < 2:
        return

    slope, intercept = np.polyfit(x, y, 1)
    plt.plot(x_unique, slope * x_unique + intercept, 'r')


def plot_order_binary_consistency(df):
    """Plot a heatmap of order-level moth/non-moth labels against ``class_name``.

    Each row with an ``order_name`` is labelled 'moth' when that name contains
    'Lepidoptera' and 'nonmoth' otherwise. The labels are cross-tabulated
    against ``class_name`` and the resulting count matrix is shown as an
    image (rows: order-derived label, columns: ``class_name``).

    Args:
        df: DataFrame with 'order_name' and 'class_name' columns. It is not
            modified.
    """
    # Rows without an order prediction cannot be compared.
    labelled = df.dropna(subset=['order_name'])

    # Built as a separate Series rather than a new column, so nothing is
    # assigned onto the dropna() result (avoids SettingWithCopyWarning).
    order_moth = labelled['order_name'].map(
        lambda name: 'moth' if 'Lepidoptera' in name else 'nonmoth'
    ).rename('order_moth')

    # 2-D count table: one row per order-derived label, one column per class.
    breakdown = pd.crosstab(order_moth, labelled['class_name'])

    if breakdown.empty:
        print('No rows with both order_name and class_name; nothing to plot.')
        return

    fig, ax = plt.subplots()
    im = ax.imshow(breakdown.values)
    fig.colorbar(im, ax=ax, label='Count')

    # Show all ticks and label them with the table's categories.
    ax.set_xticks(np.arange(len(breakdown.columns)))
    ax.set_yticks(np.arange(len(breakdown.index)))
    ax.set_xticklabels(breakdown.columns)
    ax.set_yticklabels(breakdown.index)
    ax.set_xlabel('class_name')
    ax.set_ylabel('order_moth')

    plt.show()


## Load in the data and apply the functions

In [None]:
# NOTE: df0 is only loaded in the next cell, so that cell must be run first.
plot_order_vs_species_confidence(df0)
plot_order_binary_consistency(df0)

In [4]:
# Locations of the per-deployment result CSVs (one frame is loaded per file).
dep000082_csv = './dep000082.csv'
dep000080_csv = './data/farralia/dep000080/dep000080_results.csv'
dep000072_csv = '/home/users/katriona/amber-inferences/data/solar/gbr/dep000072.csv'

df0 = pd.read_csv(dep000082_csv)
df1 = pd.read_csv(dep000080_csv)
df2 = pd.read_csv(dep000072_csv)

In [None]:
# One histogram panel per deployment, laid out side by side (1 row x 3 columns).
fig, axs = plt.subplots(1, 3, figsize=(15, 5))

# (confidence values, panel title), in left-to-right panel order.
panels = [
    (df0['top_1_confidence'], 'Nettlebed'),
    (df2['top_1_confidence'], 'dep000072'),
    (df1['species_confidence'], 'dep000080'),
]
for ax, (confidence, title) in zip(axs, panels):
    confidence.hist(bins=100, ax=ax)
    ax.set_title(title)

fig.suptitle('Top Species Confidence for some UK deployments')

# Axis labels are set once: x on the middle panel, y on the left-most panel.
axs[1].set_xlabel('Confidence')
axs[0].set_ylabel('Number of crops')

plt.show()

## Troubleshooting blurry images

# Preview the first rows of df0.
df0.head()


In [None]:
# Take the first ten entries of the cropped-image path column as a sample.
crops = df0['cropped_image_path'].head(10)

crops

In [None]:
# Display the crop stored under index label 0 on a small 3x3 inch figure.
image_path = crops[0]

fig, ax = plt.subplots(figsize=(3, 3))
img = plt.imread(image_path)
ax.imshow(img)

In [34]:
# Determine how blurry the image is
from PIL import Image
import cv2

def variance_of_laplacian(image):
    """Return the variance of the Laplacian of ``image``.

    The Laplacian is computed by ``cv2.Laplacian`` with 64-bit float output
    depth, and the variance is taken over all of its values.
    """
    laplacian = cv2.Laplacian(image, cv2.CV_64F)
    return laplacian.var()

def blurriness(image_path):
    """Return the variance-of-Laplacian score of the image at ``image_path``.

    The image is loaded with OpenCV, converted to grayscale and passed to
    ``variance_of_laplacian``. Despite the function name, this notebook plots
    the score on an axis labelled 'Sharpness', i.e. higher means less blurry.

    Args:
        image_path: Path of the image file to score.

    Returns:
        The variance of the Laplacian of the grayscale image.

    Raises:
        ValueError: If the file is missing or cannot be decoded as an image.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising; without
    # this check the failure shows up as an opaque error inside cvtColor.
    if image is None:
        raise ValueError(f'Could not read image: {image_path!r}')

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return variance_of_laplacian(gray)


def calc_blurriness(df):
    """Return a copy of ``df`` with a 'blurriness' score for every crop.

    Rows whose 'cropped_image_path' is missing are dropped; each remaining
    path is scored with ``blurriness``. The input frame is left untouched.

    Args:
        df: DataFrame with a 'cropped_image_path' column.

    Returns:
        A new DataFrame with the kept rows and an added 'blurriness' column.
    """
    # Explicit copy: assigning a column onto the dropna() result directly can
    # trigger pandas' SettingWithCopyWarning.
    scored = df.dropna(subset=['cropped_image_path']).copy()
    scored['blurriness'] = [
        blurriness(path) for path in scored['cropped_image_path']
    ]
    return scored

def plot_blurriness(df, threshold=100):
    """Plot a histogram of the 'blurriness' column with a threshold marker.

    A dashed vertical line is drawn at ``threshold`` and the two sides of it
    are labelled 'More blurry' (left) and 'Less blurry' (right).

    Args:
        df: DataFrame with a numeric 'blurriness' column.
        threshold: x position (in the units of the 'blurriness' column) of
            the vertical marker and of the two direction labels.
    """
    ax = df['blurriness'].hist(bins=100)

    ax.axvline(threshold, color='black', linestyle='--')

    # x is in data units, y is a fraction of the axes height, so the labels
    # stay inside the plot whatever the bar heights are.
    anchor = ax.get_xaxis_transform()
    ax.annotate('Less blurry ->', xy=(threshold, 0.95), xycoords=anchor,
                xytext=(5, 0), textcoords='offset points',
                ha='left', va='top')
    ax.annotate('<- More blurry', xy=(threshold, 0.95), xycoords=anchor,
                xytext=(-5, 0), textcoords='offset points',
                ha='right', va='top')

    ax.set_xlabel('Sharpness')
    ax.set_ylabel('Number of Crops')

    plt.show()

In [None]:
# Score every crop in df0; calc_blurriness drops rows without a cropped_image_path.
df0_blur = calc_blurriness(df0)

In [None]:
# The histogram helper defined in this notebook is plot_blurriness;
# hist_of_bluriness is not defined anywhere in the file.
plot_blurriness(df0_blur, 100)

In [None]:
# The histogram helper defined in this notebook is plot_blurriness;
# hist_of_bluriness is not defined anywhere in the file.
# NOTE(review): plot_blurriness reads a 'blurriness' column; confirm df1 has
# one, or pass it through calc_blurriness first.
plot_blurriness(df1)

In [None]:
# For each deployment that has CSV outputs in ./data/harlequin_flatbug,
# run 05_combine_outputs.py.

import os
import subprocess

data_dir = './data/harlequin_flatbug'

all_files = os.listdir(data_dir)
# Keep only the per-deployment CSVs. Files named final_outputs_* are what this
# cell asks the script to write into the same directory, so skip them;
# otherwise a re-run would pick up "final" as a deployment ID.
csv_files = [
    name for name in all_files
    if name.endswith('.csv') and not name.startswith('final_outputs_')
]
# Deployment ID = text before the first underscore; sorted for a stable order.
all_directories = sorted({name.split("_")[0] for name in csv_files})

print(all_directories)

for dep in all_directories:
    print(dep)

    # NOTE(review): no shell is involved, so the "*" patterns reach the script
    # unexpanded -- presumably 05_combine_outputs.py globs them itself; confirm.
    result = subprocess.run(['python', '05_combine_outputs.py',
                             f'{data_dir}/{dep}*.csv',
                             f'{data_dir}/final_outputs_{dep}*.csv'])
    # Report failures instead of letting them pass silently; keep going so
    # one bad deployment does not block the rest.
    if result.returncode != 0:
        print(f'05_combine_outputs.py failed for {dep} '
              f'(exit code {result.returncode})')



In [None]:
# The histogram helper defined in this notebook is plot_blurriness;
# hist_of_bluriness is not defined anywhere in the file.
# NOTE(review): plot_blurriness reads a 'blurriness' column; confirm df2 has
# one, or pass it through calc_blurriness first.
plot_blurriness(df2)