In [48]:
import glob as glob
import os

import cv2
import numpy as np
import pandas as pd
from IPython.display import clear_output
from sklearn.metrics import (
    classification_report,
    cohen_kappa_score,
    confusion_matrix,
    roc_auc_score,
)

In [49]:
# Load the class labels: one label per line, indexed by the network's class ID.
# The context manager closes the file handle as soon as the labels are read.
with open("/Users/tasanders/Google Drive/Square Eyes (DP20)/5 - Data collection and management/Image Blurring Test/Yolo/openimages.names") as labels_file:
    LABELS = labels_file.read().strip().split("\n")

In [50]:
# Get the weights and config
configpath = "/Users/tasanders/Google Drive/Square Eyes (DP20)/5 - Data collection and management/Image Blurring Test/Yolo/yolov3-openimages.cfg"
weightspath = "/Users/tasanders/Google Drive/Square Eyes (DP20)/5 - Data collection and management/Image Blurring Test/Yolo/yolov3-openimages.weights"

net = cv2.dnn.readNetFromDarknet(configpath, weightspath)

# Determine the output layer names.
# getUnconnectedOutLayers() returns 1-based layer indices; depending on the
# OpenCV version they arrive as an Nx1 array or as a flat array, so flatten
# before indexing to support both.
layer_names = net.getLayerNames()
ln = [layer_names[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]

In [51]:
# Function to make a prediction and save to df
def predict_and_save(folder, coded_data, conf_thresh=0.1, nms_thresh=0.15):
    """Run the YOLO network over every coded image under ``folder``.

    Uses the module-level ``net`` (loaded network), ``ln`` (output layer
    names) and ``LABELS`` (class names).

    Parameters
    ----------
    folder : str
        Directory searched with the pattern ``<folder>/*/*.jpg``.
    coded_data : pandas.DataFrame
        Must have a string ``filename`` column. An image is processed only if
        its basename occurs (as a literal substring) in one of these values.
    conf_thresh : float, default 0.1
        Minimum class score for a detection to be kept; also passed to NMS.
    nms_thresh : float, default 0.15
        Overlap threshold passed to ``cv2.dnn.NMSBoxes``.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``prediction``, ``confidence``, ``image``. One row per
        detection that survives NMS; an image with no surviving detection gets
        a single row whose ``id``/``prediction``/``confidence`` are missing.
        Images that cannot be read are skipped and listed in a printed notice.
    """
    predict_files = glob.glob(folder + "/*/*.jpg")
    coded_names = coded_data["filename"]

    # Collect plain dicts and build the frame once at the end: growing a
    # DataFrame row by row is quadratic, and DataFrame.append no longer exists
    # in pandas >= 2.0.
    records = []
    unreadable = []

    for index, image in enumerate(predict_files):
        image_name = os.path.basename(image)

        # regex=False: match the filename literally (otherwise "." matches any
        # character); na=False: missing filenames never match.
        if not coded_names.str.contains(image_name, regex=False, na=False).any():
            continue

        clear_output(wait=True)
        print(f"Working on image {index} of {len(predict_files)-1}")

        im = cv2.imread(image)
        if im is None:
            # cv2.imread signals an unreadable/corrupt file by returning None.
            unreadable.append(image)
            continue
        (H, W) = im.shape[:2]
        # Detections are scaled back to pixel units of the source image.
        scale = np.array([W, H, W, H])

        # Create the blob
        blob = cv2.dnn.blobFromImage(
            im, 1 / 255.0, (416, 416), swapRB=True, crop=False
        )
        net.setInput(blob)
        layerOutputs = net.forward(ln)

        # Translate the predictions
        boxes = []
        confidences = []
        classIDs = []

        for output in layerOutputs:
            for detection in output:
                # Elements 0-3 are the box (centre x, centre y, width, height);
                # elements from index 5 onwards are the per-class scores.
                scores = detection[5:]
                classID = int(np.argmax(scores))
                confidence = scores[classID]

                if confidence > conf_thresh:
                    box = detection[0:4] * scale
                    (centerX, centerY, width, height) = box.astype("int")
                    # Convert the box centre to its top-left corner.
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))
                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    classIDs.append(classID)

        # apply non-maxima suppression to suppress weak, overlapping bounding boxes
        idxs = cv2.dnn.NMSBoxes(boxes, confidences, conf_thresh, nms_thresh)

        if len(idxs):
            # np.array(...).flatten() copes with both the Nx1 and the flat
            # index layouts returned by different OpenCV versions.
            for i in np.array(idxs).flatten():
                records.append(
                    {
                        "id": classIDs[i],
                        "prediction": LABELS[classIDs[i]],
                        "confidence": confidences[i],
                        "image": image_name,
                    }
                )
        else:  # no predictions made
            records.append(
                {
                    "id": None,
                    "prediction": None,
                    "confidence": None,
                    "image": image_name,
                }
            )

    if unreadable:
        print(f"Skipped {len(unreadable)} unreadable image(s): {unreadable}")

    final = pd.DataFrame(
        records, columns=["id", "prediction", "confidence", "image"]
    )
    # Nullable integer dtype keeps class IDs integral even when some rows
    # have no detection.
    final["id"] = final["id"].astype("Int64")
    return final

##  Test against Bridget's coding

In [None]:
%%time
coded_data = pd.read_csv("/Volumes/M&B/Screen_Time_Measure_Development/SNAP_IT/Coding Framework Test Images/Screen Time Coding Data - Device.csv")
folder = "/Volumes/M&B/Screen_Time_Measure_Development/SNAP_IT/Coding Framework Test Images"
df = predict_and_save(folder, coded_data)

Working on image 2892 of 4495


In [None]:
# Keep an untouched in-memory copy of the raw predictions before later cells
# add columns to `df`, then persist the predictions to disk.
df_backup = df.copy(deep=True)
df.to_csv(
    path_or_buf="/Volumes/M&B/Screen_Time_Measure_Development/SNAP_IT/Coding Framework Test Images/YOLO-OpenImages.csv",
    index=False,
)

In [None]:
# Predicted labels that are counted as a screen device on their own.
cat_def = [
    "Laptop",
    "Computer monitor",
    "Mobile phone",
    "Television",
    "Tablet computer",
    "Ipod",
]

# Additional labels that are only counted in the broader "maybe" category
# (used together with cat_def).
cat_maybe = [
    "Computer keyboard",
    "Printer",
    "Computer mouse",
    "Remote control",
]

In [None]:
# 1/0 flags: does the predicted label fall in the definite list, or in the
# definite + maybe lists combined?
df["screen_def"] = df["prediction"].isin(cat_def).astype(int)
df["screen_maybe"] = df["prediction"].isin(cat_def + cat_maybe).astype(int)

In [None]:
def conf_def(df, confthresh):
    """Return 1 if a row is a definite-screen detection above ``confthresh``.

    Parameters
    ----------
    df : mapping or pandas.Series
        A single row providing ``confidence`` and ``screen_def``.
    confthresh : float
        Confidence must be strictly greater than this value.

    Returns
    -------
    int
        1 when ``screen_def == 1`` and the confidence exceeds the threshold,
        otherwise 0. A missing confidence (None/NaN) yields 0.
    """
    confidence = df["confidence"]
    # Rows for images without any detection carry a missing confidence;
    # comparing None with a float would raise TypeError.
    if pd.isna(confidence):
        return 0
    return 1 if confidence > confthresh and df["screen_def"] == 1 else 0
def conf_maybe(df, confthresh):
    """Return 1 if a row is a maybe-screen detection above ``confthresh``.

    Parameters
    ----------
    df : mapping or pandas.Series
        A single row providing ``confidence`` and ``screen_maybe``.
    confthresh : float
        Confidence must be strictly greater than this value.

    Returns
    -------
    int
        1 when ``screen_maybe == 1`` and the confidence exceeds the threshold,
        otherwise 0. A missing confidence (None/NaN) yields 0.
    """
    confidence = df["confidence"]
    # Rows for images without any detection carry a missing confidence;
    # comparing None with a float would raise TypeError.
    if pd.isna(confidence):
        return 0
    return 1 if confidence > confthresh and df["screen_maybe"] == 1 else 0

In [None]:
# Add one pair of flag columns per confidence threshold (0.1 to 0.5, step 0.05).
for x in np.arange(0.1, 0.501, 0.05):
    # np.arange with a float step accumulates rounding error and can yield
    # values such as 0.30000000000000004; round so the column names are the
    # clean "screen_def_0.3" / "screen_maybe_0.3" that later cells look up.
    thresh = round(float(x), 2)
    df[f"screen_def_{thresh}"] = df.apply(conf_def, confthresh=thresh, axis=1)
    df[f"screen_maybe_{thresh}"] = df.apply(conf_maybe, confthresh=thresh, axis=1)

In [None]:
# Attach the coded labels to each prediction row, then collapse to one
# row per image: a flag is True if any of that image's rows set it.
df = (
    df.merge(coded_data, left_on="image", right_on="filename")
    .drop(columns="filename")
    .astype({"device": int, "device_excl_bkg": int})
    .drop(columns=["prediction", "confidence", "id"])
    .groupby(["image"])
    .any()
)

In [None]:
# Columns holding the coded labels.
true_devices = ["device", "device_excl_bkg"]

# Everything else except the un-thresholded flags is treated as a prediction column.
predicted_devices = df.drop(columns=true_devices + ["screen_def", "screen_maybe"])


In [None]:
# Print a classification report for every (coded label, prediction column)
# pairing; iterating over the predicted_devices frame yields its column names.
comparisons = (
    (true_device, predicted_device)
    for true_device in true_devices
    for predicted_device in predicted_devices
)
for true_device, predicted_device in comparisons:
    print(f"Comparing: {true_device} & {predicted_device}")
    print(classification_report(df[true_device], df[predicted_device]))

In [None]:
# Detailed metrics for the selected prediction column against both coded labels.
prediction_col = "screen_maybe_0.3"
for truth_col in ("device", "device_excl_bkg"):
    print(f"Comparing: {truth_col} & {prediction_col}")
    print(classification_report(df[truth_col], df[prediction_col]))
    # Cohen's kappa is reported only for the device_excl_bkg labels.
    if truth_col == "device_excl_bkg":
        print(cohen_kappa_score(df[truth_col], df[prediction_col]))
    print(f"AUC: {roc_auc_score(df[truth_col], df[prediction_col])}")