Test the accuracy of the default model without additional training data

Libraries

In [162]:
import glob as glob
import numpy as np
import cv2
import pandas as pd
import keras
import os
from keras.applications.inception_v3 import preprocess_input, decode_predictions
from sklearn.metrics import classification_report, confusion_matrix
from IPython.display import clear_output

In [2]:
# Import the pre-trained model
from keras.applications import InceptionV3

In [3]:
# Start the model
model = InceptionV3()
labels = pd.read_csv("categories.csv")

In [99]:
# make a function to make a prediction on a folder of images
def predict_and_save(folder, coded_data):
    predict_files = glob.glob(folder+'/*/*.jpg')
    
    predictor, prob, image_id = [], [], []
    final = pd.DataFrame(columns=['id','prediction','confidence','image'])
    
    not_coded = []
    
    for index, i in enumerate(predict_files):
        if coded_data["filename"].str.contains(os.path.basename(i)).any():
            clear_output(wait=True)
            print(f"Working on image {index} of {len(predict_files)-1}")
            im = cv2.imread(i)
            im = cv2.resize(cv2.cvtColor(im, cv2.COLOR_BGR2RGB), (299, 299)).astype(np.float32) / 255.0
            im = np.expand_dims(im, axis =0)
            preds = model.predict(im)
            preds = decode_predictions(preds)[0]        

            temp_df = pd.DataFrame(preds,columns=['id','prediction','confidence'])
            temp_df['image'] = i.rsplit("/")[-1]
            final = final.append(temp_df)
            if index % 50 == 0:
                final.to_csv(folder+'_test.csv',index=False)
        else:
            not_coded.append(i)
        
    final.to_csv(folder+'_test.csv',index=False)
    return final

## Test against Bridget's coding

In [100]:
coded_data = "/Volumes/M&B/Screen_Time_Measure_Development/SNAP_IT/Coding Framework Test Images/Screen Time Coding Data - Device.csv"
folder = "/Volumes/M&B/Screen_Time_Measure_Development/SNAP_IT/Coding Framework Test Images"

In [101]:
coded_data = pd.read_csv(coded_data)

In [102]:
%%time
df = predict_and_save(folder, coded_data)

Working on image 4495 of 4495
CPU times: user 3h 4min 34s, sys: 7min 56s, total: 3h 12min 30s
Wall time: 1h 4min 39s


In [114]:
cat_maybe = ['computer_keyboard','projector', 'joystick',]

cat_def = ['desktop_computer', 'monitor', 'screen',
        'television', 'laptop',  
       'hand-held_computer', 'web_site',  'iPod',
       'home_theater', 'entertainment_center',
       'cellular_telephone']

In [118]:
df['screen_def'] = np.where(df["prediction"].isin(cat_def),1,0)
df['screen_maybe'] = np.where(df["prediction"].isin(cat_def + cat_maybe),1,0)

In [137]:
def conf_def(df, confthresh):
    if df["confidence"] > confthresh and df["screen_def"]==1:
        return 1
    else:
        return 0
def conf_maybe(df, confthresh):
    if df["confidence"] > confthresh and df["screen_maybe"]==1:
        return 1
    else:
        return 0

In [138]:
for x in np.arange(0.05, 0.501,0.05):
    df["screen_def_"+str(x)] = df.apply(conf_def, confthresh=x, axis=1)
    df["screen_maybe_"+str(x)] = df.apply(conf_maybe, confthresh=x, axis=1)

In [152]:
df = df.merge(coded_data,left_on="image", right_on="filename")
df.drop(columns="filename", inplace=True)
df[["device","device_excl_bkg"]] = df[["device","device_excl_bkg"]].astype(int)
df = df.drop(columns=["prediction","confidence","id"]).groupby(["image"]).any()

In [171]:
true_devices = ['device', 'device_excl_bkg']
predicted_devices = ['screen_def_0.05', 'screen_maybe_0.05',
       'screen_def_0.1', 'screen_maybe_0.1', 'screen_def_0.15000000000000002',
       'screen_maybe_0.15000000000000002', 'screen_def_0.2',
       'screen_maybe_0.2', 'screen_def_0.25', 'screen_maybe_0.25',
       'screen_def_0.3', 'screen_maybe_0.3', 'screen_def_0.35000000000000003',
       'screen_maybe_0.35000000000000003', 'screen_def_0.4',
       'screen_maybe_0.4', 'screen_def_0.45', 'screen_maybe_0.45',
       'screen_def_0.5', 'screen_maybe_0.5']

In [177]:
for true_device in true_devices:
    for predicted_device in predicted_devices:
        print(f"Comparing: {true_device} & {predicted_device}")
        print(classification_report(df[true_device], df[predicted_device]))

Comparing: device & screen_def_0.05
              precision    recall  f1-score   support

       False       0.50      0.81      0.62      1484
        True       0.86      0.60      0.71      3011

    accuracy                           0.67      4495
   macro avg       0.68      0.71      0.66      4495
weighted avg       0.74      0.67      0.68      4495

Comparing: device & screen_maybe_0.05
              precision    recall  f1-score   support

       False       0.52      0.78      0.63      1484
        True       0.86      0.65      0.74      3011

    accuracy                           0.69      4495
   macro avg       0.69      0.71      0.68      4495
weighted avg       0.75      0.69      0.70      4495

Comparing: device & screen_def_0.1
              precision    recall  f1-score   support

       False       0.48      0.90      0.62      1484
        True       0.91      0.51      0.66      3011

    accuracy                           0.64      4495
   macro avg       