# Testing the Accuracy of the detectNum Function


In [1]:
import numpy as np
from pathlib import Path
from ocr import detectNum
from tqdm import tqdm
from sklearn.metrics import accuracy_score

In [2]:
# Directory layout used by the cells below; every path is relative to the working directory.
root = Path('./')  # notebook root
data_path = Path('data')  # top-level data directory
raw_path = data_path / 'raw'  # directory handed to predict_num

In [3]:
def predict_num(path,pattern):
    '''
    Function that recognises the number on the A4 paper for every image
    found below ``path`` and collects the results for scoring.

    ``path`` is expected to contain one sub-directory per class; the name of
    the sub-directory is used as the true label of every image inside it.
    Entries that are not directories (e.g. the macOS '.DS_Store' file) and
    the directory named 'Group' are skipped.

    input:
    path: path to data (pathlib.Path or str)
    pattern: glob pattern selecting the images, e.g. '*.jpg' , '*.MOV' , '*.JPG'

    output:
    true_classes: class-directory name of each image
    pred_classes: first element returned by detectNum for each image, or
                  '0' when detectNum returned nothing
    image_paths: path of each image
    The three lists are parallel: index i of each refers to the same image.
    '''
    true_classes = []
    pred_classes = []
    image_paths = []
    # Sort so the returned lists have the same order on every run;
    # iterdir()/glob() order is otherwise arbitrary.
    class_dirs = sorted(Path(path).iterdir())
    for child in tqdm(class_dirs):
        # Only class directories hold images. This drops stray files such as
        # '.DS_Store' and the 'Group' directory.
        if not child.is_dir() or child.name == 'Group':
            continue
        # Path.name is separator independent, unlike splitting the string on '/'.
        class_id = child.name
        for image_path in sorted(child.glob(pattern)):
            number = detectNum(str(image_path.absolute()))
            # Treat None and any empty result as "nothing detected" instead of
            # failing on number[0].
            detected = number is not None and len(number) > 0
            pred_classes.append(number[0] if detected else '0')
            true_classes.append(class_id)
            image_paths.append(image_path)
    return true_classes,pred_classes,image_paths


## 1 Images with .jpg ending, taken with iPhone 7 Plus and the large black folder background

### 1.1 Without sliding window

In [None]:
# Run detectNum over every '*.jpg' image below raw_path.
# NOTE(review): this cell has no execution count in the saved notebook, and the
# call is identical to the one under "1.2 With sliding window". Nothing in this
# notebook selects the mode, so the difference presumably comes from the version
# of ocr.detectNum that was imported at the time - confirm before re-running.
tru,pred,imgpaths = predict_num(raw_path,'*.jpg')

In [29]:
# predict_num stores one prediction per image, so this is the number of images evaluated.
print('Dataset size: {}'.format(len(pred)))

Dataset size: 237


In [25]:
# Convert the directory-name labels and the predictions to integers.
tru_in = list(map(int, tru))
# Fold labels above 100 onto the number written on the paper, e.g. 107 -> 07.
tru_in2 = [label if label <= 100 else label - 100 for label in tru_in]
pred_in = list(map(int, pred))
# Accuracy against the folded labels.
accuracy_score(tru_in2, pred_in)

0.8649789029535865

### 1.2 With sliding window

In [4]:
# Same '*.jpg' evaluation as in section 1.1, stored under separate names.
# NOTE(review): the "sliding window" messages in the output are not printed by
# this notebook's code, so they presumably come from ocr.detectNum - confirm
# which detectNum version must be imported to reproduce this run.
truSW,predSW,imgpathsSW = predict_num(raw_path,'*.jpg')

7it [00:13,  1.87s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


12it [01:17,  6.46s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


14it [02:01,  8.65s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


16it [02:42, 10.18s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


23it [04:17, 11.20s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


29it [05:44, 11.88s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


39it [06:46, 10.42s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


49it [08:02,  9.84s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


55it [08:59,  9.81s/it]


In [5]:
# One prediction is stored per image, so this is the number of images evaluated.
print('Dataset size: {}'.format(len(predSW)))

Dataset size: 237


In [6]:
# Convert the directory-name labels and the predictions to integers.
tru_int = list(map(int, truSW))
# Fold labels above 100 onto the number written on the paper, e.g. 107 -> 07.
tru_int2 = [label if label <= 100 else label - 100 for label in tru_int]
pred_int = list(map(int, predSW))
# This score uses the raw labels (tru_int), not the folded tru_int2.
accuracy_score(tru_int, pred_int)

0.9029535864978903

In [7]:
# Fold labels above 100 onto the number written on the paper (e.g. 107 -> 07).
tru_int2 = [i-100 if i>100 else i for i in tru_int]

In [8]:
# Accuracy against the folded labels (tru_int2) rather than the raw directory labels.
accuracy_score(tru_int2,pred_int)

0.9873417721518988

## 2 Images with .JPG ending, taken with iPhone 7 and iPhone SE, without the large black folder

### 2.1 Without sliding window

In [30]:
# Run detectNum over every upper-case '*.JPG' image below raw_path.
# NOTE(review): this call is identical to the one in the sliding-window
# section for this dataset; the mode is presumably decided inside
# ocr.detectNum - confirm which version produced this run.
trueJ,predJ,imgpathsJ = predict_num(raw_path,'*.JPG')

55it [06:06,  6.66s/it]


In [32]:
# One prediction is stored per image, so this is the number of images evaluated.
print('Dataset size: {}'.format(len(predJ)))

Dataset size: 262


In [31]:
# Convert the directory-name labels and the predictions to integers.
trueJ_int = list(map(int, trueJ))
predJ_int = list(map(int, predJ))
# NOTE(review): labels above 100 are not folded here (no "107 -> 07" step), so
# this score is comparable to the raw-label figure printed first in the
# sliding-window cell for this dataset, not to the folded-label figure.
accuracy_score(trueJ_int, predJ_int)

0.30916030534351147

The poor performance on this dataset results from the fact that our algorithm was designed to detect the white rectangle (A4 paper) on top of a slightly larger black rectangle. As some photos were taken without the black rectangle, the algorithm cannot correctly identify the number in them. To account for such cases, a sliding window approach was implemented. This approach takes far longer, but we hope that it will improve accuracy on this set substantially.

### 2.2 With sliding window

In [9]:
# Same '*.JPG' evaluation as in section 2.1, stored under separate names.
# NOTE(review): the "sliding window" messages in the output are not printed by
# this notebook's code, so they presumably come from ocr.detectNum - confirm
# which detectNum version must be imported to reproduce this run.
trueJW,predJW,imgpathsJW = predict_num(raw_path,'*.JPG')

0it [00:00, ?it/s]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


2it [00:44, 22.34s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the det

3it [04:24, 88.20s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


4it [06:31, 97.89s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


6it [07:14, 72.50s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the det

11it [11:19, 61.81s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


17it [13:48, 48.75s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the det

18it [17:28, 58.25s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the det

22it [21:35, 58.90s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


24it [21:56, 54.87s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the det

30it [25:39, 51.31s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


33it [28:16, 51.42s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


34it [30:29, 53.81s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the det

35it [35:19, 60.57s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the det

38it [39:49, 62.89s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


42it [42:01, 60.03s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the det

43it [46:27, 64.82s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the det

46it [50:25, 65.77s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the det

48it [54:06, 67.63s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


50it [56:10, 67.42s/it]

No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.
No numbers were found in the detected rectangles, now trying the sliding window approach. This may take a minute or two.


55it [59:28, 64.88s/it]


In [11]:
# One prediction is stored per image, so this is the number of images evaluated.
print('Dataset size: {}'.format(len(predJW)))

Dataset size: 262


In [18]:
# Convert the directory-name labels and the predictions to integers.
trueJW_int = list(map(int, trueJW))
# Fold labels above 100 onto the number written on the paper, e.g. 107 -> 07.
trueJW_int2 = [label if label <= 100 else label - 100 for label in trueJW_int]
predJW_int = list(map(int, predJW))
# First line: accuracy against the raw labels; second line: against the folded labels.
print(accuracy_score(trueJW_int, predJW_int))
print(accuracy_score(trueJW_int2, predJW_int))

0.49236641221374045
0.6946564885496184
