### Imports

In [11]:
import enum
import pandas as pd

import os
import pickle
from skimage import io

from spear.labeling import labeling_function, ABSTAIN, continuous_scorer
from spear.labeling import preprocessor

### Functions

### CLASS Declaration

In [3]:
class pixelLabels(enum.Enum):
    """Binary class labels assigned to a pixel by the labeling functions."""

    # The pixel is part of text.
    TEXT = 1
    # The pixel is not part of text.
    NOT_TEXT = 0

### Preprocessors

In [None]:
@preprocessor(name="LOWER_CASE")
def convert_to_lower(x):
    """Return ``x`` lower-cased with leading and trailing whitespace removed.

    Args:
        x: The string to normalise.

    Returns:
        The lower-cased, stripped string.
    """
    lowered = x.lower()
    return lowered.strip()

# lower = convert_to_lower("RED")
# print(lower)

### Labeling Functions

##### Stanford NLP LFs

1. Indentation (up, down or same)
2. Indentation after erasing numbering
3. Centering
4. Line Break before right margin
5. Page Change
6. Within top 15% of the page
7. Within bottom 15% of the page
8. Larger Line spacing
9. Justified with spaces in between
10. Similar text in similar position
11. Emphasis by spaces between characters
12. Emphasis by parentheses

In [None]:
@labeling_function(label=pixelLabels.TEXT, name="DOCTR")
def DOCTR_LABEL(x):
    """Placeholder for the DOCTR labeling function.

    A function definition without a body is a syntax error and prevents the
    whole cell from running, so this placeholder abstains on every input
    until the real logic is written.

    Args:
        x: The data instance to label (currently unused).

    Returns:
        ABSTAIN, always.
    """
    # TODO: implement the actual DOCTR-based decision.
    return ABSTAIN
    
# The decorator label names the class this labeling function votes for; this
# one only ever votes NOT_TEXT (or abstains), so it is registered under
# NOT_TEXT.
@labeling_function(label=pixelLabels.NOT_TEXT, name="CHULL_PURE")
def CONVEX_HULL_LABEL_PURE(pixel):
    """Vote NOT_TEXT when ``pixel`` is not set, abstain when it is set.

    Args:
        pixel: Flag for a single pixel; presumably one entry of the
            convex-hull mask -- verify against the caller.

    Returns:
        ABSTAIN when ``pixel`` equals True, otherwise pixelLabels.NOT_TEXT.
    """
    # Explicit comparison kept on purpose: only values equal to True (1)
    # count as set, exactly as in the original condition.
    if pixel == True:  # noqa: E712
        return ABSTAIN
    return pixelLabels.NOT_TEXT
    
# NOTE(review): this labeling function is registered for TEXT but also votes
# NOT_TEXT; confirm that SPEAR accepts votes for a class other than the
# decorator label.
@labeling_function(label=pixelLabels.TEXT, name="CHULL_NOISE")
def CONVEX_HULL_LABEL_NOISE(pixel):
    """Vote TEXT when ``pixel`` is set and NOT_TEXT otherwise.

    Args:
        pixel: Flag for a single pixel; presumably one entry of the
            convex-hull mask -- verify against the caller.

    Returns:
        pixelLabels.TEXT when ``pixel`` equals True, otherwise
        pixelLabels.NOT_TEXT.
    """
    # Explicit comparison kept on purpose: only values equal to True (1)
    # count as set, exactly as in the original condition.
    if pixel == True:  # noqa: E712
        return pixelLabels.TEXT
    return pixelLabels.NOT_TEXT
    

# NOTE(review): this labeling function is registered for TEXT but also votes
# NOT_TEXT; confirm that SPEAR accepts votes for a class other than the
# decorator label.
@labeling_function(label=pixelLabels.TEXT, name="SKIMAGE_EDGES")
def EDGES_LABEL(pixel):
    """Vote TEXT when ``pixel`` is set and NOT_TEXT otherwise.

    Args:
        pixel: Flag for a single pixel; presumably one entry of the
            edge-detection result -- verify against the caller.

    Returns:
        pixelLabels.TEXT when ``pixel`` equals True, otherwise
        pixelLabels.NOT_TEXT.
    """
    # Explicit comparison kept on purpose: only values equal to True (1)
    # count as set, exactly as in the original condition.
    if pixel == True:  # noqa: E712
        return pixelLabels.TEXT
    return pixelLabels.NOT_TEXT
    
    
# NOTE(review): this labeling function is registered for TEXT but also votes
# NOT_TEXT; confirm that SPEAR accepts votes for a class other than the
# decorator label.
@labeling_function(label=pixelLabels.TEXT, name="PILLOW_EDGES")
def PILLOW_EDGES_LABEL(pixel):
    """Vote TEXT when ``pixel`` is set and NOT_TEXT otherwise.

    This function used to be defined twice with identical decorator and
    body; a single definition is kept.

    Args:
        pixel: Flag for a single pixel; presumably one entry of the Pillow
            edge-detection result -- verify against the caller.

    Returns:
        pixelLabels.TEXT when ``pixel`` equals True, otherwise
        pixelLabels.NOT_TEXT.
    """
    # Explicit comparison kept on purpose: only values equal to True (1)
    # count as set, exactly as in the original condition.
    if pixel == True:  # noqa: E712
        return pixelLabels.TEXT
    return pixelLabels.NOT_TEXT

In [None]:
# NOTE(review): template-style sanity check. pixelLabels has no PIXEL member
# and no sampleLabels enum exists in this notebook, so the function is wired
# to pixelLabels.TEXT; confirm the intended class.
@labeling_function(label=pixelLabels.TEXT, name="CHECK_PIXEL_LABELING")
def PIXEL_LABEL(x):
    """Sample labeling function that votes TEXT when ``x`` is "red".

    The ``label`` argument of the decorator names the class this labeling
    function corresponds to.

    Args:
        x: The value to check.

    Returns:
        pixelLabels.TEXT when ``x == "red"``, otherwise ABSTAIN.
    """
    if x == "red":
        return pixelLabels.TEXT
    return ABSTAIN
    
@continuous_scorer(name="INVERSE  SCORER")
def continious(x):
    """Return ``x`` unchanged when it is below 1, otherwise its reciprocal.

    Args:
        x: A number.

    Returns:
        ``x`` if ``x < 1``, else ``1 / x``.
    """
    return x if x < 1 else 1 / x

# Smoke-test the sample labeling function defined in this cell. The call
# result is unpacked as a pair; only its first element (the label) is shown.
label, _ = PIXEL_LABEL("red")
print(label)

### Read Data

In [18]:
input_dir = './data/images/'
results_dir = './data/results/img_processing'

# X holds the loaded images; images holds the matching file names at the
# same index.
X = []
images = []

# os.listdir returns entries in arbitrary order, so sort them to keep the
# contents of X and images reproducible across runs and machines.
for img_file in sorted(os.listdir(input_dir)):
    image = io.imread(os.path.join(input_dir, img_file))
    X.append(image)
    images.append(img_file)

### Fetch Information

In [19]:
# with open(results_dir + 'doctr/dimensions.pkl', 'rb') as f:
#     doctr = pickle.load(f)
    
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# result files that were produced by a trusted pipeline.
#
# results_dir already points at the img_processing folder, so the file names
# are joined onto it directly instead of repeating the folder name.
with open(os.path.join(results_dir, 'convex_hull_results.pkl'), 'rb') as f:
    chull = pickle.load(f)

with open(os.path.join(results_dir, 'edges_results.pkl'), 'rb') as f:
    edges = pickle.load(f)

# Kept under its own name so it does not overwrite `edges` loaded above.
with open(os.path.join(results_dir, 'pillow_edges_results.pkl'), 'rb') as f:
    pillow_edges = pickle.load(f)

In [20]:
chull

{'docbank_test_page-0017.jpg': array([[False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        ...,
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False]]),
 'docbank_test_page-0005.jpg': array([[False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        ...,
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False]]),
 'docbank_test_page-0018.jpg': array([[False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        ...,
        [Fal

### LFSET

In [6]:
from spear.labeling import LFSet

In [None]:
# Labeling functions defined in this notebook. DOCTR_LABEL is left out
# because it has no real implementation yet.
LFS = [
    PIXEL_LABEL,
    CONVEX_HULL_LABEL_PURE,
    CONVEX_HULL_LABEL_NOISE,
    EDGES_LABEL,
    PILLOW_EDGES_LABEL,
]

# Collect the labeling functions into a single named rule set.
rules = LFSet("DETECTION_LF")
rules.add_lf_list(LFS)

In [None]:
import numpy as np
from spear.labeling import PreLabels

# X is a plain Python list of images, so its length is taken with len();
# a list has no .shape attribute.
R = np.zeros((len(X), len(rules.get_lfs())))

# NOTE(review): X_feats and Y are not defined anywhere in the visible part of
# this notebook; define them (or drop the arguments if PreLabels allows it)
# before running this cell.
sms_noisy_labels = PreLabels(name="sms",
                             data=X,
                             data_feats=X_feats,
                             gold_labels=Y,
                             rules=rules,
                             # The label enum declared in this notebook.
                             labels_enum=pixelLabels,
                             num_classes=2)
L, S = sms_noisy_labels.get_labels()

In [None]:
from spear.labeling import LFAnalysis

# Run the labeling-function analysis on the PreLabels object built earlier,
# with plotting requested.
analyse = sms_noisy_labels.analyse_lfs(plot=True)

# Show the first 16 rows of the analysis.
# NOTE(review): presumably `analyse` is a pandas DataFrame -- confirm.
# `display` is not imported here; it is available as a built-in when this
# runs inside IPython/Jupyter.
result = analyse.head(16)
display(result)