Transfer learning for object detection, following the tutorial at https://towardsdatascience.com/building-your-own-object-detector-pytorch-vs-tensorflow-and-how-to-even-get-started-1d314691d4ae

In [1]:
import pycocotools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.utils.data
from PIL import Image
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import pickle

In [2]:
import re
# NOTE(review): presumably starts OpenCV's GUI window thread; no cv2 window is
# opened in the cells visible here -- confirm this call is still needed.
cv2.startWindowThread()

class ExpressionImageDataset(Dataset):
    """
    An expression-level dataset.

    Each row of the pickled table describes one expression: the path of its
    image plus its 'traces_data', 'tokens' and 'latex' entries. Image paths
    are stored Windows-style in the pickle and are re-anchored under
    ``data_root`` when the dataset is built.
    """

    # Folder the pickled image paths are re-anchored to when no data_root is given.
    DEFAULT_DATA_ROOT = "/content/drive/My Drive/10617 Data/"

    def __init__(self, pickle_file, transform=None, data_root=None):
        """
        Args:
            pickle_file (string): Path to dataset pickle file.
            transform (callable, optional): Optional transform to be applied
                    on a sample.
            data_root (string, optional): Folder prepended to every image
                    path. Defaults to DEFAULT_DATA_ROOT.
        """
        # NOTE(security): pickle.load can execute arbitrary code while loading;
        # only point this at dataset files you trust.
        with open(pickle_file, 'rb') as f:
            self.df_data = pd.DataFrame(pickle.load(f))

        if data_root is None:
            data_root = self.DEFAULT_DATA_ROOT
        if data_root and not data_root.endswith("/"):
            data_root += "/"

        # Drop the first two backslash-separated components of the stored path,
        # switch the remainder to forward slashes, and anchor it under data_root.
        self.df_data["img_path"] = self.df_data["img_path"].apply(
            lambda stored: data_root + "/".join(stored.split("\\")[2:])
        )

        self.transform = transform

    def __len__(self):
        """Return the number of expressions in the dataset."""
        return len(self.df_data)

    def __getitem__(self, idx):
        """
        Load one expression.

        Args:
            idx (int or tensor): Positional row index.

        Returns:
            dict with keys 'image' (binarized image, values 0 or 255),
            'image_bitmap' (the same image divided by 255.0), 'traces_data',
            'tokens' and 'latex'; or whatever ``transform`` returns for that
            dict when a transform was supplied.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        row = self.df_data.iloc[idx]
        img_path = row['img_path']

        # CV2 will read the image with white being 255 and black being 0, but since
        # our token-level training set uses binary arrays to represent images, we
        # need to binarize our image here as well.
        image_raw = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if image_raw is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail here with the offending path instead of inside cv2.threshold.
            raise FileNotFoundError("Could not read image: {}".format(img_path))
        image_binarized = cv2.threshold(image_raw, 127, 255, cv2.THRESH_BINARY)[1]
        image_bitmap = image_binarized / 255.0

        sample = {
            'image': image_binarized,
            'image_bitmap': image_bitmap,
            'traces_data': row['traces_data'],
            'tokens': row['tokens'],
            'latex': row['latex']
        }

        if self.transform:
            sample = self.transform(sample)

        return sample

In [3]:
# Pickled expression-level splits on the mounted Drive.
train_exp_path = "/content/drive/My Drive/10617 Data/train/train.pickle"
test_exp_path = "/content/drive/My Drive/10617 Data/test/test.pickle"

# Build the train split first, then the test split.
train_exp_set, test_exp_set = (
    ExpressionImageDataset(split_path)
    for split_path in (train_exp_path, test_exp_path)
)

In [4]:
# Fetch one sample from each split (presumably a check that the image paths load).
sample = train_exp_set[15]
sample = test_exp_set[15]
# /content/drive/My Drive/10617 Data/test/images/KME1G3_0_sub_10.inkml.png

In [5]:
# Trace data of one test expression, used further down in this cell to try
# out draw_traces_data.
# train_traces_data = train_exp_set[2]['traces_data']
test_traces_data = test_exp_set[2]['traces_data']


def get_traces_data_stats(traces_data):
    """
    Measure the extent of all coordinates in an expression.

    Args:
        traces_data: Iterable of patterns; each pattern's 'trace_group' is a
            sequence of traces, and each trace a sequence of (x, y) points.

    Returns:
        (x_min, y_min, width, height), where width and height count both end
        coordinates (max - min + 1).
    """
    points = np.array([
        point
        for pattern in traces_data
        for stroke in pattern['trace_group']
        for point in stroke
    ])

    lower = np.min(points, axis=0)
    upper = np.max(points, axis=0)

    x_min, y_min = lower
    width, height = upper - [x_min, y_min] + 1

    return x_min, y_min, width, height

def get_trace_group_bounding_box(trace_group):
    """
    Compute the bounding box around every point of one trace group.

    Args:
        trace_group: Sequence of traces, each a sequence of (x, y) points.

    Returns:
        (x_min, y_min, width, height), where width and height count both end
        coordinates (max - min + 1).
    """
    coords = np.array([point for stroke in trace_group for point in stroke])

    x_min, y_min = coords.min(axis=0)
    extent = coords.max(axis=0) - [x_min, y_min] + 1
    width, height = extent

    return x_min, y_min, width, height
    

def draw_traces_data(traces_data, max_dim=1000, thickness=5, pad_factor=0.05):
    """
    Render an expression's traces into a binary image and locate each pattern.

    Args:
        traces_data: Iterable of patterns; each pattern's 'trace_group' is a
            sequence of traces, and each trace a sequence of (x, y) points.
        max_dim (int): Length in pixels of the longer image side before padding.
        thickness (int): Stroke thickness in pixels passed to cv2.line.
        pad_factor (float): Fraction of each scaled dimension added as white
            margin on both sides of that dimension.

    Returns:
        (image, boxes): image is a 2-D uint8 array with 255 for background and
        0 for ink. boxes holds one (x_min, y_min, x_max, y_max) tuple of ints
        per pattern, in pixel coordinates of the padded image.
    """
    im_x_min, im_y_min, width, height = get_traces_data_stats(traces_data)
    origin = np.array([im_x_min, im_y_min])

    # Scale so the longer side is max_dim pixels; keep at least one pixel on
    # the shorter side so extreme aspect ratios do not yield an empty canvas.
    sf = max_dim / max(height, width)
    scaled_height = max(1, int(height * sf))
    scaled_width = max(1, int(width * sf))

    # White (1) canvas; strokes are drawn in black (0).
    image = np.ones((scaled_height, scaled_width))

    # Draw the traces on the scaled image.
    for pattern in traces_data:
        for trace in pattern['trace_group']:
            if len(trace) == 0:
                continue

            # Subtract out-of-place so integer traces and a float origin (or
            # the reverse) do not trip an in-place casting error.
            scaled = ((np.array(trace) - origin).astype(np.float64) * sf).astype(int)
            points = [tuple(int(c) for c in point) for point in scaled]

            if len(points) == 1:
                # A single-sample trace (a dot) has no segment of its own;
                # draw it as a zero-length line so it still leaves ink inside
                # the bounding box reported for it below.
                segments = [(points[0], points[0])]
            else:
                segments = zip(points, points[1:])

            for start, end in segments:
                cv2.line(image, start, end, color=0, thickness=thickness)

    # Pad the scaled image with a white margin.
    pad_rows = int(pad_factor * scaled_height)
    pad_cols = int(pad_factor * scaled_width)
    image = np.pad(image,
                   pad_width=((pad_rows, pad_rows), (pad_cols, pad_cols)),
                   mode='constant',
                   constant_values=1)

    # Binarize, then move to the 0..255 uint8 range OpenCV expects.
    image = ((image > 0) * 255).astype(np.uint8)

    # One box per pattern, shifted and scaled into padded-image coordinates.
    boxes = []
    for pattern in traces_data:
        rect_x_min, rect_y_min, rect_width, rect_height = get_trace_group_bounding_box(
            pattern['trace_group'])

        left = int((rect_x_min - im_x_min) * sf + pad_cols)
        top = int((rect_y_min - im_y_min) * sf + pad_rows)
        box_width = int(rect_width * sf)
        box_height = int(rect_height * sf)

        boxes.append((left, top, left + box_width, top + box_height))

    return image, boxes
    
# Render the sample expression and show the image size and its boxes.
image, boxes = draw_traces_data(test_traces_data)
print(image.shape, boxes, sep="\n")

(397, 1100)
[(50, 213, 123, 284), (164, 238, 199, 269), (338, 222, 1038, 250), (299, 123, 340, 127), (368, 70, 419, 169), (447, 109, 492, 182), (556, 18, 1049, 193), (656, 86, 711, 169), (746, 68, 773, 123), (779, 157, 806, 159), (862, 98, 916, 176), (937, 135, 983, 174), (995, 146, 1030, 183), (596, 301, 642, 378), (648, 323, 711, 374)]


### Train and test data prep for object recognition
Train DataFrame output: `obj_df.csv`

Test DataFrame output: `test_obj_recognition_csv.csv`

Each output has the tokens, one-hot-encoded labels, ground-truth bounding boxes, LaTeX, and rendered NumPy images.

In [9]:
%%time
# Render every training expression and collect its bounding boxes.
# NOTE: box_list / numpy_list / possible_errors are rebound by the test-set
# rendering cell, so whichever cell consumes them must run right after the
# loop that produced them.
box_list = {}         # row position -> list of (x_min, y_min, x_max, y_max), None if rendering failed
numpy_list = {}       # row position -> rendered uint8 image, None if rendering failed
possible_errors = {}  # row position -> True when the sample's tokens differ from the frame's
failed_rows = []      # row positions whose rendering raised

for i in range(len(train_exp_set.df_data)):
    if i % 1000 == 0:
        print(i)

    try:
        # Get the specific row and render its trace data.
        curr_row = train_exp_set[i]
        test_traces_data = curr_row['traces_data']
        test_tokens = "".join(curr_row['tokens'])
        image, boxes = draw_traces_data(test_traces_data)

        # Double check that the sample matches the frame row.
        df_token = "".join(train_exp_set.df_data.iloc[i]["tokens"])
    except Exception as exc:
        # Report the cause, and keep a placeholder so the per-row results stay
        # aligned with the frame when they are attached by position later.
        print("error at {}: {!r}".format(i, exc))
        failed_rows.append(i)
        box_list[i] = None
        numpy_list[i] = None
        continue

    possible_errors[i] = test_tokens != df_token
    box_list[i] = boxes
    numpy_list[i] = image

if failed_rows:
    print("rendering failed for {} rows: {}".format(len(failed_rows), failed_rows))

err = possible_errors.values()
print(np.sum(list(err))) # #errors

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500
7600
7700
7800
7900
8000
8100
CPU times: user 1min 19s, sys: 6.43 s, total: 1min 26s
Wall time: 1min 48s


In [6]:
%%time
# Render every test expression and collect its bounding boxes.
# NOTE: box_list / numpy_list / possible_errors are also assigned by the
# training-set rendering cell, so whichever cell consumes them must run right
# after the loop that produced them.
box_list = {}         # row position -> list of (x_min, y_min, x_max, y_max), None if rendering failed
numpy_list = {}       # row position -> rendered uint8 image, None if rendering failed
possible_errors = {}  # row position -> True when the sample's tokens differ from the frame's
failed_rows = []      # row positions whose rendering raised

for i in range(len(test_exp_set.df_data)):
    if i % 100 == 0:
        print(i)

    try:
        # Get the specific row and render its trace data.
        curr_row = test_exp_set[i]
        test_traces_data = curr_row['traces_data']
        test_tokens = "".join(curr_row['tokens'])
        image, boxes = draw_traces_data(test_traces_data)

        # Double check that the sample matches the frame row.
        df_token = "".join(test_exp_set.df_data.iloc[i]["tokens"])
    except Exception as exc:
        # Report the cause, and keep a placeholder so the per-row results stay
        # aligned with the frame when they are attached by position later.
        print("error at {}: {!r}".format(i, exc))
        failed_rows.append(i)
        box_list[i] = None
        numpy_list[i] = None
        continue

    possible_errors[i] = test_tokens != df_token
    box_list[i] = boxes
    numpy_list[i] = image

if failed_rows:
    print("rendering failed for {} rows: {}".format(len(failed_rows), failed_rows))

err = possible_errors.values()
print(np.sum(list(err))) # #errors


0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
CPU times: user 14.4 s, sys: 1.18 s, total: 15.5 s
Wall time: 13min 37s


In [15]:
import gc
from sklearn.preprocessing import OneHotEncoder as OHE

# Clear space before building more columns.
gc.collect()

# box_list / numpy_list are attached by position and are also assigned by the
# test-set rendering cell, so make sure they currently hold the training results.
if len(box_list) != len(train_exp_set.df_data) or len(numpy_list) != len(train_exp_set.df_data):
    raise ValueError(
        "box_list/numpy_list hold {}/{} entries but the training frame has {} rows; "
        "re-run the training-set rendering cell right before this one".format(
            len(box_list), len(numpy_list), len(train_exp_set.df_data)))

# Flatten the per-expression token lists in one pass (summing lists re-copies
# the growing result for every row).
tokens = [token for row_tokens in train_exp_set.df_data["tokens"] for token in row_tokens]
ohe_categories = pd.Series(tokens).unique()

handle = "ignore" #or error or ignore... maybe ignore is safer

# The vocabulary is fixed up front, so fit once on it and only transform rows.
sorted_categories = np.array(sorted(ohe_categories))
ohe = OHE(categories=[sorted_categories], handle_unknown=handle)
ohe.fit(sorted_categories.reshape(-1, 1))

ohe_input = train_exp_set.df_data["tokens"].apply(lambda x: ohe.transform(np.array(x).reshape(-1, 1)))

print(len(ohe.categories_[0]))

train_exp_set.df_data["true_location"] = list(box_list.values())
train_exp_set.df_data["numpy_image"] = list(numpy_list.values())
train_exp_set.df_data["labels"] = ohe_input

# Export to csv.
# NOTE(review): to_csv writes each array / sparse-matrix cell as its string
# form, and numpy abbreviates large arrays with '...', so "numpy_image" most
# likely cannot be reloaded from this file -- confirm, and consider
# DataFrame.to_pickle if the images must round-trip.
train_exp_set.df_data.to_csv("obj_df.csv")

In [22]:
# box_list / numpy_list are attached by position and are also assigned by the
# training-set rendering cell, so make sure they currently hold the test results.
if len(box_list) != len(test_exp_set.df_data) or len(numpy_list) != len(test_exp_set.df_data):
    raise ValueError(
        "box_list/numpy_list hold {}/{} entries but the test frame has {} rows; "
        "re-run the test-set rendering cell right before this one".format(
            len(box_list), len(numpy_list), len(test_exp_set.df_data)))

# Use the same ohe as train: transform only, so the encoder is never refitted
# on test rows.
ohe_input = test_exp_set.df_data["tokens"].apply(lambda x: ohe.transform(np.array(x).reshape(-1, 1)))
print(len(ohe.categories_[0]))

test_exp_set.df_data["true_location"] = list(box_list.values())
test_exp_set.df_data["numpy_image"] = list(numpy_list.values())
test_exp_set.df_data["labels"] = ohe_input

# Export to csv.
# NOTE(review): to_csv writes each array / sparse-matrix cell as its string
# form, and numpy abbreviates large arrays with '...', so "numpy_image" most
# likely cannot be reloaded from this file -- confirm, and consider
# DataFrame.to_pickle if the images must round-trip.
obj_df = test_exp_set.df_data[["tokens", "latex", "true_location", "img_path", "numpy_image", "labels"]]
obj_df.to_csv("test_obj_recognition_csv.csv")