# Project Model v1

## Library

### Import

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as utilData
import torchvision
import matplotlib.pyplot as plt
import cv2
import os
import ast
import pandas as pd

### Fcn: Split Data

In [2]:
def splitData(ratio=(0.8, 0.1, 0.1), iPath="../data/working-wheat-data/train", oPath="../saved/splitData", koPath="../data/working-wheat-data/train.csv"):
    """
    Shuffle the image files found in iPath, split them into training / validation / testing sets by the
    given ratio, and save one {file name: bbox list} dictionary per set into oPath.

    Arguments:
        ratio: a len-3 sequence containing the ratios of training, validation and testing data; entries must be
               non-negative and sum to 1. default = (0.8, 0.1, 0.1)
        iPath: input path (folder of images). default = "../data/working-wheat-data/train"
        oPath: output path (folder, created if missing). default = "../saved/splitData"
        koPath: known bbox output csv path, forwarded to appendKnownOutputs()
    Returns:
        0 on failure (invalid ratio, or the output folder could not be created), 1 on success.
    Creates Files:
        trainData
        valData
        testData

        files are saved via torch.save. each contains a dictionary.
        Keys of dictionaries are file names
        Values of dictionaries are a list of bboxes in the form [[bbox1], [bbox2], ...]
    """
    # Validate the length first: the later checks and the split itself index into ratio.
    if len(ratio) != 3:
        print("<splitData> error in ratio: input must be of length 3")
        return(0)
    if any(r < 0 for r in ratio):
        print("<splitData> error in ratio: entries must be non-negative")
        return(0)
    # Compare with a tolerance: float ratios that add up to 1 on paper may not sum to exactly 1.0.
    if abs(sum(ratio) - 1) > 1e-6:
        print("<splitData> error in ratio: does not sum to 1")
        return(0)

    try:
        os.makedirs(oPath, exist_ok=True)  # An already existing folder is fine
    except OSError:
        print("<splitData> error creating folder {}".format(oPath))
        return(0)

    # Shuffle with a fixed seed.
    # NOTE(review): os.listdir returns names in arbitrary order, so the seed only pins the split for a given
    # listing order -- confirm whether the split has to be reproducible across machines.
    np.random.seed(1234)
    files = [f for f in os.listdir(iPath) if os.path.isfile(os.path.join(iPath, f))]
    np.random.shuffle(files)

    # Training images come first; the remainder is divided between validation and testing.
    trainIndex = round(len(files) * ratio[0])
    trainList = files[:trainIndex]
    remain = files[trainIndex:]

    heldOut = ratio[1] + ratio[2]
    valShare = ratio[1] / heldOut if heldOut > 0 else 0  # ratio = [1, 0, 0] must not divide by zero
    valIndex = round(len(remain) * valShare)
    valList = remain[:valIndex]
    testList = remain[valIndex:]

    # Attach the bboxes from the csv to every file name
    trainDict = appendKnownOutputs(trainList, koPath)
    valDict = appendKnownOutputs(valList, koPath)
    testDict = appendKnownOutputs(testList, koPath)

    # Save the dictionaries
    torch.save(trainDict, oPath+"/trainData")
    torch.save(valDict, oPath+"/valData")
    torch.save(testDict, oPath+"/testData")

    return(1)

### Fcn: Append Known Outputs

In [3]:
def appendKnownOutputs(imgList, koPath):
    """
    Helper that looks up every image file name of imgList in the bbox csv and builds a dictionary from the result.
    Key of the dictionary is the file name, value is a list of bbox lists [[bbox1], [bbox2], ...].
    An image without any row in the csv gets an empty list.

    Arguments:
        imgList: List of image file names
        koPath : Path to the CSV file; its 'image_id' and 'bbox' columns are read

    Returns:
        the dictionary as described above
    """
    db = pd.read_csv(koPath, header=0)  # Open csv

    # Index the csv once (image_id -> raw bbox strings, in file order) instead of
    # scanning the whole table again for every image.
    rawByImage = {}
    for imageId, rawBbox in zip(db['image_id'], db['bbox']):
        rawByImage.setdefault(imageId, []).append(rawBbox)

    imgDict = {}
    for img in imgList:
        imgN = img.split(".jpg")[0]  # the lookup key is the file name up to its ".jpg"
        # Each bbox cell holds the text of a Python list; literal_eval parses it without executing code
        imgDict[img] = [ast.literal_eval(rawBbox) for rawBbox in rawByImage.get(imgN, [])]
    return(imgDict)

### Fcn: Prev Images

In [4]:
def prevImages(dataPath="../saved/splitData/trainData", imgFolder="../data/working-wheat-data/train", maxImages=20):
    """
    Preview images with their bboxes drawn on top, one image per key press.
    Arguments:
        dataPath : path to a dictionary file as generated by splitData() (loaded with torch.load)
        imgFolder: path to the folder holding the images named by the dictionary keys
        maxImages: maximum number of images to show. default = 20
    """
    imgDict = torch.load(dataPath)  # Load Dictionary as generated by splitData
    shown = 0
    for name, bboxs in imgDict.items():
        if shown >= maxImages:  # stop once exactly maxImages images were displayed
            break
        img = cv2.imread(imgFolder+"/"+name)  # read the requested image from the image folder
        if img is None:
            # cv2.imread signals an unreadable/missing file with None instead of raising
            print("<prevImages> could not read image {}".format(imgFolder+"/"+name))
            continue
        for bbox in bboxs:
            # bbox is used as [x, y, width, height]; (0,0,255) is red in OpenCV's BGR order
            topLeft = (int(bbox[0]), int(bbox[1]))
            bottomRight = (int(bbox[0]+bbox[2]), int(bbox[1]+bbox[3]))
            cv2.rectangle(img, topLeft, bottomRight, (0,0,255), 3)
        cv2.imshow('image', img)  # Show the image with its bboxes
        cv2.waitKey(0)  # wait for key press before moving to next image
        shown += 1

### Fcn: OpenCV Image Convert

In [5]:
def openCVImgConvert(func, oPath, iPath="../data/working-wheat-data/train"):
    """
    Function to help quickly apply an openCV image transformation to every image of a folder and save the outputs
    under the same file names in another folder.
    Examples of Open CV features:
    https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_canny/py_canny.html#canny
    https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_gradients/py_gradients.html#gradients
    https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html#morphological-ops
    https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_filtering/py_filtering.html#filtering
    Arguments:
        func: a function which takes in an image, transforms it, and then returns the np array
        oPath: the folder to which the new images are saved to (created if missing)
        iPath: the folder to the original images
    Returns:
        0 if the output folder could not be created, 1 once all files were processed.
    """

    files = [f for f in os.listdir(iPath) if os.path.isfile(os.path.join(iPath, f))]  # Every regular file in iPath
    try:
        os.makedirs(oPath, exist_ok=True)  # An already existing folder is fine
    except OSError:
        print("<openCVImgConvert> error creating folder {}".format(oPath))
        return(0)

    total = len(files)
    for i, f in enumerate(files):  # apply the given func() to every image in the folder
        img = cv2.imread(iPath+"/"+f)
        if img is None:
            # cv2.imread returns None for files it cannot decode; skip them rather than hand None to func
            print("<openCVImgConvert> skipping unreadable file {}".format(f))
        else:
            cv2.imwrite(oPath+"/"+f, func(img))
        if i%200==0: print("Converted {:.2f}% of images".format(100*i/total))
    print("Finished Conversion of Images")
    return(1)

### Class: Image Loader

In [6]:
class imgLoader(utilData.Dataset):
    """
    Custom pytorch map-style dataset that loads images by file name.
    __init__:
        Arguments:
            dataPath: path to a dictionary created by splitData() (loaded with torch.load)
            imgPath : path to an image folder; note, as the class functions by looking up the file name, different folder
                      directories can be given with the same dataPath as long as the images in those folders have the
                      same names as in the original folder.
    __len__ :
        Called when one uses the len() function on an imgLoader object, returns the number of images
    __getitem__(self, idx):
        Function as required by a Map-style dataset. https://pytorch.org/docs/stable/data.html#map-style-datasets
        returns:
            img: A float tensor version of the image
            noBbox: The number of bboxes for this given image (as a float)
            imgName: name of image (for debug purposes)
            bboxList: list of bounding boxes as defined as [[bbox1], [bbox2], ...] (for debug purposes)
        raises:
            FileNotFoundError: the image file is missing or could not be decoded
    """
    def __init__(self, dataPath, imgPath):  # Defining initial variables
        self.imgDict = torch.load(dataPath)
        self.imgPath = imgPath + "/"
        self.keyList = list(self.imgDict.keys())  # fixes an index -> file name order

    def __len__(self):
        return(len(self.keyList))

    def __getitem__(self, idx):
        imgName = self.keyList[idx]  # grab the image name of the given idx
        bboxList = self.imgDict[imgName]

        img = cv2.imread(self.imgPath+imgName)  # load the image from file
        if img is None:
            # cv2.imread returns None instead of raising; fail here with a clear message
            # rather than with an opaque error inside the tensor conversion.
            raise FileNotFoundError("<imgLoader> could not read image {}".format(self.imgPath+imgName))

        # transform the image into a float tensor (channel order is left as loaded by cv2)
        img = torchvision.transforms.ToTensor()(img).float()

        # return FOUR things, as described above.
        return(img, float(len(bboxList)), imgName, bboxList)

### Fcn: Draw Results

In [7]:
def _plotCurves(iters, trainVals, valVals, title, ylabel, savePath):
    """Plot one training/validation curve pair against iters, save it to savePath, show it, then clear the axes."""
    plt.plot(iters, trainVals, '.-', label="Training")
    plt.plot(iters, valVals, '.-', label="Validation")
    plt.title(title)
    plt.xlabel("Epoch"); plt.ylabel(ylabel)
    plt.legend(); plt.grid()
    plt.savefig(savePath)  # save before show
    plt.show()
    plt.cla()  # clear so the next graph (or the next call) does not draw on top of these curves


def drawResults(modelpath, iters, trainLosses, valLosses, trainAcc, valAcc):
    """
    Function used to quickly graph the results of training. Saves and shows two graphs:
    "Accuracy Graph.png" and "Loss Graph.png".
    Arguments:
        modelpath             : prefix the file names are appended to (pass a folder path ending in "/")
        iters                 : list of epoch numbers
        trainLosses, valLosses: lists of calculated losses per epoch
        trainAcc, valAcc      : lists of calculated accuracies per epoch
    """
    _plotCurves(iters, trainAcc, valAcc, "Model Accuracy against Epoch No", "Accuracy", modelpath+"Accuracy Graph.png")
    _plotCurves(iters, trainLosses, valLosses, "Model Loss against Epoch No", "Loss", modelpath+"Loss Graph.png")

### Fcn: Load Data

In [8]:
def loadData(batchsize, dictPath = "../saved/splitData", inPath = "../data/working-wheat-data/train"):
    """
    Build the three shuffled DataLoaders (training, validation, testing) in one call.
    Arguments:
        batchsize: requested batch size, used for all three loaders
        dictPath : folder holding the "trainData", "valData" and "testData" dictionaries created by splitData()
        inPath   : path to an image folder; images are looked up by file name, so any folder whose images
                   carry the same names as the original ones can be combined with the same dictPath.
    Returns:
        trainLoader, valLoader, testLoader: the DataLoaders batched as requested
    """
    splitNames = ("trainData", "valData", "testData")

    # One dataset per split dictionary, all reading images from the same folder
    datasets = [imgLoader(dictPath+"/"+splitName, inPath) for splitName in splitNames]

    # Every split is wrapped in a shuffling loader with the same batch size
    loaders = [utilData.DataLoader(dataset, batch_size=batchsize, shuffle=1) for dataset in datasets]

    trainLoader, valLoader, testLoader = loaders
    return(trainLoader, valLoader, testLoader)

### Fcn: Evaluate Regression

In [9]:
def evalRegress(net, loader, criterion, optimizer, isTraining, gpu=1):
    """
    Function used in trainNet() to evaluate a given net for one epoch.
    Runs two passes over the loader: the first accumulates the loss (and trains if requested), the second
    measures the squared error of the net as it stands after that pass.
    Arguments:
        net       : The net object
        loader    : the loader whose images are being put through the network for evaluation; must yield
                    (img, noBbox, imgName, bboxList) batches and support len()
        criterion : the criterion function
        optimizer : the optimizer function (only used when isTraining is True)
        isTraining: Boolean to indicate if training should occur with evaluation, if True, optimizer will perform step
        gpu       : Boolean to indicate if cuda is to be utilized (only honoured when cuda is available)
    Returns:
        Accuracy: summed squared error between prediction and bbox count per batch, averaged over the batches (float)
        avgLoss : The calculated average loss per batch over the entire epoch (float)
    Raises:
        ValueError: the loader yields no batches
    """
    batchCount = len(loader)
    if batchCount == 0:
        raise ValueError("<evalRegress> loader is empty")

    useCuda = bool(gpu) and torch.cuda.is_available()  # decided once so both passes agree

    def _prepare(img, noBbox):
        # Targets are cast to float32 on every device so they match the net's output dtype
        noBbox = noBbox.float()
        if useCuda:
            img, noBbox = img.cuda(), noBbox.cuda()
        return img, noBbox

    lossTot = 0.0
    for img, noBbox, _, _ in loader:  # if isTraining, computing loss and training, if not, then computing loss
        img, noBbox = _prepare(img, noBbox)
        with torch.set_grad_enabled(bool(isTraining)):  # no autograd graph is needed when only evaluating
            pred = torch.squeeze(net(img), 1)  # (batch, 1) -> (batch,) to line up with noBbox
            loss = criterion(pred, noBbox)
        lossTot += loss.item()  # .item() keeps a plain number instead of holding on to every batch's graph
        if isTraining:
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

    sqErrTot = 0.0
    with torch.set_grad_enabled(False):
        for img, noBbox, _, _ in loader:  # Computing accuracy
            img, noBbox = _prepare(img, noBbox)
            # Squeeze here as well: (batch, 1) - (batch,) would broadcast to a (batch, batch) matrix
            pred = torch.squeeze(net(img), 1)
            sqErrTot += torch.sum((pred-noBbox)**2).item()

    accuracy = sqErrTot/batchCount
    avgLoss = lossTot/batchCount
    return(accuracy, avgLoss)

### Fcn: Train Net

In [10]:
def trainNet(net, data, batchsize, epochNo, lr, oPath="../saved", trainType='RegAdam', evaluate=evalRegress, isCuda=1, draw=1):
    """
    Big boy function that brings all of the functions above together and actually trains the model.
    After every epoch the net's state_dict is saved and a summary line is printed.
    Arguments:
        net      : the neural net object; must expose a .name attribute (used in the output folder name)
        data     : a 2 list of data loaders; [trainLoader, valLoader]
        batchsize: the chosen batchsize (only used to name the output folder)
        epochNo  : the chosen max epochNo
        lr       : the chosen learning rate
        oPath    : root output path for all files. if '/root' is given, will save to '/root/TrainingRuns/<Folder>/
        trainType: string used to easily choose particular parameters such as criterion or optimizer;
                   only 'RegAdam' (MSE loss + Adam) is implemented
        evaluate : the evaluation function, default to evalRegress; called with keyword arguments
                   net, loader, criterion, optimizer, isTraining and expected to return (accuracy, loss)
        isCuda   : boolean to indicate if cuda should be used
        draw     : boolean to indicate if the graph should be drawn
    Returns:
        iters, trainLosses, valLosses, trainAcc, valAcc: for debug purposes. All lists of the values at each epoch
        (an empty tuple if the output folder could not be created)
    Raises:
        ValueError: trainType is not a known training setup
    """
    import time  # local import: wall-clock timing that also works without CUDA

    if trainType == 'RegAdam':
        # Define criterion and optimizers
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(net.parameters(), lr=lr)
        functionName = "RegAdamTrainer"  # Name of the setup used (in case we decide to use different optimizers, use alexnet etc)
    else:
        # Fail with a clear message instead of a NameError further down
        raise ValueError("<trainNet> unknown trainType {!r}".format(trainType))

    # Defining a saving path for ease of use
    modelpath = oPath+"/TrainingRuns/{}/{}_b{}_te{}_lr{}/".format(functionName, net.name, batchsize, epochNo, lr)
    torch.manual_seed(1000)
    try:
        os.makedirs(modelpath, exist_ok=True)  # Make the directory; an existing one is reused
    except OSError:
        print("Error Creating File")
        return()

    trainData, valData = data[0], data[1]  # Loading Required Data

    # NOTE(review): isCuda only affects the timing below; evaluate() is called without a gpu argument,
    # so it decides about CUDA with its own default -- confirm whether isCuda should be forwarded.
    useCuda = bool(isCuda) and torch.cuda.is_available()

    iters, trainLosses, valLosses, trainAcc, valAcc = [], [], [], [], []  # variables to graph and save
    for epoch in range(epochNo):
        if useCuda: torch.cuda.synchronize()  # do not count GPU work that was queued before this epoch
        epochStart = time.perf_counter()

        iters.append(epoch)
        trainResults = evaluate(net=net, loader=trainData, criterion=criterion, optimizer=optimizer, isTraining=True)  # Calculating training error and loss
        valResults   = evaluate(net=net, loader=valData, criterion=criterion, optimizer=optimizer, isTraining=False)

        if useCuda: torch.cuda.synchronize()  # wait for queued GPU work before reading the clock
        elapsed = time.perf_counter() - epochStart

        # evaluate() returns (accuracy, loss) in that order; float() turns 0-dim tensors into plain
        # numbers so the histories can be formatted and plotted directly.
        trainAcc.append(float(trainResults[0])); trainLosses.append(float(trainResults[1]))
        valAcc.append(float(valResults[0]));     valLosses.append(float(valResults[1]))
        torch.save(net.state_dict(), modelpath+"model_epoch{}".format(epoch))

        # The backslash continuations sit inside the string literal, so the leading spaces of the
        # next two lines are part of the printed message.
        print("Epoch {} | Time Taken: {:.2f}s | Train acc: {:.10f},\
                Train loss: {:.10f} | Validation acc: {:.10f}, Validation loss: {:.10f}\
                ".format(epoch, elapsed, trainAcc[epoch], trainLosses[epoch], valAcc[epoch], valLosses[epoch]))

    if draw: drawResults(modelpath, iters, trainLosses, valLosses, trainAcc, valAcc)
    return(iters, trainLosses, valLosses, trainAcc, valAcc)

### Sample code to run the training module

In [11]:
""" Eg (1)
### Use example for imageLoader

dictPath = "../saved/splitData/trainData"
inPath   = "../data/working-wheat-data/train"

loader   = imgLoader(dictPath, inPath)
for img, noBbox, imgName, bboxList in loader:
    ...
"""

""" Eg (2)
### Use example for openCVImgConvert

outPath = "../data/working-wheat-data/cv2_Canny_100_200"
inPath  = "../data/working-wheat-data/train"

edgeDetect = lambda oImg: cv2.Canny(oImg, 100, 200)  # The
openCVImgConvert(edgeDetect, outPath, inPath)  # Note: the images that are outputted have the same name as the original images, they are just in a different folder
"""

""" Eg (3)
### Use example for imageLoader, with the feature-detected images produced from Eg (2)

dictPath = "../saved/splitData/trainData"  # NOTE: DICT PATH IS THE SAME AS WAS IN Eg (1). IT DOES NOT NEED TO CHANGE
inPath   = "../data/working-wheat-data/cv2_Canny_100_200"  # ONLY THE IMAGE DIRECTORY HAS CHANGED

loader   = imgLoader(dictPath, inPath)
for img, noBbox, imgName, bboxList in loader:
    ...
"""

""" Eg (4)
### Use example of prevImages with converted images from Eg (2)
dictPath = "../saved/splitData/trainData"  # NOTE: DICT PATH IS THE SAME AS WAS IN Eg (1). IT DOES NOT NEED TO CHANGE
inPath   = "../data/working-wheat-data/cv2_Canny_100_200"  # ONLY THE IMAGE DIRECTORY HAS CHANGED

prevImages(dataPath=dictPath, imgFolder=inPath)
"""

"""
# BASE CODE FOR a neural net module to pass to the trainNet() function
class exNetClass(nn.Module):
    def __init__(self, name):
        super(exNetClass, self).__init__()
        self.name = name

        self.conv1 = nn.Conv2d(3,   15,  6, stride=2)  # n = 1024 -> 510
        self.conv2 = nn.Conv2d(15,  30,  6, stride=2)  # n = 254  -> 125 (forward applies it after pool1)
        self.pool1 = nn.MaxPool2d(3, 2)                # n = 510  -> 254 (forward applies it right after conv1)
        self.conv3 = nn.Conv2d(30,  60,  6, stride=2)  # n = 125  -> 60
        self.pool2 = nn.MaxPool2d(4, 2)                # n = 60   -> 29

        self.fc1   = nn.Linear(29*29*60, 20)
        self.fc2   = nn.Linear(20, 1)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = F.relu(self.conv2(x))
        x = self.pool2(F.relu(self.conv3(x)))
        x = x.view(-1, 29*29*60)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return(x)


batchsize=64; lr=0.001; epochNo=10
trainLoader, valLoader, _ = loadData(batchsize)
netA = exNetClass("netA"); netA.cuda()
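# trainNet() runs the whole training loop itself and returns the per-epoch histories as a tuple
iters, trainLosses, valLosses, trainAcc, valAcc = trainNet(netA, [trainLoader, valLoader], batchsize, epochNo, lr)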
"""

'\n# BASE CODE FOR a neural net module to push into trainNet class\nclass exNetClass(nn.Module):\n    def __init__(self, name):\n        super(exNetClass, self).__init__()\n        self.name = name\n\n        self.conv1 = nn.Conv2d(3,   15,  6, stride=2)  # n = 1024 -> 510\n        self.conv2 = nn.Conv2d(15,  30,  6, stride=2)  # n = 510  -> 255\n        self.pool1 = nn.MaxPool2d(3, 2)                # n = 255  -> 127\n        self.conv3 = nn.Conv2d(30,  60,  6, stride=2)  # n = 127  -> 62\n        self.pool2 = nn.MaxPool2d(4, 2)                # n = 62   -> 30\n\n        self.fc1   = nn.Linear(29*29*60, 20)\n        self.fc2   = nn.Linear(20, 1)\n\n    def forward(self, x):\n        x = self.pool1(F.relu(self.conv1(x)))\n        x = F.relu(self.conv2(x))\n        x = self.pool2(F.relu(self.conv3(x)))\n        x = x.view(-1, 29*29*60)\n        x = F.relu(self.fc1(x))\n        x = self.fc2(x)\n        return(x)\n\n\nbatchsize=64; lr=0.001; epochNo=10\ntrainLoader, valLoader, _ = loadDat

## Model

In [None]:
# Placeholder paths: both are empty strings here and need to be filled in before this cell is run.
dataPath = ''  # passed to imgLoader as dataPath (the split dictionary file it loads with torch.load)
imgPath  = ''  # passed to imgLoader as imgPath (the image folder)
img_dataset = imgLoader(dataPath=dataPath, imgPath=imgPath)