In [3]:
import pickle 
import pandas as pd 
import os
import numpy as np 
import cv2
from keras.preprocessing import image 
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from keras.utils import to_categorical
from PyFunctions import var
import random



def get_edged(img, dim, low_threshold = 10, high_threshold = 200, blur_kernel = (3, 3)):
    '''Return a Canny edge map of img, resized to dim.

    The image is first smoothed with a Gaussian blur, then passed through the Canny
    edge detector, and finally the edge map is resized with cubic interpolation.

    Parameters
    ----------
    img : image array as returned by cv2.imread.
    dim : target size handed unchanged to cv2.resize as dsize.
        NOTE(review): OpenCV reads dsize as (width, height); for non-square sizes
        confirm this is the orientation the caller expects.
    low_threshold, high_threshold : hysteresis thresholds passed to cv2.Canny.
        The defaults (10, 200) are the values this function always used.
    blur_kernel : kernel size passed to cv2.GaussianBlur; defaults to the original (3, 3).

    Returns
    -------
    The resized edge map produced by cv2.resize.
    '''
    # A sigma of 0 lets OpenCV derive the blur strength from the kernel size.
    blurred = cv2.GaussianBlur(img, blur_kernel, 0)
    edges = cv2.Canny(blurred, low_threshold, high_threshold)
    return cv2.resize(edges, dim, interpolation = cv2.INTER_CUBIC)


def get_image_value(path, dim, edge = False): 
    '''Read one image from path and return it as an array sized according to dim.

    Parameters
    ----------
    path : location of the image file.
    dim : target size.  It is passed to get_edged when edge is truthy, otherwise to
        keras' load_img as target_size.
        NOTE(review): keras documents target_size as (height, width) while cv2.resize
        (used by get_edged) takes (width, height); the two branches only agree on
        orientation for square sizes - confirm before using a non-square dim.
    edge : when truthy, return the edge map produced by get_edged instead of the
        colour image.

    Returns
    -------
    edge truthy : the array returned by get_edged (not divided by 255).
    otherwise   : the keras image array divided by 255.

    Raises
    ------
    OSError if edge is truthy and OpenCV cannot read the file.
    '''
    if edge:
        img = cv2.imread(path)
        # cv2.imread reports a missing or undecodable file by returning None rather
        # than raising, so fail here with the offending path instead of deep inside OpenCV.
        if img is None:
            raise OSError(f'Could not read image file: {path}')
        return get_edged(img, dim)

    img = image.load_img(path, target_size = dim)
    return image.img_to_array(img) / 255

def get_img_array(img_paths, dim, edge): 
    '''Load every image listed in img_paths and stack the results into one numpy array.

    Each path is handed to get_image_value together with dim and edge, so the
    per-image format is decided there.  A tqdm progress bar tracks the loading.
    When edge is truthy the stacked array is reshaped to gain a trailing axis of
    length 1; otherwise the stacked array is returned unchanged.
    '''
    from tqdm import tqdm
    loaded = [get_image_value(img_path, dim, edge) for img_path in tqdm(img_paths)]
    stacked = np.array(loaded)
    if not edge:
        return stacked
    # Edge maps are stacked as (count, rows, cols); append an explicit axis of size 1.
    count, rows, cols = stacked.shape[0], stacked.shape[1], stacked.shape[2]
    return stacked.reshape(count, rows, cols, 1)
        
def _list_image_paths(folder):
    '''Return "folder/name" for every entry os.listdir reports in folder.'''
    return [f'{folder}/{name}' for name in os.listdir(folder)]


def get_pickles(nn_type, edge = False, balance = False):
    '''Build a stratified train/test split of the weapon images and pickle it.

    The image size is looked up in the var module (imported from PyFunctions) based
    on nn_type.  Images are listed from ../Separated/FinalImages/Pistol, Rifle and
    NoWeapon and labelled 1, 2 and 0 respectively.  The split is written to
    ../Pickles/{nn_type}_tts.p, or ../Pickles/edge_{nn_type}_tts.p when edge is set.
    The ../Pickles folder must already exist.

    Parameters
    ----------
    nn_type : 'normal', 'mobilenet', 'inceptionnet', 'vgg16' or 'alexnet'.
    edge : when truthy, images are loaded through the edge path of get_img_array.
    balance : when truthy, the pistol and no-weapon classes are each cut down to at
        most len(rifle images) + 150 randomly chosen files.

    Returns
    -------
    (x_train, x_test, y_train, y_test) : the two image arrays from get_img_array and
    the two integer label arrays (not one-hot encoded).

    Raises
    ------
    ValueError if nn_type is not one of the supported names.
    '''
    # Attribute of var holding the input size for each supported architecture.
    # Looked up lazily so only the attribute that is actually needed must exist.
    dimension_attrs = {
        'normal': 'norm_dimension',
        'mobilenet': 'mobilenet_dimension',
        'inceptionnet': 'inception_dimension',
        'vgg16': 'vgg_dimension',
        'alexnet': 'alex_dimension',
    }
    if nn_type not in dimension_attrs:
        # Fail before any file work; previously an unknown name left DIM unbound.
        raise ValueError(
            f'Unknown nn_type {nn_type!r}; expected one of {sorted(dimension_attrs)}'
        )
    DIM = getattr(var, dimension_attrs[nn_type])
    use_edge = bool(edge)

    # Using separated ROI and hand data
    pistol_paths = _list_image_paths('../Separated/FinalImages/Pistol')
    rifle_paths = _list_image_paths('../Separated/FinalImages/Rifle')
    neg_paths = _list_image_paths('../Separated/FinalImages/NoWeapon')

    # NOTE: these shuffles use the global random state and are not seeded here, so
    # the chosen subsample differs between runs even though the split below is seeded.
    random.shuffle(neg_paths)
    # Never keep more negatives than there are pistol images.
    neg_paths = neg_paths[:len(pistol_paths)]

    if balance:
        random.shuffle(pistol_paths)
        class_cap = len(rifle_paths) + 150
        pistol_paths = pistol_paths[:class_cap]
        neg_paths = neg_paths[:class_cap]

    pistol_labels = [1] * len(pistol_paths)
    rifle_labels = [2] * len(rifle_paths)
    neg_labels = [0] * len(neg_paths)

    print(len(pistol_paths), len(rifle_paths), len(neg_paths))

    paths = pistol_paths + rifle_paths + neg_paths
    labels = pistol_labels + rifle_labels + neg_labels

    x_train, x_test, y_train, y_test = train_test_split(paths, labels, stratify = labels, train_size = .90, random_state = 10)

    new_x_train = get_img_array(x_train, DIM, edge = use_edge)
    new_x_test = get_img_array(x_test, DIM, edge = use_edge)

    # Summary of the split: class counts first, then array shapes.
    separator = '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
    report = [
        ('Train Value Counts', pd.Series(y_train).value_counts()),
        ('Test Value Counts', pd.Series(y_test).value_counts()),
        ('X Train Shape', new_x_train.shape),
        ('X Test Shape', new_x_test.shape),
    ]
    for title, value in report:
        print(title)
        print(value)
        print(separator)

    y_train = np.array(y_train)
    y_test = np.array(y_test)

    tts = (new_x_train, new_x_test, y_train, y_test)
    prefix = 'edge_' if use_edge else ''
    # The context manager closes the file even if pickling fails part-way.
    with open(f'../Pickles/{prefix}{nn_type}_tts.p', 'wb') as handle:
        pickle.dump(tts, handle, protocol=4)

    return tts
        
        
def get_samples(nn_type, edge = False): 
    '''Load a split previously written by get_pickles and one-hot encode its labels.

    Parameters
    ----------
    nn_type : network name used when the pickle was created; it selects the file
        ../Pickles/{nn_type}_tts.p.
    edge : when truthy, load ../Pickles/edge_{nn_type}_tts.p instead.

    Returns
    -------
    (x_train, x_test, y_train, y_test) with both label arrays passed through
    to_categorical.
    '''
    prefix = 'edge_' if edge else ''
    # NOTE: pickle.load can execute arbitrary code - only load files this project wrote.
    # The context manager makes sure the file handle is closed after loading.
    with open(f'../Pickles/{prefix}{nn_type}_tts.p', 'rb') as handle:
        x_train, x_test, y_train, y_test = pickle.load(handle)

    return x_train, x_test, to_categorical(y_train), to_categorical(y_test)

In [5]:
# Build and pickle the class-balanced, non-edged split at the 'normal' input size.
x_train, x_test, y_train, y_test = get_pickles(nn_type='normal', edge=False, balance=True)

1520 1370 1520


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3969/3969 [00:13<00:00, 289.38it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 441/441 [00:04<00:00, 99.61it/s]


Train Value Counts
1    1368
0    1368
2    1233
dtype: int64
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Test Value Counts
1    152
0    152
2    137
dtype: int64
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
X Train Shape
(3969, 150, 150, 3)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
X Test Shape
(441, 150, 150, 3)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


In [None]:
from PyFunctions.Functions import *
import cv2

- This notebook involves creating pickled train_test_splits that correspond to a specific dimension and image type.
- Passing `edge = True` creates a pickled file that contains edged photos rather than the originals 
- This notebook was created separately in order to test how it works without interrupting the flow of the CNN notebook
- To change the dimensions of the models, alter the variables found in `var.py` within the PyFunctions folder

In [None]:
# Build and pickle the edged split at the 'normal' input size (balance keeps its default of False).
x_train, x_test, y_train, y_test = get_pickles(nn_type='normal', edge=True)

In [None]:
# See if the edging worked: show the first training image in an OpenCV window.
# waitKey(0) blocks until a key is pressed while the window has focus.
try:
    cv2.imshow('test', x_train[0])
    cv2.waitKey(0)
finally:
    # Close the window even if the wait is interrupted, so it is not left open.
    cv2.destroyAllWindows()

In [None]:
# get_pickles('mobilenet')

In [None]:
# get_pickles('vgg16')