In [9]:
import os
import numpy as np
import cv2

In [10]:
from google.colab import drive
# Mount Google Drive at /content/drive (requested with force_remount=True).
drive.mount(
    '/content/drive',
    force_remount=True,
)

Mounted at /content/drive


## Convert Schadrack's Train Images to a New Array

In [21]:
# Folder walked for Train .jpg files, and the folder that receives the .npy copies.
original_root, new_root = (
    '/content/drive/MyDrive/CapStone/data/SPLIT_DATA_WITH_OG/Train',
    '/content/drive/MyDrive/CapStone/data/SPLIT_DATA_WITH_OG/Train_arr',
)

In [22]:
# Mirror the folder tree under original_root into new_root and store every
# .jpg as a 50x50 .npy array that keeps the image's base name.
for root, dirs, files in os.walk(original_root):
  rel_path = os.path.relpath(root, original_root)
  new_dir = os.path.join(new_root, rel_path)
  os.makedirs(new_dir, exist_ok=True)
  for file in files:
    if not file.lower().endswith(".jpg"):
      continue
    src_path = os.path.join(root, file)
    img = cv2.imread(src_path)
    if img is None:
      # cv2.imread returns None when a file cannot be decoded; skip it so one
      # bad image does not make cv2.resize abort the whole conversion.
      print(f"Skipping unreadable image: {src_path}")
      continue
    # cv2.resize already returns a NumPy array, so no extra copy is needed.
    arr = cv2.resize(img, (50, 50))
    filename = os.path.splitext(file)[0] + ".npy"
    np.save(os.path.join(new_dir, filename), arr)

## Convert Schadrack's Val Images to a New Array

In [23]:
# Folder walked for Valid .jpg files, and the folder that receives the .npy copies.
original_root, new_root = (
    '/content/drive/MyDrive/CapStone/data/SPLIT_DATA_WITH_OG/Valid',
    '/content/drive/MyDrive/CapStone/data/SPLIT_DATA_WITH_OG/Valid_arr',
)

In [24]:
# Mirror the folder tree under original_root into new_root and store every
# .jpg as a 50x50 .npy array that keeps the image's base name.
for root, dirs, files in os.walk(original_root):
  rel_path = os.path.relpath(root, original_root)
  new_dir = os.path.join(new_root, rel_path)
  os.makedirs(new_dir, exist_ok=True)
  for file in files:
    if not file.lower().endswith(".jpg"):
      continue
    src_path = os.path.join(root, file)
    img = cv2.imread(src_path)
    if img is None:
      # cv2.imread returns None when a file cannot be decoded; skip it so one
      # bad image does not make cv2.resize abort the whole conversion.
      print(f"Skipping unreadable image: {src_path}")
      continue
    # cv2.resize already returns a NumPy array, so no extra copy is needed.
    arr = cv2.resize(img, (50, 50))
    filename = os.path.splitext(file)[0] + ".npy"
    np.save(os.path.join(new_dir, filename), arr)

## Convert Schadrack's Test Images to a New Array

In [25]:
# Folder walked for Test .jpg files, and the folder that receives the .npy copies.
original_root, new_root = (
    '/content/drive/MyDrive/CapStone/data/SPLIT_DATA_WITH_OG/Test',
    '/content/drive/MyDrive/CapStone/data/SPLIT_DATA_WITH_OG/Test_arr',
)

In [26]:
# Mirror the folder tree under original_root into new_root and store every
# .jpg as a 50x50 .npy array that keeps the image's base name.
for root, dirs, files in os.walk(original_root):
  rel_path = os.path.relpath(root, original_root)
  new_dir = os.path.join(new_root, rel_path)
  os.makedirs(new_dir, exist_ok=True)
  for file in files:
    if not file.lower().endswith(".jpg"):
      continue
    src_path = os.path.join(root, file)
    img = cv2.imread(src_path)
    if img is None:
      # cv2.imread returns None when a file cannot be decoded; skip it so one
      # bad image does not make cv2.resize abort the whole conversion.
      print(f"Skipping unreadable image: {src_path}")
      continue
    # cv2.resize already returns a NumPy array, so no extra copy is needed.
    arr = cv2.resize(img, (50, 50))
    filename = os.path.splitext(file)[0] + ".npy"
    np.save(os.path.join(new_dir, filename), arr)

## Convert all Images in the Database and store as Arrays with size of (224, 224)

In [None]:
# Folder walked for the .jpg database, and the folder that receives the 224x224 .npy copies.
original_root, new_root = (
    '/content/drive/MyDrive/CapStone/data/LOW_QUALITY_WITH_OG/',
    '/content/drive/MyDrive/CapStone/data/NPY_OG_224',
)

In [None]:
# Mirror the folder tree under original_root into new_root and store every
# .jpg as a 224x224 .npy array that keeps the image's base name.
for root, dirs, files in os.walk(original_root):
  rel_path = os.path.relpath(root, original_root)
  new_dir = os.path.join(new_root, rel_path)
  os.makedirs(new_dir, exist_ok=True)
  for file in files:
    if not file.lower().endswith(".jpg"):
      continue
    src_path = os.path.join(root, file)
    img = cv2.imread(src_path)
    if img is None:
      # cv2.imread returns None when a file cannot be decoded; skip it so one
      # bad image does not make cv2.resize abort the whole conversion.
      print(f"Skipping unreadable image: {src_path}")
      continue
    # cv2.resize already returns a NumPy array, so no extra copy is needed.
    arr = cv2.resize(img, (224, 224))
    filename = os.path.splitext(file)[0] + ".npy"
    np.save(os.path.join(new_dir, filename), arr)

## Create Training and Test set

In [None]:
def npread(path, size=[224, 224], resize=False, color=1, div=1):
  """
    Load the array stored at `path` and return it divided by `div`.

    When `resize` is true the loaded array is first reshaped to
    (size[0], size[1], 3). NOTE(review): this is `ndarray.reshape`, not an
    interpolating resize, so the element count must already match.
    `color` is accepted but never used.
  """
  # NOTE(review): allow_pickle=True permits unpickling object arrays; only
  # point this at trusted files.
  loaded = np.load(path, allow_pickle=True)
  if not resize:
    return loaded / div
  target_shape = (size[0], size[1], 3)
  return loaded.reshape(target_shape) / div

def get_data(path):
    """
    Collect file paths per cattle ID from a directory of `<prefix>_<id>` folders.

    Parameters
    ----------
    path : str
        Directory whose sub-folders are named `<prefix>_<id>`. A trailing
        slash is optional.

    Returns
    -------
    dict
        Maps each ID (the text after the underscore) to the sorted list of
        paths of the entries found directly inside that folder.

    Raises
    ------
    ValueError
        If a visible sub-folder name does not contain exactly one underscore.
    """
    data = {}
    # os.listdir order is arbitrary; sorting keeps the positional train/test
    # split that is built from these lists reproducible between runs.
    for folder in sorted(os.listdir(path)):
        folder_path = os.path.join(path, folder)
        # Ignore stray files and hidden folders (e.g. ".ipynb_checkpoints"
        # would otherwise be registered under the key "checkpoints").
        if folder.startswith('.') or not os.path.isdir(folder_path):
            continue
        parts = folder.split('_')
        if len(parts) != 2:
            raise ValueError(
                f"Expected a folder named '<prefix>_<id>', got {folder!r}"
            )
        key = parts[1]  # get cattle ID
        data[key] = [
            os.path.join(folder_path, file)
            for file in sorted(os.listdir(folder_path))
        ]
    return data

def train_test_split(data, ratio = 0.2):
    """
    Split every list in `data` positionally into a train part and a test part.

    For each key the first int(len(values) * ratio) items go to the test
    dict and the remaining items go to the train dict.

    Returns
    -------
    tuple
        (train, test) dicts with the same keys as `data`.
    """
    train, test = {}, {}
    for key, vals in data.items():
        cut = int(len(vals) * ratio)
        test[key], train[key] = vals[:cut], vals[cut:]
    return train, test

def _stack_per_class(batches):
    """Combine per-ID image batches into one array, tolerating unequal batch sizes."""
    if len({batch.shape for batch in batches}) <= 1:
        # Every batch has the same shape (or there are none): a regular array works.
        return np.array(batches)
    # Batches of different lengths form a ragged sequence, which np.array()
    # rejects on recent NumPy versions; build the 1-D object array explicitly.
    stacked = np.empty(len(batches), dtype=object)
    for idx, batch in enumerate(batches):
        stacked[idx] = batch
    return stacked


def get_data_label(path,ratio = 0.2, resize=False, size=[224,224]):
    """
    Given path returns train and test images and labels associated with it

    Parameters
    ----------
    path : str
        Directory passed to `get_data`.
    ratio : float
        Fraction handed to `train_test_split` for the test part.
    resize, size :
        Forwarded unchanged to `npread` for every file.

    Returns
    -------
    tuple
        (train_images, train_labels, test_images, test_labels, test_files).
        The image containers hold one entry per ID, each entry being the
        array of that ID's images loaded with `npread(..., div=255)`. The
        label arrays hold the IDs in the same order. `test_files` is the
        test dict of file paths returned by `train_test_split`.
    """
    _data = get_data(path)
    _train, _test = train_test_split(_data, ratio=ratio)

    def _load_split(split):
        # One stacked array of images per ID, plus the IDs in matching order.
        labels = list(split.keys())
        images = [
            # Do we resize? This can distort due to aspect ratio
            np.array([npread(files, div=255, resize=resize, size=size) for files in vals])
            for vals in split.values()
        ]
        return images, labels

    train_image, train_labels = _load_split(_train)
    test_image, test_labels = _load_split(_test)

    return (
        _stack_per_class(train_image),
        np.array(train_labels),
        _stack_per_class(test_image),
        np.array(test_labels),
        _test,
    )

In [None]:
# Build the per-ID train/test arrays from the 224x224 .npy folder (20% of each ID goes to test).
datapath = '/content/drive/MyDrive/CapStone/data/NPY_OG_224/'
x_train, y_train, x_test, y_test, testfiles = get_data_label(
    datapath,
    ratio=0.2,
    resize=True,
)

In [None]:
# save list of x_train as .npz
x_train_path = '/content/drive/MyDrive/CapStone/data/NPY_SPLIT/x_train.npz'
# Create the output folder first: open(..., 'wb') fails when it does not exist.
os.makedirs(os.path.dirname(x_train_path), exist_ok=True)
with open(x_train_path, 'wb') as f:
  # Entries are passed positionally, so np.savez stores them as arr_0, arr_1, ...
  np.savez(f, *x_train)

In [None]:
# save list of y_train as .npz
y_train_path = '/content/drive/MyDrive/CapStone/data/NPY_SPLIT/y_train.npz'
# Create the output folder first: open(..., 'wb') fails when it does not exist.
os.makedirs(os.path.dirname(y_train_path), exist_ok=True)
with open(y_train_path, 'wb') as f:
  # Entries are passed positionally, so np.savez stores them as arr_0, arr_1, ...
  np.savez(f, *y_train)

In [None]:
# save list of x_test as .npz
x_test_path = '/content/drive/MyDrive/CapStone/data/NPY_SPLIT/x_test.npz'
# Create the output folder first: open(..., 'wb') fails when it does not exist.
os.makedirs(os.path.dirname(x_test_path), exist_ok=True)
with open(x_test_path, 'wb') as f:
  # Entries are passed positionally, so np.savez stores them as arr_0, arr_1, ...
  np.savez(f, *x_test)

In [None]:
# save list of y_test as .npz
y_test_path = '/content/drive/MyDrive/CapStone/data/NPY_SPLIT/y_test.npz'
# Create the output folder first: open(..., 'wb') fails when it does not exist.
os.makedirs(os.path.dirname(y_test_path), exist_ok=True)
with open(y_test_path, 'wb') as f:
  # Entries are passed positionally, so np.savez stores them as arr_0, arr_1, ...
  np.savez(f, *y_test)

# Save all (224 x 224) images as one Giant Concatenated Numpy Array

In [None]:
# Folder walked for the 224x224 .npy files, and the folder that receives the combined arrays.
original_root, new_root = (
    '/content/drive/MyDrive/CapStone/data/NPY_OG_224',
    '/content/drive/MyDrive/CapStone/data/NPY_OG_FULL',
)

In [None]:
# Empty accumulators for the loaded arrays and their labels.
data, labels = [], []

In [None]:
# Start from empty accumulators so that re-running this cell does not append
# the same arrays and labels a second time.
data = []
labels = []

for root, dirs, files in os.walk(original_root):
  for file in files:
    # Skip any stray non-.npy file instead of letting np.load fail on it.
    if not file.lower().endswith('.npy'):
      continue
    # concatenate 224x224 arrays
    new_path = os.path.join(root, file)
    arr = np.load(new_path)
    # The label is the second '_'-separated token of the file name.
    label = file.split('_')[1]

    data.append(arr) # store data
    labels.append(label) # store label for data

In [None]:
# Turn the accumulated Python lists into NumPy arrays (images and labels, in matching order).
data_arr, labels_arr = np.array(data), np.array(labels)

In [None]:
# Write the stacked images and their labels under new_root, creating the folder first if needed.
os.makedirs(new_root, exist_ok=True)
for out_name, out_array in (('data.npy', data_arr), ('labels.npy', labels_arr)):
  np.save(os.path.join(new_root, out_name), out_array)

## Save all 50x50 images of Schadrack's Train as one Array

In [27]:
# Combine every Train .npy array under original_root into one array and save
# it, together with the matching labels, under new_root.
original_root = '/content/drive/MyDrive/CapStone/data/SPLIT_DATA_WITH_OG/Train_arr'
new_root = '/content/drive/MyDrive/CapStone/data/NPY_OG_FULL'

data = []
labels  = []

for root, dirs, files in os.walk(original_root):
  for file in files:
    # Skip any stray non-.npy file instead of letting np.load fail on it.
    if not file.lower().endswith('.npy'):
      continue
    # concatenate 50x50 arrays
    new_path = os.path.join(root, file)
    arr = np.load(new_path)
    # The label is the second '_'-separated token of the file name.
    label = file.split('_')[1]

    data.append(arr) # store data
    labels.append(label) # store label for data


# create data_arr and labels_arr
data_arr = np.array(data)
labels_arr = np.array(labels)

# save data and the labels
# Ensure the directory exists
os.makedirs(new_root, exist_ok=True)
np.save(os.path.join(new_root, 'train_sch.npy'), data_arr)
np.save(os.path.join(new_root, 'labels_train_sch.npy'), labels_arr)

## Save all 50x50 images of Schadrack's Valid as one Array

In [28]:
# Combine every Valid .npy array under original_root into one array and save
# it, together with the matching labels, under new_root.
original_root = '/content/drive/MyDrive/CapStone/data/SPLIT_DATA_WITH_OG/Valid_arr'
new_root = '/content/drive/MyDrive/CapStone/data/NPY_OG_FULL'

data = []
labels  = []

for root, dirs, files in os.walk(original_root):
  for file in files:
    # Skip any stray non-.npy file instead of letting np.load fail on it.
    if not file.lower().endswith('.npy'):
      continue
    # concatenate 50x50 arrays
    new_path = os.path.join(root, file)
    arr = np.load(new_path)
    # The label is the second '_'-separated token of the file name.
    label = file.split('_')[1]

    data.append(arr) # store data
    labels.append(label) # store label for data


# create data_arr and labels_arr
data_arr = np.array(data)
labels_arr = np.array(labels)

# save data and the labels
# Ensure the directory exists
os.makedirs(new_root, exist_ok=True)
np.save(os.path.join(new_root, 'valid_sch.npy'), data_arr)
np.save(os.path.join(new_root, 'labels_valid_sch.npy'), labels_arr)

## Save all 50x50 images of Schadrack's Test as one Array

In [29]:
# Combine every Test .npy array under original_root into one array and save
# it, together with the matching labels, under new_root.
original_root = '/content/drive/MyDrive/CapStone/data/SPLIT_DATA_WITH_OG/Test_arr'
new_root = '/content/drive/MyDrive/CapStone/data/NPY_OG_FULL'

data = []
labels  = []

for root, dirs, files in os.walk(original_root):
  for file in files:
    # Skip any stray non-.npy file instead of letting np.load fail on it.
    if not file.lower().endswith('.npy'):
      continue
    # concatenate 50x50 arrays
    new_path = os.path.join(root, file)
    arr = np.load(new_path)
    # The label is the second '_'-separated token of the file name.
    label = file.split('_')[1]

    data.append(arr) # store data
    labels.append(label) # store label for data


# create data_arr and labels_arr
data_arr = np.array(data)
labels_arr = np.array(labels)

# save data and the labels
# Ensure the directory exists
os.makedirs(new_root, exist_ok=True)
np.save(os.path.join(new_root, 'test_sch.npy'), data_arr)
np.save(os.path.join(new_root, 'labels_test_sch.npy'), labels_arr)

## Resize Arrays to 50x50

In [None]:
# load and reshape data
path = '/content/drive/MyDrive/CapStone/data/NPY_OG_FULL'
data = np.load(f"{path}/data.npy")

In [None]:
from skimage.transform import resize

# `data` is the whole stack loaded from data.npy, so the target shape keeps
# its first dimension and changes the remaining ones to 50 x 50 x 3.
target_shape = (data.shape[0], 50, 50, 3)
resized_arr = resize(data, target_shape, anti_aliasing=True)

In [None]:
# Display the shape of the resized stack as a sanity check.
np.shape(resized_arr)

(13936, 50, 50, 3)

In [None]:
# Store the resized stack next to data.npy as data50.npy.
resized_file = f"{path}/data50.npy"
np.save(resized_file, resized_arr)