In [1]:
import pandas as pd
import os, shutil
import numpy as np
import cv2
import matplotlib.pyplot as plt
import ast


### Add the original and resized image dimensions as columns to BeetleMeasurements.csv

In [2]:
# locations of the measurements table and of the original / resized image folders
csv_path = '/home/ramirez.528/2018-NEON-beetles/BeetleMeasurements.csv'
image_folder = 'group_images'
resized_folder = '/home/ramirez.528/BeetlePalooza/beetle_images_resized'

# load the measurements and report (rows, columns)
beetle_df = pd.read_csv(csv_path)
print(beetle_df.shape)


(39064, 24)


In [4]:
#get image dimensions for originals and resized
# Both dicts map pictureID -> the array shape returned by plt.imread(...).shape.
image_dims = {}
resized_image_dims = {}
for i, p_id in enumerate(beetle_df.pictureID.unique()):
    # progress indicator: print the running count every 100 images
    if i % 100 == 0:
        print(i)
    try:
        im = plt.imread(f"{image_folder}/{p_id}")
        resized_im = plt.imread(f"{resized_folder}/{p_id}")
    except FileNotFoundError:
        # Only a genuinely missing file is reported as missing. A bare `except:` would
        # also swallow KeyboardInterrupt and programming errors such as NameError.
        print(f'File {p_id} does not exist!')
    except OSError as err:
        # Other I/O failures are still skipped (best effort), but reported with the
        # underlying error instead of being mislabelled as "does not exist".
        print(f'File {p_id} could not be read: {err}')
    else:
        # Record dimensions only when BOTH versions of the image were read.
        image_dims[p_id] = im.shape
        resized_image_dims[p_id] = resized_im.shape

0
100
200
300
400
500
File A00000069245.jpg does not exist!


In [5]:
# The one image that could not be read gets an all-zero placeholder shape in both
# lookups, so that every pictureID has an entry when the dims are mapped onto the table.
missing_picture = 'A00000069245.jpg'
for dims_by_picture in (image_dims, resized_image_dims):
    dims_by_picture[missing_picture] = (0, 0, 0)

In [6]:
# Add one column per image version, looking up each row's pictureID in the
# corresponding pictureID -> shape dictionary.
picture_ids = beetle_df['pictureID']
beetle_df['image_dim'] = picture_ids.map(image_dims)
beetle_df['resized_image_dim'] = picture_ids.map(resized_image_dims)

In [14]:
# preview the first two rows to check the new image_dim / resized_image_dim columns
beetle_df.head(2)

Unnamed: 0,pictureID,scalebar,cm_pix,individual,structure,lying_flat,coords_pix,dist_pix,dist_cm,scientificName,...,plotID,user_name,workflowID,genus,species,combinedID,measureID,file_name,image_dim,resized_image_dim
0,A00000032929.jpg,"{""x1"": 815, ""y1"": 244, ""x2"": 892, ""y2"": 244}",77.0,1,ElytraLength,Yes,"{""x1"": 1055, ""y1"": 154, ""x2"": 1163, ""y2"": 149}",108.115679,1.4041,Carabus goryi,...,HARV_001,IsaFluck,21652,Carabus,goryi,A00000032929_1,581c1309-6b06-4445-9ed5-55ebe366f6ed,group_images/A00000032929.jpg,"(3712, 5568, 3)","(1299, 1949, 3)"
1,A00000032929.jpg,"{""x1"": 815, ""y1"": 244, ""x2"": 892, ""y2"": 244}",77.0,1,ElytraWidth,Yes,"{""x1"": 1053, ""y1"": 129, ""x2"": 1057, ""y2"": 179}",50.159745,0.651425,Carabus goryi,...,HARV_001,IsaFluck,21652,Carabus,goryi,A00000032929_1,464836fd-853e-40d5-861c-8c279aec6a55,group_images/A00000032929.jpg,"(3712, 5568, 3)","(1299, 1949, 3)"


In [9]:
#save changes
# beetle_df.to_csv('BeetleMeasurements.csv', index=False)

### Rescale the coordinates and add them as a column to the CSV as well

In [3]:
def rescale_coordinates(old_x, old_y, old_width, old_height, new_width, new_height):
    """Map a point from an image of one size to the same image at another size.

    The x and y axes are scaled independently.

    Args:
        old_x, old_y: Point coordinates in the source image.
        old_width, old_height: Size of the source image.
        new_width, new_height: Size of the target image.

    Returns:
        Tuple ``(new_x, new_y)``: the unrounded coordinates in the target image.

    Raises:
        ZeroDivisionError: If ``old_width`` or ``old_height`` is a plain Python 0.
    """
    # Calculate the scaling factors
    x_scale = new_width / old_width
    y_scale = new_height / old_height

    # Calculate the new coordinates
    new_x = old_x * x_scale
    new_y = old_y * y_scale

    return new_x, new_y

def get_scaled_coords(coords, resized_image_shape, image_shape):
    """Scale a two-point measurement from the resized image to the original image.

    Args:
        coords: Mapping with keys ``'x1'``, ``'y1'``, ``'x2'``, ``'y2'`` holding pixel
            coordinates measured on the resized image.
        resized_image_shape: Array shape of the resized image, in array order
            ``(height, width, ...)``.
        image_shape: Array shape of the original image, in the same order.

    Returns:
        Dict with keys ``'x1'``, ``'y1'``, ``'x2'``, ``'y2'`` holding the coordinates in
        the original image, truncated to ``int``.

    Raises:
        ZeroDivisionError: If the resized shape has a zero height or width
            (e.g. the ``(0, 0, 0)`` placeholder).
    """
    # Array shapes are (rows, cols, ...) = (height, width, ...): x runs along index 1
    # and y along index 0. Reading index 0 as the width would scale x by the height
    # ratio and y by the width ratio.
    old_height, old_width = resized_image_shape[0], resized_image_shape[1]  # size the coords were measured at
    new_height, new_width = image_shape[0], image_shape[1]  # size to scale the coords to

    x1_new, y1_new = rescale_coordinates(coords['x1'], coords['y1'], old_width, old_height, new_width, new_height)
    x2_new, y2_new = rescale_coordinates(coords['x2'], coords['y2'], old_width, old_height, new_width, new_height)

    new_coords = {'x1': int(x1_new), 'y1': int(y1_new), 'x2': int(x2_new), 'y2': int(y2_new)}

    return new_coords

In [4]:
#add a new column to our dataframe with the scaled coordinates
def _parse_literal(value):
    """Return `value` parsed by ast.literal_eval if it is a string, otherwise unchanged."""
    # Columns read back from the CSV hold strings such as '(0, 0, 0)', whereas columns
    # built in the current session by mapping the dims dictionaries already hold tuples.
    # ast.literal_eval raises ValueError on non-string objects, so only strings are parsed.
    return ast.literal_eval(value) if isinstance(value, str) else value

beetle_df["coords_pix_scaled_up"] = [None] * len(beetle_df)
for i, row in beetle_df.iterrows():
    try:
        coords = _parse_literal(row.coords_pix)
        resized_image_shape = _parse_literal(row.resized_image_dim) #dims of resized img (1MB)
        group_image_shape = _parse_literal(row.image_dim) #dims of original, larger img

        # scale coords using img size
        new_coords = get_scaled_coords(coords, resized_image_shape, group_image_shape)
        beetle_df.at[i, "coords_pix_scaled_up"] = new_coords
    except ZeroDivisionError:
        # Rows whose image has the (0, 0, 0) placeholder shape cannot be scaled; they
        # are reported and given a (0, 0, 0) placeholder instead of a coords dict.
        print(f"ZeroDivisionError error at row {i}")
        beetle_df.at[i, "coords_pix_scaled_up"] = (0,0,0)

ZeroDivisionError error at row 38534
ZeroDivisionError error at row 38535


In [5]:
# look at the stored resized-image dims for the row at position 38534,
# one of the rows reported with a ZeroDivisionError
beetle_df['resized_image_dim'].iloc[38534]

'(0, 0, 0)'

In [43]:
#save changes
# beetle_df.to_csv('/home/ramirez.528/2018-NEON-beetles/BeetleMeasurements.csv', index=False)