In [1]:
!pip install opencv-python pillow
!pip install torchvision matplotlib
!pip install pillow pandas pyarrow



In [2]:
import pandas as pd
import numpy as np
import pyarrow.parquet as pq
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import torch.nn.functional as F
import torchvision.transforms as transforms

In [3]:
easy_images_df = pd.read_parquet('easy-500/images.parquet')
easy_labels_df = pd.read_parquet('easy-500/labels.parquet')

In [4]:
easy_images_df.head(1)

Unnamed: 0,id,image
0,0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...


In [5]:
easy_labels_df.head(23)

Unnamed: 0,image_id,x,y,orientation,radius,class
0,0,269,450,0.0,17,0
1,0,533,299,0.663225,45,1
2,0,539,427,0.610865,46,1
3,0,365,148,0.488692,45,1
4,0,472,136,2.426008,40,1
5,0,846,448,4.660029,41,1
6,0,613,248,6.003933,41,1
7,0,287,214,2.391101,48,1
8,0,657,387,5.393067,44,1
9,0,752,311,2.792527,41,1


In [6]:
easy_images_df.shape , easy_labels_df.shape

((500, 2), (11500, 6))

In [7]:
def decode_image(image_data):
    nparr = np.frombuffer(image_data, np.uint8)
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    return img

easy_images_df['image'] = easy_images_df['image'].apply(decode_image)

# Size of each image
print(f"\nShape of the first image: {easy_images_df['image'].iloc[0].shape}")


Shape of the first image: (512, 1024, 3)


In [8]:
easy_images_df.head()

Unnamed: 0,id,image
0,0,"[[[17, 123, 57], [18, 124, 58], [18, 124, 58],..."
1,1,"[[[17, 123, 57], [18, 124, 58], [18, 124, 58],..."
2,2,"[[[17, 123, 57], [18, 124, 58], [18, 124, 58],..."
3,3,"[[[17, 123, 57], [18, 124, 58], [18, 124, 58],..."
4,4,"[[[17, 123, 57], [18, 124, 58], [18, 124, 58],..."


# Data preprocessing

In [109]:
import numpy as np
import torch
import cv2
from torchvision import transforms

class Preprocessor:
    def __init__(self, images, labels, grid_size, scale_factor=0.5):
        self.grid_size = grid_size
        self.scale_factor = scale_factor
        self.images = images
        self.labels = labels
        self.transform = transforms.Compose([transforms.ToTensor()])
        
    def __len__(self):
        return len(self.images)  # Return the length of the dataset

    def __getitem__(self, idx):
        image_data = self.images.loc[idx, 'image']
        image_data = self.transform(image_data)
        labels_data = self.labels.loc[idx].values.astype(np.float32)  # Convert labels to numpy array and then to tensor
        labels_data = torch.tensor(labels_data)
        return image_data, labels_data

    def scale_image(self, image):
        width = int(image.shape[1] * self.scale_factor)
        height = int(image.shape[0] * self.scale_factor)
        resized_image = cv2.resize(image, (width, height))
        return resized_image

    def normalize_image(self, image):
        normalized_image = image / 255.0  # Assuming image is in uint8 format
        return normalized_image

    def calculate_relative_coordinates(self, x, y, radius, orientation, image_width, image_height):
        grid_width = image_width / self.grid_size[1]
        grid_height = image_height / self.grid_size[0]
        x = x * self.scale_factor
        y = y * self.scale_factor
        r = radius * self.scale_factor
        grid_x = int(x / grid_width)
        grid_y = int(y / grid_height)
        relative_x = round((x - grid_x * grid_width) / grid_width, 4)
        relative_y = round((y - grid_y * grid_height) / grid_height, 4)
        relative_radius = round(r, 4)
        o = round(orientation, 4)
        return relative_x, relative_y, relative_radius, o

    def preprocess_labels(self, image_width, image_height):
        num_images = len(self.images)
        labels_array = np.zeros((num_images, self.grid_size[0], self.grid_size[1], 8), dtype=np.float32)

        for idx, image_id in enumerate(self.labels['image_id'].unique()):
            image_labels = self.labels[self.labels['image_id'] == image_id]
            for _, row in image_labels.iterrows():
                x, y, radius, orientation = row['x'], row['y'], row['radius'], row['orientation']
                relative_x, relative_y, relative_radius, o = self.calculate_relative_coordinates(x, y, radius, orientation, image_width, image_height)

                grid_width = image_width / self.grid_size[1]
                grid_height = image_height / self.grid_size[0]
                grid_x = int(x / grid_width)
                grid_y = int(y / grid_height)

                grid_x = min(max(grid_x, 0), self.grid_size[1] - 1)
                grid_y = min(max(grid_y, 0), self.grid_size[0] - 1)

                confidence_score = 1
                class_label = int(row['class'])
                one_hot_class = np.zeros(3)
                one_hot_class[class_label] = 1

                labels_array[idx, grid_y, grid_x, 0] = relative_x
                labels_array[idx, grid_y, grid_x, 1] = relative_y
                labels_array[idx, grid_y, grid_x, 2] = relative_radius
                labels_array[idx, grid_y, grid_x, 3] = o
                labels_array[idx, grid_y, grid_x, 4] = confidence_score
                labels_array[idx, grid_y, grid_x, 5:] = one_hot_class

        return torch.tensor(labels_array)

    def preprocess(self):
        self.images['scaled_image'] = self.images['image'].apply(self.scale_image)
        self.images['normalized_image'] = self.images['scaled_image'].apply(self.normalize_image)
        labels_array = []

        for i, row in self.images.iterrows():
            image_height, image_width, _ = row['scaled_image'].shape
            labels_array.append(self.preprocess_labels(image_width, image_height))

        return self.images, torch.stack(labels_array)


In [110]:
grid_size = (7, 7)  # Define your grid size
easy_images = easy_images_df[:5]
easy_labels = easy_labels_df[:115]
preprocessor = Preprocessor(easy_images, easy_labels, grid_size=grid_size)
processed_images_df, processed_labels_df = preprocessor.preprocess()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.images['scaled_image'] = self.images['image'].apply(self.scale_image)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.images['normalized_image'] = self.images['scaled_image'].apply(self.normalize_image)


In [111]:
processed_labels_df.head()

AttributeError: 'Tensor' object has no attribute 'head'

In [12]:
#processed_images_df.head()

In [13]:
#processed_labels_df.rename(columns={'class': 'class_label'}, inplace=True)

In [14]:
import matplotlib.pyplot as plt
import numpy as np
import cv2

def plot_circles_on_image_from_df(processed_images_df, processed_labels_df, grid_size, image_id):
    
    # Get image data from the DataFrame based on image_id
    image_data = processed_images_df.loc[image_id, 'image']
    
    # Check if image data is None
    if image_data is None:
        print("Error: Image data is None.")
        return
    
    # Decode image data
    image_array = np.array(image_data)
    
    # Create figure and axis
    fig, ax = plt.subplots()
    
    # Display the image
    ax.imshow(image_array)
    
    # Get image height and width
    image_height, image_width = image_array.shape[:2]
    
    # Calculate grid dimensions
    grid_width = image_width / grid_size[1]
    grid_height = image_height / grid_size[0]
    
    # Get labels for the selected image
    image_labels = processed_labels_df[processed_labels_df['image_id'] == image_id]
    
    # Iterate through labels
    scale_factor = 0.5
    
    for label in image_labels.itertuples():
        # Get relative coordinates and radius
        relative_x, relative_y, relative_radius = label.relative_x, label.relative_y, label.red_radius
        
        # Calculate the grid indices
        grid_x = int(label.x/grid_width)
        grid_y = int(label.y/grid_height)
        
        # Calculate absolute coordinates
        absolute_x = (grid_x + relative_x) * grid_width
        absolute_y = (grid_y + relative_y) * grid_height
        absolute_r = relative_radius * 2
        
        # Plot circle
        circle = plt.Circle((absolute_x, absolute_y), absolute_r, color='y', fill=False)
        ax.add_artist(circle)

    # Show plot
    plt.show()

plot_circles_on_image_from_df(processed_images_df, processed_labels_df, grid_size=(7, 7), image_id=1)


NameError: name 'processed_images_df' is not defined

# Data loader

In [116]:
dataset = Preprocessor(easy_images_df, easy_labels_df, grid_size = (7, 7))

In [117]:
dataset

<__main__.Preprocessor at 0x151c6a5d0>

In [118]:
data_loader = DataLoader(dataset, batch_size = 32, shuffle = True ) 

In [120]:
for batch in data_loader:
    images, labels = batch
    print(images.shape)
    print(labels.shape)

IndexError: too many indices for tensor of dimension 4