<a href="https://colab.research.google.com/github/GuidoGiacomoMussini/Born_rule_Image_Classification/blob/main/DeTR_Features.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import cv2
import numpy as np
import pandas as pd
import torch
from PIL import Image
from tensorflow.keras.datasets import cifar10
from tqdm.notebook import tqdm
from transformers import DetrForObjectDetection, DetrImageProcessor
#import timm


In [None]:
# Select the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

In [None]:
# CIFAR10: pool the train and test splits into one image array (X) and one
# flat label vector (y).
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
X = np.concatenate([x_train, x_test], axis=0)
y = np.concatenate([y_train, y_test], axis=0)
y = y.flatten()

# DeTR: the image processor and the detection model are loaded from the same
# checkpoint and revision; the model is then moved to the selected device.
processor = DetrImageProcessor.from_pretrained(
    'facebook/detr-resnet-50',
    revision="no_timm",
)
model = DetrForObjectDetection.from_pretrained(
    'facebook/detr-resnet-50',
    revision="no_timm",
)
model = model.to(device)

In [None]:
def extract_bounding_boxes(images, model, processor, max_objects=5, device=None, input_is_bgr=False):
    """Run an object detector on each image and keep its highest-scoring boxes.

    Args:
        images: Sized iterable of image arrays accepted by ``Image.fromarray``.
            Arrays are treated as RGB unless ``input_is_bgr`` is set.
        model: Detection model; it is switched to eval mode and moved to
            ``device`` as a side effect.
        processor: Matching image processor providing ``__call__`` and
            ``post_process_object_detection``.
        max_objects: Maximum number of boxes kept per image (highest scores first).
        device: Device used for the model and its inputs. ``None`` selects
            ``'cuda'`` when available and ``'cpu'`` otherwise.
        input_is_bgr: Set to True only for OpenCV-style BGR arrays; they are
            then converted to RGB before detection. The previous unconditional
            BGR->RGB conversion swapped the red and blue channels of inputs
            that were already RGB (as the Keras CIFAR-10 arrays are).

    Returns:
        A list with one dict per image: ``{'image_index': int,
        'bounding_boxes': list}``. Boxes are whatever
        ``processor.post_process_object_detection`` returns, rescaled to the
        original image size (corner format per the Transformers docs); the list
        is empty when post-processing yields no detection.
    """
    if device is None:
        # Fall back to the CPU instead of failing on machines without a GPU.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    results = []

    model.eval()
    model.to(device)

    for idx, image in tqdm(enumerate(images), total=len(images)):
        if input_is_bgr:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image_rgb = Image.fromarray(image)

        # Preprocess
        inputs = processor(images=image_rgb, return_tensors="pt").to(device)

        # Model outputs (inference only, no gradients needed)
        with torch.no_grad():
            outputs = model(**inputs)

        # PIL reports (width, height); post-processing expects (height, width).
        target_sizes = torch.tensor([image_rgb.size[::-1]])
        detection_results = processor.post_process_object_detection(outputs, target_sizes=target_sizes)[0]

        boxes = detection_results["boxes"].detach().cpu().numpy()
        scores = detection_results["scores"].detach().cpu().numpy()

        # Keep the `max_objects` detections with the highest scores.
        top_indices = np.argsort(scores)[::-1][:max_objects]

        results.append({
            'image_index': idx,
            'bounding_boxes': boxes[top_indices].tolist()
        })

    return results


In [None]:
# Run the detector on every image using the device selected earlier in the
# notebook; hard-coding 'cuda' here fails on CPU-only runtimes.
results = extract_bounding_boxes(X, model, processor, max_objects=5, device=device)

df = pd.DataFrame(results)

# Attach the source image and its label to each detection record.
df['image'] = [X[idx] for idx in df['image_index']]
df['label'] = [y[idx] for idx in df['image_index']]

df.head()

In [None]:

# Persist the detections so the DETR pass does not have to be repeated.
# NOTE(review): the target is under Google Drive; presumably Drive must be
# mounted first — no mount call is visible in this notebook, confirm.
bounding_boxes_path = '/content/drive/MyDrive/Colab Notebooks/Universita/Tesi/Script_Definitivi/bounding_boxes_correct.pkl'
df.to_pickle(bounding_boxes_path)

In [None]:
# df=pd.read_pickle('/content/drive/MyDrive/Colab Notebooks/Universita/Tesi/Script_Definitivi/bounding_boxes_correct.pkl')
# df['check_'] = [1 if len(df.bounding_boxes[i]) == 0 else 0 for i in range(len(df))]
# df = df[df.check_ == 0]
# df = df.drop(columns = ['image_index', 'check_']).reset_index(drop=True)

In [None]:
import numpy as np
import tensorflow as tf
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.models import Model
from keras.preprocessing.image import array_to_img, img_to_array
import cv2
import pandas as pd
from PIL import Image

def extract_rounding_box_pixel_features(image, bounding_boxes):
    """Return one flattened 32x32 crop per bounding box.

    Args:
        image: Object exposing ``crop`` and ``resize`` (the notebook passes a
            PIL image).
        bounding_boxes: Iterable of boxes; every coordinate is truncated with
            ``int()`` and the first four values are handed to ``image.crop``.

    Returns:
        A list of 1-D numpy arrays, one per box, in input order.
    """
    def _flat_patch(raw_box):
        corners = list(map(int, raw_box))
        patch = image.crop((corners[0], corners[1], corners[2], corners[3]))
        patch = patch.resize((32, 32))
        return np.array(patch).flatten()

    return [_flat_patch(raw_box) for raw_box in bounding_boxes]

def extract_rounding_box_resnet_features(image, bounding_boxes, model):
    """Embed every bounding-box crop of ``image`` with ``model``.

    All crops of the image are stacked and sent through a single
    ``model.predict`` call instead of one call per box, which avoids the
    per-call overhead and the per-call progress output of ``predict``.

    Args:
        image: Object exposing ``crop``/``resize``/``convert`` (the notebook
            passes a PIL image).
        bounding_boxes: Iterable of boxes; every coordinate is truncated with
            ``int()`` and the first four values are handed to ``image.crop``.
        model: Object with a Keras-style ``predict(batch, verbose=0)`` method.

    Returns:
        A list of 1-D numpy feature vectors, one per box, in input order.
        An empty list is returned (and the model is not called) when there
        are no boxes.
    """
    crops = []
    for box in bounding_boxes:
        box = [int(b) for b in box]
        cropped_image = image.crop((box[0], box[1], box[2], box[3]))  # crop the image using bounding box
        cropped_image = cropped_image.resize((32, 32))  # resize
        cropped_image = cropped_image.convert("RGB")
        crops.append(np.array(cropped_image))

    if not crops:
        return []

    # One batch of shape (n_boxes, 32, 32, 3), preprocessed the same way as
    # the single-image path was.
    batch = preprocess_input(np.stack(crops, axis=0))

    # embed w resnet: a single forward pass for all crops of this image
    feature_maps = model.predict(batch, verbose=0)

    return [feature_map.flatten() for feature_map in feature_maps]



In [None]:
# Report how many GPUs TensorFlow can see before building the feature extractor.
gpu_count = len(tf.config.experimental.list_physical_devices('GPU'))
print("Num GPUs Available: ", gpu_count)

# ImageNet-pretrained ResNet50 without its classification head, configured for
# 32x32 RGB inputs; `model_resnet` exposes the backbone's output directly.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(32, 32, 3),
)
model_resnet = Model(
    inputs=base_model.input,
    outputs=base_model.output,
)

In [None]:
# extract features: one output record per (image, bounding box) pair.
# Rows whose bounding-box list is empty contribute no records.
data = []

for idx, row in tqdm(df.iterrows(), total=len(df)):
    bounding_boxes = row['bounding_boxes']  # boxes detected for this image
    image = Image.fromarray(row['image'])

    # raw-pixel and ResNet descriptors, both in bounding-box order
    pixel_features = extract_rounding_box_pixel_features(image, bounding_boxes)
    resnet_features = extract_rounding_box_resnet_features(image, bounding_boxes, model_resnet)

    # pair the two descriptors of each box and carry the image and label along
    data.extend(
        {
            'image': row['image'],
            'pixel_feature': pixel_feature,
            'resnet_feature': resnet_feature,
            'label': row['label'],
        }
        for pixel_feature, resnet_feature in zip(pixel_features, resnet_features)
    )


final_df = pd.DataFrame(data)
final_df.head()


In [None]:
# Persist the per-box feature table.
# NOTE(review): the target is under Google Drive; presumably Drive must be
# mounted first — no mount call is visible in this notebook, confirm.
final_features_path = '/content/drive/MyDrive/Colab Notebooks/Universita/Tesi/Script_Definitivi/final_features.pkl'
final_df.to_pickle(final_features_path)