In [2]:
import sys
import os
import time 
import gc
from PIL import Image
import time
import requests
import torch
from torch import nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import cv2
import torchvision.transforms.functional as F
import torchvision.models.segmentation
from torchvision.models.detection import maskrcnn_resnet50_fpn_v2, MaskRCNN_ResNet50_FPN_V2_Weights
from torchvision import transforms as transforms3
from torchvision.utils import draw_bounding_boxes


In [None]:
# Report whether CUDA is usable, how many GPUs are visible, and what they are called.
gpu_count = torch.cuda.device_count()
gpu_names = [torch.cuda.get_device_name(idx) for idx in range(gpu_count)]
print(
    'Is Cuda available: ', str(torch.cuda.is_available()),
    '\nAvailable GPUs: ', str(gpu_count),
    '\nList of GPUs: \n', gpu_names,
)

In [4]:
# Load the pickled image records; change the path if necessary to wherever
# image_date_data_.pkl is stored.
# NOTE(review): unpickling can execute arbitrary code - only load files you trust.
pkl_path = '/home/adm_gpu/projects/masterthesis/code/advertisements/image_date_data_.pkl'
with open(pkl_path, mode='rb') as pkl_file:
    og_l_images = pickle.load(pkl_file)

# Each record supplies the three columns below, in this order.
frame_columns = ['journal', 'date', 'image']
final_frame = pd.DataFrame(og_l_images[:], columns=frame_columns)

In [None]:
# Preview the first rows of the freshly built frame (journal, date, image columns).
final_frame.head()

In [None]:
# Pull the image column out as a plain Python list and report how many were loaded.
raw_images = final_frame['image'].tolist()
print('Loaded Images: ', str(len(raw_images)))

In [None]:
target_size = (600, 800)  # (width, height) in pixels; adjust dimensions if needed


# As most of the images are scans we need to resize them for the model to guarantee uniform dimensions
def resize_and_pad(image, target_size):
    """Fit ``image`` into ``target_size`` without distortion and pad the rest white.

    The previous implementation stretched the image to exactly ``target_size``,
    so the centring offsets were always zero and no padding ever took place.
    The image is now scaled by a single factor (aspect ratio preserved) so that
    it fits inside the target, and is then pasted centred on a white canvas.

    Args:
        image: PIL image (any object exposing ``size`` and ``resize``).
        target_size: ``(width, height)`` of the returned image.

    Returns:
        A new RGB image of exactly ``target_size``.
    """
    target_w, target_h = target_size
    new_image = Image.new("RGB", target_size, (255, 255, 255))

    src_w, src_h = image.size
    if src_w == 0 or src_h == 0:
        # Nothing to scale; return the blank canvas rather than dividing by zero.
        return new_image

    # One common factor for both axes keeps the aspect ratio intact.
    scale = min(target_w / src_w, target_h / src_h)
    # Clamp to [1, target] so rounding can neither collapse nor overflow an axis.
    fitted_size = (
        min(target_w, max(1, round(src_w * scale))),
        min(target_h, max(1, round(src_h * scale))),
    )
    resized_image = image.resize(fitted_size)

    # Centre the fitted image; the uncovered border stays white.
    offset = ((target_w - fitted_size[0]) // 2, (target_h - fitted_size[1]) // 2)
    new_image.paste(resized_image, offset)

    return new_image

# Bring every loaded image to the common target size.
resized_images = []
for img in raw_images:
    resized_images.append(resize_and_pad(img, target_size))

# The count should match the number of loaded images.
print("Resized images: ", len(resized_images))

In [8]:

# Model setup: default pretrained weights plus the preprocessing bundled with them.
weights = MaskRCNN_ResNet50_FPN_V2_Weights.DEFAULT
transforms = weights.transforms()

# The network is moved to the GPU (important to leverage cuda) and wrapped in
# DataParallel pinned to device 0; the original note says multi-GPU does not work.
model = torch.nn.DataParallel(
    maskrcnn_resnet50_fpn_v2(weights=weights, progress=False).cuda(),
    device_ids=[0],
)
# Switch to evaluation mode for inference.
model = model.eval()



In [None]:
# measure run
start_time = time.time()

BATCH_SIZE = 2          # images per forward pass; kept small due to the VRAM limit
SCORE_THRESHOLD = 0.65  # detections must score strictly above this to be kept

# One list of category names per image, in the same order as `resized_images`.
segmented_images = []

# categories from model weights metadata
categories_list = weights.meta['categories']

# Process the images in small batches on the GPU.
for i in range(0, len(resized_images), BATCH_SIZE):
    selection = resized_images[i:i + BATCH_SIZE]

    # PIL image -> tensor on the GPU -> preprocessing that ships with the weights.
    images = [
        transforms(transforms3.functional.pil_to_tensor(element).to('cuda'))
        for element in selection
    ]

    # The tensors already live on the GPU, so stacking needs no further transfer.
    batch_tensor = torch.stack(images)
    with torch.no_grad():
        outputs = model(batch_tensor)

    print("Images annotated: " + str(len(outputs)))

    for output in outputs:
        keep = output['scores'] > SCORE_THRESHOLD
        # One .tolist() call converts all kept label ids at once instead of
        # calling .item() (one GPU round trip) per label.
        kept_labels = output['labels'][keep].tolist()
        segmented_images.append([categories_list[label] for label in kept_labels])

print(len(segmented_images))

# Attach the detected category names to the frame, one list per row.
final_frame['information'] = segmented_images

end_time = time.time()
elapsed_time = end_time - start_time

print('Elapsed Time: ', elapsed_time, '\n', 30 * '=', '\n', segmented_images[0:26:5])

In [None]:
# Frame dimensions after the 'information' column was assigned.
final_frame.shape

In [None]:
# Preview the frame, now including the per-image 'information' label lists.
final_frame.head()

In [None]:

# Output location for the annotated frame; change if necessary.
path = '/home/adm_gpu/projects/masterthesis/code/main/dataframe.pkl'

# Time how long it takes to serialise the frame to disk.
start_time = time.time()
with open(path, mode='wb') as file:
    pickle.dump(final_frame, file)
end_time = time.time()

time_taken = end_time - start_time
print(f"Time taken to save final_frame: {time_taken:.4f} seconds")

## Verify write operation

In [13]:
# Read the frame back from disk to confirm the write succeeded.
# NOTE(review): unpickling can execute arbitrary code - only load files you trust.
path = '/home/adm_gpu/projects/masterthesis/code/main/dataframe.pkl'
with open(path, mode='rb') as frame_file:
    dataframe = pickle.load(frame_file)

In [None]:
# Preview the first rows of the frame that was read back from disk.
dataframe.head()

In [None]:
# Dimensions of the reloaded frame, for comparison with final_frame.shape.
dataframe.shape

## Textfulness

In [1]:
# imports 
from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation
from PIL import Image
import time
import requests
import torch
from torch import nn
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd

import pickle

# check version
# `from transformers import ...` binds only the imported classes, not the package
# name, so the package is imported here before its version is read.
import transformers
print(transformers.__version__)

In [None]:
# pre checks
# Report CUDA availability, the number of GPUs and the name of GPU 0.
# The name is only queried when a device exists, because
# torch.cuda.get_device_name raises on machines without a usable GPU.
cuda_available = torch.cuda.is_available()
cuda_device_count = torch.cuda.device_count()
if cuda_available and cuda_device_count > 0:
    first_gpu_name = torch.cuda.get_device_name(0)
else:
    first_gpu_name = 'no CUDA device found'

print(cuda_available, '\n',
      cuda_device_count, '\n',
      first_gpu_name)

In [None]:
# Download and initiate the CLIPSeg processor and model from one shared checkpoint name.
clipseg_checkpoint = "CIDAS/clipseg-rd64-refined"
processor = CLIPSegProcessor.from_pretrained(clipseg_checkpoint)
model = CLIPSegForImageSegmentation.from_pretrained(clipseg_checkpoint)

# Make the processor produce 224x224 inputs.
# NOTE(review): the original note says this matches the model's dimensions - confirm.
processor.image_processor.size = dict(height=224, width=224)

In [None]:
# Load the frame prepared earlier (images plus detected labels).
# NOTE(review): unpickling can execute arbitrary code - only load files you trust.
path = '/home/adm_gpu/projects/masterthesis/code/main/dataframe.pkl'
with open(path, mode='rb') as frame_file:
    pictures = pickle.load(frame_file)

# Show the first rows to check that the load was successful.
pictures.head()

In [None]:
# Extract the image column as a list and show the first three entries to verify the format.
l_images = pictures['image'].tolist()
l_images[0:3]

In [None]:
# Build 224x224 versions of the first three images and show them as the cell result.
[im.resize((224, 224)) for im in l_images[:3]]

In [None]:
# Text prompts evaluated for every image by the CLIPSeg loop further down.
prompts = ['text', 'background', 'person']

In [None]:
# Initialise one float Series per candidate prompt to store the coverage values.
# Only the names listed in `prompts` are filled in below; the others stay empty.
prompt_dict = {
    'text': pd.Series(dtype='float'),
    'logo': pd.Series(dtype='float'),
    'background': pd.Series(dtype='float'),
    'person': pd.Series(dtype='float'),
    'landscape': pd.Series(dtype='float')
}

COVERAGE_THRESHOLD = 0.1         # sigmoid probability above which a pixel counts as a hit
CLIPSEG_INPUT_SIZE = (224, 224)  # (width, height) every image is resized to


def prompt_coverage(image, prompt_list, thresh=COVERAGE_THRESHOLD):
    """Return, per prompt, the share of pixels whose CLIPSeg probability exceeds ``thresh``.

    All prompts are evaluated in a single forward pass. The earlier version ran
    the model once per growing prefix of the prompt list and used only the last
    result, and left ``preds`` undefined when a single prompt was given.

    Args:
        image: PIL image, already resized to the processor's input size.
        prompt_list: list of text prompts.
        thresh: probability threshold applied to the sigmoid of the logits.

    Returns:
        List of floats (rounded to 4 decimals), one per prompt, in prompt order.
    """
    if not prompt_list:
        return []

    inputs = processor(
        text=prompt_list,
        images=[image] * len(prompt_list),
        padding="max_length",
        return_tensors="pt",
    )
    # predict
    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits
    if logits.dim() == 2:
        # NOTE(review): some transformers releases appear to drop the batch axis
        # when only one prompt is passed - restore it so indexing stays uniform.
        logits = logits.unsqueeze(0)

    # Scale the maps to the image's (height, width).
    preds = nn.functional.interpolate(
        logits.unsqueeze(1),
        size=(image.size[1], image.size[0]),
        mode="bilinear"
    )

    shares = []
    for k in range(len(prompt_list)):
        iterres = torch.sigmoid(preds[k][0])
        positives = (iterres > thresh).sum().item()
        shares.append(round(positives / iterres.numel(), 4))
    return shares


# One row of coverage values per image, columns in the order of `prompts`.
coverage_rows = []
for source_image in l_images:
    start = time.time()

    # resize to model spec, this will distort DIN A4 pages as they are not square
    image = source_image.resize(CLIPSEG_INPUT_SIZE)
    coverage_rows.append(prompt_coverage(image, prompts))

    end = time.time()
    diff = end - start
    print(diff)

# Build every Series once instead of growing it with pd.concat per image.
for k, prompt_name in enumerate(prompts):
    prompt_dict[prompt_name] = pd.Series([row[k] for row in coverage_rows], dtype='float')

In [None]:
# Persist the per-prompt coverage Series and time the write.
path = '/home/adm_gpu/projects/masterthesis/code/main/prompt_dict.pkl'

start_time = time.time()
with open(path, mode='wb') as file:
    pickle.dump(prompt_dict, file)
end_time = time.time()

time_taken = end_time - start_time

In [None]:
# Inspect the coverage values stored under the 'background' prompt.
prompt_dict['background']

In [None]:
# Persist the image frame joined column-wise with the prompt coverage columns.
path = '/home/adm_gpu/projects/masterthesis/code/main/annotated_frame.pkl'

start_time = time.time()
with open(path, mode='wb') as file:
    # 'logo' and 'landscape' are dropped before the join.
    prompt_frame = pd.DataFrame(prompt_dict).drop(columns=['logo', 'landscape'])
    annotated_frame = pd.concat([pictures, prompt_frame], axis=1)
    pickle.dump(annotated_frame, file)
end_time = time.time()

time_taken = end_time - start_time