## Instance segmentation

In [None]:
!pip3 install pycocotools pixellib

In [None]:
from matplotlib import pyplot as plt
import numpy as np

In [None]:
  import pixellib
  from pixellib.torchbackend.instance import instanceSegmentation

In [None]:
  # Create the PixelLib instance-segmentation object and load its weights.
  # The checkpoint is read from a fixed path; no cell visible here downloads it,
  # so it presumably has to be uploaded to /content beforehand.
  ins = instanceSegmentation()
  ins.load_model("/content/pointrend_resnet50.pkl")


In [None]:
  # Run the model on the sample image. `segmask` is the dict that the helper
  # functions below read ('masks', 'class_names', 'boxes'). The rendered result
  # is presumably written to `output_image_name` - confirm against PixelLib docs.
  segmask, output = ins.segmentImage("/content/sample2.jpg", show_bboxes=True, output_image_name="/content/sample2_segmented.jpg")

In [None]:
# Read segmask example instead of running the whole model each time
import pickle

# Read dictionary pkl file
# NOTE: this rebinds the global `segmask`, replacing the result of an earlier
# model run if that cell was executed.
# SECURITY: pickle.load can execute arbitrary code while unpickling - only load
# files you created yourself.
with open('/content/segmask_example.pkl', 'rb') as fp:
    segmask = pickle.load(fp)

In [None]:
def get_instance_area(segmask):
  '''
  Collapses the per-instance masks into one 2-D map.

  segmask['masks'] is summed along axis 2, so every cell of the result holds
  the sum of all mask values at that pixel (for boolean masks: how many
  instances cover the pixel, 0 where nothing was detected).
  '''
  per_instance_masks = segmask['masks']
  return np.sum(per_instance_masks, axis = 2)

In [None]:
def instance_sizes(segmask):
  '''
  Returns a list of (class name, size in pixels) tuples, one per detected
  instance, sorted by the instance size with the largest instance first.

  segmask: dict with
    'class_names' - sequence of class names, one per instance
    'masks'       - array indexed as [row, col, instance]; the size of
                    instance i is the sum of masks[:, :, i]

  Instances of equal size keep their original relative order.
  '''
  masks = segmask['masks']
  class_names = segmask['class_names']
  classes_with_sizes = [
      # int() converts the numpy scalar to a plain Python int so the list
      # prints as clean numbers when it is embedded in a text prompt.
      (object_name, int(masks[:, :, i].sum()))
      for i, object_name in enumerate(class_names)
  ]
  # list.sort is stable, also with reverse=True, so ties stay in input order.
  classes_with_sizes.sort(key=lambda name_and_size: name_and_size[1], reverse=True)
  return classes_with_sizes


In [None]:
from numpy.core.fromnumeric import shape
def create_straight_text_box(segmask, x_tl, y_tl, x_br, y_br):
  '''
  Returns a boolean image the size of the given image with all values set to
  False except for the border of the axis-aligned text box given as input.
  Only straight (non-rotated) boxes can be created.

  segmask: dict whose 'masks' array has the image size as its first two dims
  x_tl, y_tl: column / row of the top-left corner (inclusive)
  x_br, y_br: column / row of the bottom-right corner (inclusive)

  Returns False (after printing a message) when the box does not fit inside
  the image.
  '''
  image_shape = segmask['masks'].shape[:2]
  # Valid indices run from 0 to size - 1, so a bottom-right coordinate equal to
  # the image size is already outside. Negative coordinates are rejected too,
  # because numpy would silently wrap them around to the opposite edge.
  if x_tl < 0 or y_tl < 0 or x_br >= image_shape[1] or y_br >= image_shape[0]:
    print('Text box coordinates out of image range')
    return False
  text_box = np.zeros(shape = image_shape, dtype = bool)
  # Slices end one past the bottom-right coordinate so that the corner pixel
  # (y_br, x_br) is part of the border as well.
  text_box[y_tl:y_br + 1, x_tl] = True
  text_box[y_tl:y_br + 1, x_br] = True
  text_box[y_tl, x_tl:x_br + 1] = True
  text_box[y_br, x_tl:x_br + 1] = True

  return text_box



In [None]:
import numpy as np
from skimage.draw import polygon
import math

def create_rotated_text_box(segmask, x_tl, y_tl, width, height, angle_deg):
    """Return a boolean mask of a filled, rotated rectangle.

    The rectangle is defined by its un-rotated top-left corner and size, then
    rotated by ``angle_deg`` around its own centre.

    Parameters:
        segmask: dict whose 'masks' array has the image size as its first two dims.
        x_tl, y_tl: column / row of the top-left corner before rotation.
        width, height: size of the box in pixels.
        angle_deg: rotation angle in degrees.

    Returns:
        Boolean array of the image size, True inside the rotated box. Parts of
        the box that fall outside the image are clipped away.
    """
    # Convert angle from degrees to radians
    angle_rad = math.radians(angle_deg)

    image_shape = segmask['masks'].shape[:2]
    # Warning only: the mask is still produced (possibly empty after clipping).
    if not (0 <= x_tl < image_shape[1] and 0 <= y_tl < image_shape[0]):
        print('Text box top-left corner coordinates out of image range')

    # Calculate the four corners of the rectangle as (x, y) pairs
    box_corners = np.array([[x_tl, y_tl],
                            [x_tl + width, y_tl],
                            [x_tl + width, y_tl + height],
                            [x_tl, y_tl + height]])

    # Rotate the box corners around the center of the box
    center = np.array([x_tl + width / 2, y_tl + height / 2])
    rotation_matrix = np.array([[np.cos(angle_rad), -np.sin(angle_rad)],
                                [np.sin(angle_rad), np.cos(angle_rad)]])
    rotated_box_corners = np.dot(box_corners - center, rotation_matrix.T) + center

    # Create an empty mask
    text_box = np.zeros(image_shape, dtype=bool)

    # Fill the rotated box polygon. Passing `shape` clips the pixel coordinates
    # to the image; without it a corner rotated past an edge raises IndexError,
    # and a negative coordinate silently wraps to the opposite side.
    rr, cc = polygon(rotated_box_corners[:, 1], rotated_box_corners[:, 0], shape=image_shape)
    text_box[rr, cc] = True

    return text_box



In [None]:
def is_text_box_in_dead_area(instance_area, text_box):
    """Tell whether the text box avoids every detected instance.

    The two arrays are multiplied element-wise; the result is true only when
    every product is zero, i.e. no pixel is non-zero in both inputs.
    """
    covered_by_both = instance_area * text_box
    return np.all(covered_by_both == 0)

In [None]:
def text_box_location_candidates(segmask, width, height, angle_deg):
    """Brute-force search for top-left positions where a text box fits.

    Every position (x, y) with x < image width - width and
    y < image height - height is tried: a rotated box mask is built there and
    the position is marked True when that mask overlaps no instance.

    Returns a boolean array of the image size indexed as [y, x].
    """
    occupied = get_instance_area(segmask)
    n_rows, n_cols = segmask['masks'].shape[:2]
    candidates = np.zeros(shape=(n_rows, n_cols), dtype=bool)
    for x in range(n_cols - width):
        for y in range(n_rows - height):
            box_mask = create_rotated_text_box(segmask, x, y, width, height, angle_deg)
            candidates[y, x] = is_text_box_in_dead_area(occupied, box_mask)
    return candidates


In [None]:
def bounding_box_intersection(bb1, bb2):
    """Return True when two (x, y, width, height) boxes overlap or touch.

    Each check rules out one way the boxes can be fully separated; if none
    applies the boxes intersect.
    """
    x1, y1, w1, h1 = bb1
    x2, y2, w2, h2 = bb2
    if x1 + w1 < x2:
        # bb1 lies entirely to the left of bb2
        return False
    if x2 + w2 < x1:
        # bb2 lies entirely to the left of bb1
        return False
    if y1 + h1 < y2:
        # bb1 lies entirely above bb2
        return False
    if y2 + h2 < y1:
        # bb2 lies entirely above bb1
        return False
    return True


In [None]:
from scipy.signal import convolve2d
from scipy.ndimage import binary_erosion, binary_dilation


def gpt_text_box_location_candidates(segmask, width, height, angle_deg):
    """Search for text box positions, using cheap filters before the mask test.

    Positions are scanned row by row. A position (x, y) is skipped when
      * the width x height window starting there already contains an accepted
        candidate (so accepted positions end up spread out), or
      * a corner of any bounding box in segmask['boxes'] lies inside that window.
    Only for the remaining positions is the rotated box mask built and tested
    against the instance masks.

    Returns a boolean array of the image size indexed as [y, x].
    """
    image_shape = segmask['masks'].shape[:2]
    boxes = segmask['boxes']
    candidates = np.zeros(shape=image_shape, dtype=bool)
    instance_area = get_instance_area(segmask)

    def touches_bounding_box(x, y):
        # True when some bounding-box corner falls inside the window at (x, y).
        for box in boxes:
            # NOTE(review): boxes are unpacked as (y_min, x_min, y_max, x_max);
            # confirm this matches the coordinate order the model emits.
            y_min, x_min, y_max, x_max = box
            if (x <= x_min < x + width or x <= x_max < x + width) and (y <= y_min < y + height or y <= y_max < y + height):
                return True
        return False

    for y in range(image_shape[0] - height - 1):
        for x in range(image_shape[1] - width - 1):
            if np.any(candidates[y:y+height, x:x+width]):
                continue
            if touches_bounding_box(x, y):
                continue
            # Build the (expensive) full-image mask only after the cheap
            # filters have passed; rejected positions never needed it.
            text_box = create_rotated_text_box(segmask, x, y, width, height, angle_deg)
            candidates[y, x] = is_text_box_in_dead_area(instance_area, text_box)

    return candidates

## Text region detection

In [None]:
!pip3 install pytesseract

Collecting pytesseract
  Downloading pytesseract-0.3.10-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.10


In [None]:
import cv2
import numpy as np
import pytesseract
from google.colab.patches import cv2_imshow

import os

# The Windows install location below only exists on Windows machines. On any
# other platform (this cell imports google.colab, i.e. it targets Colab) keep
# pytesseract's default command so the `tesseract` binary is looked up on PATH.
if os.name == 'nt':
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, convert to HSV format, define lower/upper ranges, and perform
# color segmentation to create a binary mask
image = cv2.imread('1.jpg')
# cv2.imread returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than inside cvtColor.
if image is None:
    raise FileNotFoundError("Could not read image '1.jpg'")
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0, 0, 218])
upper = np.array([157, 54, 255])
mask = cv2.inRange(hsv, lower, upper)

# Create horizontal kernel and dilate to connect text characters
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,3))
dilate = cv2.dilate(mask, kernel, iterations=5)

# Find contours and filter using aspect ratio
# Remove non-text contours by filling in the contour
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version;
# the contour list is the first of 2 or the second of 3.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    ar = w / float(h)
    # Regions less than 5 times as wide as they are tall are treated as
    # non-text and painted black.
    if ar < 5:
        cv2.drawContours(dilate, [c], -1, (0,0,0), -1)

# Bitwise dilated image with mask, invert, then OCR
result = 255 - cv2.bitwise_and(dilate, mask)
data = pytesseract.image_to_string(result, lang='eng',config='--psm 6')
print(data)

cv2_imshow( mask)
cv2_imshow( dilate)
cv2_imshow( result)
cv2.waitKey()

error: ignored

## ChatGPT suggestions


In [None]:
!pip3 install openai

In [None]:
import openai
# Read the API key from a local file so it is not stored in the notebook.
# The context manager closes the file handle; strip() also removes a trailing
# '\r' or spaces, which would otherwise end up inside the key.
with open('/content/chatgpt_api_key.txt') as key_file:
    openai.api_key = key_file.read().strip()

In [None]:

def get_raw_suggestions(segmask):
    """Ask the chat model for caption / hashtag / overlay-text suggestions.

    The (name, size) list from instance_sizes(segmask) is embedded in a prompt
    that is sent as a single message with role 'system' to 'gpt-3.5-turbo'.

    Returns the completion object exactly as the openai client returns it.
    """
    instance_list = instance_sizes(segmask)
    # The backslash continuations sit inside the string literal, so the leading
    # spaces of each continued line are part of the prompt text.
    prompt = f'here is a list of objects extracted from an image using\
     instance segmentation. The number next to each object represents the size\
      of the object in pixels: {instance_list}.\nI want a few suggestions for\
       captions and hashtags for an Instagram post and overlay text for a story.\
        Give me your result as a python list of tuples\
         (caption, hashtags, overlay text). I want only the list and no \
         other text so that I can easily process the response in python.\
         Limit the response to 5 suggestions'

    return openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=[{'role': 'system', 'content': prompt}],
    )

def get_suggestions(segmask):
  '''
  Returns the model's suggestions as a Python object, expected to be a list
  of (caption, hashtags, overlay text) tuples.

  The text of the first choice in the completion is parsed as a Python
  literal. Raises ValueError or SyntaxError when the reply is not a plain
  literal (e.g. the model added extra text around the list).
  '''
  import ast  # local import so this function does not rely on another cell

  raw_suggestions = get_raw_suggestions(segmask)
  content = raw_suggestions['choices'][0]['message']['content']
  # SECURITY: the reply is text produced by an external service. eval() would
  # execute whatever code it contains; ast.literal_eval only accepts literals
  # (lists, tuples, strings, numbers, ...) and rejects everything else.
  return ast.literal_eval(content.strip())

In [None]:
# Query the chat model (through get_suggestions) for the current `segmask`
# and keep the parsed suggestions for the next cell.
d = get_suggestions(segmask)

In [None]:
print(d)

[('Enjoying a day out with friends! 🌞 #outdooradventures #friends', '#instadaily #instagood #fun', 'Having a blast'), ('City vibes! 🏙️ #urbanlife #cityscape', '#city #explore #architecture', 'Exploring the city'), ('Cruising through traffic in style! 🚗 #carride #streetlife', '#drive #cars #city', 'On the move'), ('Biking through scenic routes! 🚴 #cyclinglife #naturelovers', '#bikelife #adventure', 'Pedaling through nature'), ('Exploring the urban jungle! 🌃 #cityadventures #nightlife', '#explore #citylights', 'Nighttime exploration')]


## Render text on image

In [None]:
from PIL import Image, ImageDraw, ImageFont

In [None]:
# Draw a sample overlay text onto the example image and save the result.
# NOTE: this rebinds the global `image`, which the text-region-detection cell
# also uses (there it holds the array returned by cv2.imread).
image = Image.open("/content/sample2.jpg")
draw = ImageDraw.Draw(image)
# Second argument is the font size.
font = ImageFont.truetype("/content/High Speed.ttf",36)
text = "Urban Life"
# (500, 150) is the text position, (0,0,0) the fill colour.
draw.text((500, 150), text, (0,0,0), font = font)
# Relative path: the file is written to the current working directory.
image.save("text.png")

## Color analysis

In [None]:
!pip3 install easydev colormap opencv-python colorgram.py extcolors

Collecting easydev
  Downloading easydev-0.12.1.tar.gz (55 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/55.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.7/55.7 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting colormap
  Downloading colormap-1.0.4.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting colorgram.py
  Downloading colorgram.py-1.2.0-py2.py3-none-any.whl (6.2 kB)
Collecting extcolors
  Downloading extcolors-1.0.0-py3-none-any.whl (9.8 kB)
Collecting colorama (from easydev)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Collecting colorlog (from easydev)
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting convcolors>=1.0.0 (from extcolors)
  Downloading convcolors-2.2.0-py3-none-any.whl (3.8 kB)
Building wheels for collected packages: easydev, colormap
  Building wheel

In [None]:
import cv2
import extcolors
from colormap import rgb2hex