In [1]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from PIL import Image
from rembg import remove       # https://github.com/danielgatis/rembg
import imutils
import cvzone                  # Computer vision package that makes its easy to run Image processing and AI functions

In [2]:
"""""
Two dictionaries that will be used to create the .txt files necessary for the YOLO format
They can be changed accordingly, depending on the classes (object classification) of the project at hand

"""""

# YOLO class id -> class name. A name's position in the list is its class id.
class_id_to_name = dict(enumerate(['plastic', 'paper', 'metal', 'glass', 'cardboard']))

# Reverse lookup (class name -> YOLO class id), derived from the mapping above.
name_to_class_id = dict(zip(class_id_to_name.values(), class_id_to_name.keys()))

In [11]:
"""""
Accepts the path of a folder containing images and returns a list containing the paths of those images

"""""

def images_from_folder(folder):
    """Return the absolute paths of all entries inside ``folder``.

    Args:
        folder: Path of the directory to list.

    Returns:
        A list of absolute path strings, one per directory entry, sorted by
        entry name so the result is reproducible between runs.
    """
    # Join with `folder` before making the path absolute: abspath() on the bare
    # file name would resolve it against the current working directory instead.
    return [
        os.path.abspath(os.path.join(folder, filename))
        for filename in sorted(os.listdir(folder))
    ]



""""" 
Accepts an image path and returns the image with its background transparent

"""""

def make_background_transparent(image_path):
    """Remove the background of one image and save the result as a PNG.

    The image is read with OpenCV, passed through rembg's ``remove`` and the
    result is written to ``<stem>new_background.png`` in the current working
    directory, where ``<stem>`` is the input file name without its extension.

    Args:
        image_path: Path of the image to process (``str`` or path-like).
            Backslashes are normalised to forward slashes before use.

    Returns:
        The value returned by ``remove`` for the loaded image.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
        OSError: If the output file cannot be written.
    """
    input_path = os.fspath(image_path).replace("\\", "/")
    # Derive the stem from the normalised path so Windows-style paths also
    # yield just the file name on platforms where "\\" is not a separator.
    output_path = Path(input_path).stem + "new_background.png"

    # cv2.imread signals failure by returning None instead of raising.
    source_image = cv2.imread(input_path)
    if source_image is None:
        raise FileNotFoundError(f"Could not read image: {input_path}")

    output = remove(source_image)

    # cv2.imwrite returns False on failure; only report success when it worked.
    if not cv2.imwrite(output_path, output):
        raise OSError(f"Could not write image: {output_path}")

    print('Background removed successfully')

    return output



"""
This function accepts a folder of images
Creates a new destination folder
For every image, it makes its background transparent, sharpens it and rotates it so it creates 10 new images
Saves the new images in the destination folder

"""

def main_function(folder, new_path='./New_folder'):
    """Create rotated, sharpened, background-free copies of every image in a folder.

    For each readable image in ``folder`` the background is removed with
    rembg's ``remove``, the result is sharpened with a 3x3 kernel, and ten
    copies rotated in 36-degree steps (0, 36, ..., 324) are written to
    ``new_path`` as ``<stem><angle>.png``.

    Args:
        folder: Directory containing the source images.
        new_path: Destination directory. Created if missing; an existing
            directory is reused. Defaults to ``'./New_folder'``.

    Returns:
        None. The function only writes files.
    """
    # exist_ok lets the cell be re-run without failing on the existing folder.
    os.makedirs(new_path, exist_ok=True)

    # The sharpening kernel is loop-invariant, so it is built once.
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])

    for filename in sorted(os.listdir(folder)):

        # os.path.join uses the correct separator on every platform.
        input_path = os.path.join(folder, filename)

        input_image = cv2.imread(input_path)
        if input_image is None:
            # cv2.imread returns None for sub-directories and non-image files.
            print(f'Skipping unreadable file: {input_path}')
            continue

        # remove gives an np.array as an output
        output_image = remove(input_image)

        # sharpens the image
        output_image = cv2.filter2D(src=output_image, ddepth=-1, kernel=kernel)

        stem = Path(input_path).stem

        for angle in range(0, 360, 36):
            rotated_image = imutils.rotate(output_image, angle)
            cv2.imwrite(os.path.join(new_path, f"{stem}{angle}.png"), rotated_image)




"""""
Accepts the path of an image with transparent background as input and locates a frame around the object.
Returns the top left point of that frame, as well as its width and height

"""""

def image_frame(image_path):
    """Locate the axis-aligned frame around the non-black pixels of an image.

    The image is converted to RGBA and a pixel counts as part of the object
    when at least one of its R, G, B channels is non-zero. The alpha channel
    is not examined.

    Args:
        image_path: Path of the image to inspect.

    Returns:
        A tuple ``(tl, w, h)`` where ``tl`` is the ``(x, y)`` position of the
        frame's top-left corner, ``w`` is the distance between the left-most
        and right-most object columns and ``h`` the distance between the
        top-most and bottom-most object rows. When the image contains no
        object pixel, ``((0, 0), 0, 0)`` is returned.
    """
    with Image.open(image_path) as image:
        # Array of shape (height, width, 4): axis 0 is y, axis 1 is x.
        pixels = np.asarray(image.convert('RGBA'))

    # "Not black" means any colour channel is non-zero. Requiring all three to
    # be non-zero would discard saturated colours such as pure red.
    object_mask = pixels[:, :, :3].any(axis=2)

    if not object_mask.any():
        # No object pixels: report an empty frame rather than negative sizes.
        return (0, 0), 0, 0

    object_columns = np.flatnonzero(object_mask.any(axis=0))
    object_rows = np.flatnonzero(object_mask.any(axis=1))

    left, right = int(object_columns[0]), int(object_columns[-1])
    top, bottom = int(object_rows[0]), int(object_rows[-1])

    tl = (left, top)
    w = right - left
    h = bottom - top

    return tl, w, h




"""""
Accepts as input the 4 vertices of the initial plane and the 4 vertices of the resulting plane
and finds the 8 coefficients that will be used for the perspective transformation

dst: The vertices of the destination plane 
src: The vertices of the source plane

"""""

def find_coeffs(dst, src):
    """Compute the 8 coefficients of the perspective transform mapping dst to src.

    The returned coefficients ``(a, b, c, d, e, f, g, h)`` satisfy, for every
    pair of corresponding points ``(x, y)`` in ``dst`` and ``(u, v)`` in ``src``::

        u = (a*x + b*y + c) / (g*x + h*y + 1)
        v = (d*x + e*y + f) / (g*x + h*y + 1)

    Args:
        dst: Sequence of ``(x, y)`` vertices of the destination plane.
        src: Sequence of ``(x, y)`` vertices of the source plane, in the same
            order as ``dst``.

    Returns:
        A NumPy array of shape ``(8,)`` holding the coefficients. With four
        point pairs the system is solved exactly; with more pairs the result
        is the least-squares solution.
    """
    rows = []
    targets = []
    for p1, p2 in zip(dst, src):
        x, y = float(p1[0]), float(p1[1])
        u, v = float(p2[0]), float(p2[1])
        rows.append([x, y, 1, 0, 0, 0, -u * x, -u * y])
        rows.append([0, 0, 0, x, y, 1, -v * x, -v * y])
        # The right-hand side is the source coordinates, interleaved as u, v.
        targets.extend([u, v])

    A = np.array(rows, dtype=float)
    B = np.array(targets, dtype=float)

    # lstsq solves the same least-squares problem as inv(A.T A) A.T B without
    # forming the explicit inverse.
    coeffs, *_ = np.linalg.lstsq(A, B, rcond=None)
    return np.array(coeffs).reshape(8)



"""""
Perspective Transformation

It accepts the path of an image and another 3 variables that determine the transformation
of the output points and it returns the transformed image.

"""""

def perspective_transform(image_path, offset = (0,0), alpha = 0, tilt = 0):
    """Apply a perspective warp that moves the top edge of the object's frame.

    The frame around the object is obtained from ``image_frame``. Its two
    bottom corners stay fixed while the two top corners are moved according to
    ``offset``, ``alpha`` and ``tilt``.

    Args:
        image_path: Path of the image to transform.
        offset: ``(x, y)`` shift of both top corners, as fractions of the
            frame's width and height.
        alpha: Fraction of the frame width by which the top-left corner is
            moved right and the top-right corner is moved left.
        tilt: Fraction of the frame height by which the top-left corner is
            moved down and the top-right corner is moved up.

    Returns:
        The transformed PIL image, with the same size as the input image.
    """
    tl, w, h = image_frame(image_path)
    left, top = tl

    # Initializes the 4 corners of the input frame
    # (top-left, top-right, bottom-right, bottom-left).
    input_points = np.float32([
        [left, top],
        [left + w, top],
        [left + w, top + h],
        [left, top + h],
    ])

    shift_x = int(offset[0] * w)
    shift_y = int(offset[1] * h)
    squeeze = int(alpha * w)
    skew = int(tilt * h)

    # New positions of the two top corners; the bottom corners are unchanged.
    P1 = [input_points[0][0] + shift_x + squeeze, input_points[0][1] + shift_y + skew]
    P2 = [input_points[1][0] + shift_x - squeeze, input_points[1][1] + shift_y - skew]

    output_points = np.float32([P1, P2, input_points[2], input_points[3]])

    # Pillow looks up, for every OUTPUT pixel, the INPUT position to sample,
    # so the coefficients must map the output corners (dst) back to the
    # original frame corners (src).
    coeffs = find_coeffs(output_points, input_points)

    with Image.open(image_path) as image:
        return image.transform(image.size, Image.PERSPECTIVE, coeffs, Image.BICUBIC)


"""""
Accepts an image path (image with transparent background) and a background path,
resizes the background image so it matches with the mask and overlays the two images,
saving the result in a png format

"""""

def overlay_image_PIL(image_path, background_path):
    """Paste an object image onto a background and save the result as a PNG.

    The background is resized to the object image's size, the object is pasted
    at the top-left corner using its own alpha channel as the mask, and the
    result is saved in the current working directory as
    ``<image stem><background stem>.png``.

    Args:
        image_path: Path of the object image (expected to have a transparent
            background).
        background_path: Path of the background image.

    Returns:
        None. The function only writes the output file.
    """
    # Reads the object image. Converting to RGBA guarantees an alpha channel,
    # which paste() requires when the image is also used as its own mask.
    with Image.open(image_path) as opened_mask:
        mask = opened_mask.convert('RGBA')

    # Reads the background and resizes it so that it matches the mask.
    with Image.open(background_path) as opened_background:
        background = opened_background.resize(mask.size)

    background.paste(mask, (0, 0), mask)

    output_name = Path(image_path).stem + Path(background_path).stem + '.png'
    background.save(output_name, 'PNG')
    
    
"""""
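Accepts the paths of a background image and an object (foreground) image,
as well as the point b (x,y coordinates) which indicates the top left point of overlaying.
Overlays the object on the background and writes the result to the object image's path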

"""""

def overlay_image_cv(imgBack_path, imgFront_path, b=(0,0), output_path=None):
    """Overlay a foreground PNG onto a background image using cvzone.

    Args:
        imgBack_path: Path of the background image.
        imgFront_path: Path of the foreground image; it is read with
            ``cv2.IMREAD_UNCHANGED`` so that an alpha channel is kept.
        b: ``(x, y)`` position on the background where the foreground's
            top-left corner is placed.
        output_path: Where to write the result. When ``None`` (the default)
            the result is written to ``imgFront_path``, overwriting the
            foreground image.

    Returns:
        None. The function only writes the output file.

    Raises:
        FileNotFoundError: If either image cannot be read.
        OSError: If the result cannot be written.
    """
    # cv2.imread signals failure by returning None instead of raising.
    imgBack = cv2.imread(imgBack_path)
    if imgBack is None:
        raise FileNotFoundError(f"Could not read background image: {imgBack_path}")

    imgFront = cv2.imread(imgFront_path, cv2.IMREAD_UNCHANGED)
    if imgFront is None:
        raise FileNotFoundError(f"Could not read foreground image: {imgFront_path}")

    imResult = cvzone.overlayPNG(imgBack, imgFront, b)

    if output_path is None:
        output_path = imgFront_path

    # cv2.imwrite returns False on failure rather than raising.
    if not cv2.imwrite(output_path, imResult):
        raise OSError(f"Could not write image: {output_path}")




"""""
Creates a .txt file in the same directory as the given image with a YOLO format
using the dictionary name_to_class_id

"""""

def create_yolo_txt(image_path, tl, w, h):
    """Write a YOLO-format label file next to an image.

    The class is derived from the image's file name: all digits are removed
    from the stem and the remainder is looked up in ``name_to_class_id``.
    The label file has the same stem as the image, the extension ``.txt`` and
    a single line ``<class id> <cX> <cY> <w> <h>``, where the box centre and
    size are divided by the image's width and height.

    Args:
        image_path: Path of the image the label belongs to.
        tl: ``(x, y)`` position of the bounding box's top-left corner, in pixels.
        w: Width of the bounding box, in pixels.
        h: Height of the bounding box, in pixels.

    Returns:
        None. The function only writes the label file.

    Raises:
        ValueError: If the file name does not correspond to a known class.
    """
    image_path = Path(image_path)
    name = image_path.stem
    name_of_image = ''.join(ch for ch in name if not ch.isdigit())

    # Fail loudly instead of writing "None" as the class id, which would
    # produce a label file that is not valid YOLO.
    if name_of_image not in name_to_class_id:
        raise ValueError(
            f"Cannot derive a class from image name {name!r}: "
            f"{name_of_image!r} is not in name_to_class_id"
        )
    object_id = name_to_class_id[name_of_image]

    # Only the dimensions are needed, so the file is closed straight away.
    with Image.open(image_path) as image:
        image_width, image_height = image.size

    cX = (tl[0] + w/2)/image_width
    cY = (tl[1] + h/2)/image_height
    w = w/image_width
    h = h/image_height

    with open(image_path.parent / (name + '.txt'), 'w') as f:
        f.write(' '.join(str(value) for value in (object_id, cX, cY, w, h)))
    
    