# Sketch to Photograph AI Image Generator

### Made using numpy and cv2, no ML libraries

In [None]:
import numpy as np
from ImageProcessing_Ege import imgpro as ege
import cv2
import os
import time

### Image class contains functions to process the images and turn them into data as needed by the ML model

In [None]:

class Image:
    """
    Wraps a cv2 (BGR) image array together with the filename it belongs to,
    and converts images into the flat arrays used by the neural networks.

    Attributes:
    - filename: Name the image was loaded from (or a generated name).
    - image: The pixel array, or None when loading failed.
    - shape: Shape of `image`, or None when loading failed.
    """

    # Extensions that are tried/recognised when loading and saving.
    _EXTENSIONS = (".jpg", ".png")
    # Thresholds used by simplify_multiple when none are given (immutable on purpose).
    _DEFAULT_THRESHOLDS = (25, 50, 75, 100, 125, 150, 175, 200, 225, 250)

    def __init__(self, filename: str, image: np.ndarray = None):
        """
        Loads an image from the given filename as a cv2 image object, or wraps
        an already loaded array / Image when `image` is given.

        Usage: Image("filename") or Image("filename.jpg") or Image("filename.png")

        When the filename has no .jpg/.png extension, the bare name is tried
        first, then the name with ".jpg" and with ".png" appended. If nothing
        can be read, "Invalid filename" is printed and `image` / `shape`
        stay None.
        """
        self.filename = filename
        if isinstance(image, np.ndarray):
            self.image = image
            self.shape = image.shape
            return
        if isinstance(image, Image):
            self.image = image.image
            self.shape = image.shape
            return
        self.shape = None
        self.image = None

        candidates = [filename]
        if not filename.endswith(self._EXTENSIONS):
            candidates.extend(filename + ext for ext in self._EXTENSIONS)
        for candidate in candidates:
            # cv2.imread signals failure by returning None instead of raising,
            # so the result has to be checked explicitly.
            loaded = cv2.imread(candidate)
            if loaded is not None:
                self.image = loaded
                self.shape = loaded.shape
                return
        print("Invalid filename")

    def show(self, tabname: str = "Image"):
        """
        Displays the image in a cv2 window titled `tabname` and blocks until
        a key is pressed, then closes all cv2 windows.
        """
        cv2.imshow(tabname, self.image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    def save(self, newfilename: str):
        """
        Writes the image to `newfilename`; ".png" is appended when the name
        does not already end in ".jpg" or ".png".
        """
        if not newfilename.endswith(self._EXTENSIONS):
            newfilename += ".png"
        cv2.imwrite(newfilename, self.image)

    def _threshold_to_pixels(self, contrasted, threshold: float) -> np.ndarray:
        """
        Builds a black/white BGR array: pixels whose contrast value is
        >= threshold become black (0), all others white (255).
        """
        height, width = self.image.shape[:2]
        # The contrast matrix is read as [x, y] (same indexing as the previous
        # per-pixel loop), so it is transposed to line up with the [y, x] image.
        # NOTE(review): assumes create_contrast_matrix_cv2 returns a 2-D array
        # of at least (width, height) - confirm against ImageProcessing_Ege.
        mask = np.asarray(contrasted)[:width, :height].T >= threshold
        pixels = np.full((height, width, 3), 255, dtype=np.uint8)
        pixels[mask] = 0
        return pixels

    def simplify(self, threshold: float):
        """
        Simplifies the image by the threshold, using the contrast matrix from
        ImageProcessing_Ege (create_contrast_matrix_cv2).

        Returns an Image object named "<filename>_simplified_<threshold>".
        """
        contrasted = ege.create_contrast_matrix_cv2(self.image)
        res = self._threshold_to_pixels(contrasted, threshold)
        return Image(self.filename + "_simplified_" + str(threshold), res)

    def simplify_multiple(self, thresholds: list = None):
        """
        Simplifies the image by each threshold in the thresholds list of floats,
        returns a list of Image objects (one per threshold, in the same order).

        The contrast matrix is computed once and reused for every threshold.
        When `thresholds` is None, 25, 50, ..., 250 are used.
        """
        if thresholds is None:
            thresholds = self._DEFAULT_THRESHOLDS
        contrasted = ege.create_contrast_matrix_cv2(self.image)
        return [
            Image(
                self.filename + "_simplified_" + str(threshold),
                self._threshold_to_pixels(contrasted, threshold),
            )
            for threshold in thresholds
        ]

    def convert_data(self) -> list:
        """
        Converts the BGR (not rgb because opencv uses bgr by default) image
        into a list of 3 flat float16 arrays, each array holding one color
        channel (B, G, R) in row-major order.

        Values are scaled to the range 0..1 (pixel value 0 -> 0, 255 -> 1).
        """
        scaled = (self.image / 255).astype(np.float16)
        return [scaled[:, :, channel].ravel() for channel in range(3)]

    def convert_simplified_data(self) -> np.ndarray:
        """
        Converts the simplified image into a flat array of floats between 0 and 1.

        The image is converted to grayscale first; black (0) maps to 0 and
        white (255) maps to 1, in row-major order.
        """
        img = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
        # Values are quantized to float16 precision but returned as float64,
        # which is what the previous np.append-based implementation produced.
        return (img / 255).astype(np.float16).astype(np.float64).ravel()
    

### NeuralNetwork class contains functions to handle the ML model, for training and saving weights and biases

In [None]:
  
class NeuralNetwork:
    """
    Small fully connected network implemented with numpy only.

    Attributes:
    - weights: List of weight matrices, one per layer.
    - biases: List of bias column vectors, one per layer.
    """

    def __init__(self, weights: list = None, biases: list = None):
        """
        Stores the given weights and biases; each instance gets its own empty
        list when they are omitted (no list is shared between instances).
        """
        self.weights = [] if weights is None else weights
        self.biases = [] if biases is None else biases

    def init_params(self, layer_count: int, input_size: int, hidden_size: int = 100):
        """
        Initializes the weights and biases of the neural network with float16
        values drawn uniformly from [-0.5, 0.5).

        The first layer maps input_size -> hidden_size, the last layer maps
        hidden_size -> input_size, and layers in between are
        hidden_size -> hidden_size.

        DO NOT use if this class already has weights and biases assigned, it will reset them.
        """
        self.weights = []
        self.biases = []
        last = layer_count - 1
        for i in range(layer_count):
            if i == 0:
                rows, cols = hidden_size, input_size
            elif i == last:
                rows, cols = input_size, hidden_size
            else:
                rows, cols = hidden_size, hidden_size
            self.weights.append((np.random.rand(rows, cols) - 0.5).astype(np.float16))
            self.biases.append((np.random.rand(rows, 1) - 0.5).astype(np.float16))

    def sigmoid(self, x: np.ndarray):
        """
        Scaled logistic function: the input is divided by 50 and the output
        lies between 0 and 255.
        """
        return 255 / (1 + np.exp(-x/50))

    def ReLU(self, x: np.ndarray):
        """
        Rectified linear unit: element-wise max(0, x).
        """
        return np.maximum(0, x)

    def forward_propagation(self, inp: np.ndarray):
        """
        Forward propagation algorithm to compute the output of the neural network.

        Parameters:
        - inp: Input data. A 1-D array is treated as a single column.

        Returns:
        - Z: List of linear combinations for each layer (excluding input layer).
        - A: List of activation values for each layer (excluding input layer).
        """
        # TODO maybe change activation function, could add an option to choose although that will require retraining
        if len(inp.shape) == 1:
            A = [inp.reshape(inp.shape[0], 1)]
        else:
            A = [inp]
        Z = []
        # zip stops at the shorter list, so unmatched weights/biases are ignored.
        for W, b in zip(self.weights, self.biases):
            Z.append(np.dot(W, A[-1]) + b)
            A.append(self.sigmoid(Z[-1]))
        return Z, A[1:]

    def predict(self, inp: np.ndarray):
        """
        Returns the output of the neural network for the given input.
        """
        Z, A = self.forward_propagation(inp)
        return A[-1]

    def backward_propagation(self, A, Z, inp, out):
        """
        Backpropagation algorithm to compute gradients for weights and biases.
        Works for any number of layers (the layer count is taken from Z).

        Parameters:
        - A: List of activation values for each layer (excluding input layer).
        - Z: List of linear combinations for each layer (excluding input layer).
        - inp: Input data. A 1-D array is treated as a single column.
        - out: Expected output data.

        Returns:
        - dW: List of gradients for weight matrices.
        - dB: List of gradients for bias vectors (one scalar per layer).
        """
        W = self.weights
        if len(inp.shape) == 1:
            # Same convention as forward_propagation.
            inp = inp.reshape(inp.shape[0], 1)

        # NOTE(review): fixed normalisation constant kept from the original
        # (the sample-count based `m = inp.shape[0]` was commented out there).
        m = 10**8

        dZ = [0 for z in Z]
        dW = [0 for w in W]
        dB = [0 for b in self.biases]

        last = len(Z) - 1
        for i in range(last, -1, -1):
            if i == last:
                dZ[i] = A[i] - out
            else:
                # NOTE(review): (Z > 0) is a ReLU-style derivative mask, while
                # forward_propagation applies the sigmoid; kept unchanged so
                # training results stay the same - confirm which is intended.
                dZ[i] = np.dot(W[i+1].T, dZ[i+1]) * (Z[i] > 0)
            previous = A[i-1] if i > 0 else inp
            dW[i] = (1 / m) * np.dot(dZ[i], previous.T)
            # NOTE(review): sums over all units and samples, giving a single
            # scalar bias gradient per layer (kept from the original).
            dB[i] = (1 / m) * np.sum(dZ[i])

        return dW, dB

    def update_params(self, dW, dB, learning_rate: float = 0.1):
        """
        Updates the weights and biases of the neural network by one gradient
        step; the results are stored back as float16.
        """
        for i in range(min(len(self.weights), len(self.biases))):
            self.weights[i] = (self.weights[i] - (dW[i] * learning_rate)).astype(np.float16)
            self.biases[i] = (self.biases[i] - (dB[i] * learning_rate)).astype(np.float16)

    def train(self, inp: np.ndarray, expected_output: np.ndarray, epochs: int = 100, learning_rate: float = 0.1):
        """
        Trains the neural network for the given input and expected output.

        Prints the loss (mean absolute error) every 10 epochs and returns the
        loss after the last epoch. With epochs == 0 nothing is updated and the
        loss of the current parameters is returned.
        """
        print("Training started")
        error = None
        for i in range(epochs):
            Z, A = self.forward_propagation(inp)
            dW, dB = self.backward_propagation(A, Z, inp, expected_output)
            self.update_params(dW, dB, learning_rate)

            # Loss is measured after the update, with the new parameters.
            output = self.predict(inp)
            error = np.mean(np.abs(expected_output - output))

            if (i + 1) % 10 == 0:
                print(f"Epoch {i+1} loss: {round(error, 3)}")
        if error is None:
            # No epoch ran; report the loss of the untouched parameters.
            error = np.mean(np.abs(expected_output - self.predict(inp)))
        print("Final error: " + str(round(error, 3)))
        return error


In [None]:
# Sanity check of the loss expression used in NeuralNetwork.train:
# the mean absolute difference between two small 3x3 matrices.
test1 = np.array([[1,2,3], [4,5,6], [7,8,9]])
test2 = np.array([[2,2,2], [2,2,2], [2,2,2]])

# Last expression of the cell, so the notebook displays the value (29 / 9).
np.mean(np.abs(test1 - test2))

### Some extra functions to turn back data into image class objects and create training data from a directory path with images in it.

### `create_training_data` simplifies every image at each of the given thresholds and returns a list holding the training inputs and the expected outputs for each color channel's neural network (B, G, R).

In [None]:
def data_to_image(data: list, shape: tuple, newfilename: str = None):
    """
    Converts the given data back to an image.

    Parameters:
    - data: Three sequences (B, G, R). Each is indexed as
      data[channel][y * width + x][0], i.e. a column of per-pixel values in
      row-major order. Values are cast to uint8 as-is (no rescaling).
    - shape: Image shape; only shape[0] (height) and shape[1] (width) are used.
    - newfilename: Name for the resulting Image. When omitted, a name of the
      form "Data2Img_<number>" is generated from the current time at call time.

    Returns an Image object.
    """
    if newfilename is None:
        # Computed per call; as a default argument it would be evaluated only
        # once, when the function is defined.
        newfilename = "Data2Img_" + str(int(time.time()*10000 % 10000))

    height, width = shape[0], shape[1]
    pixel_count = height * width
    res = np.zeros((height, width, 3), dtype = np.uint8)
    for channel in range(3):
        # First column of the channel data, laid out row by row.
        values = np.asarray(data[channel])[:pixel_count, 0]
        res[:, :, channel] = values.reshape(height, width).astype(np.uint8)
    return Image(newfilename, res)

def simplified_data_to_image(data: list, shape: tuple, newfilename: str = None):
    """
    Converts the given simplified (black/white) data back to an image.

    Parameters:
    - data: Flat sequence of per-pixel values in row-major order. A value
      equal to 0 becomes a black pixel, anything else a white pixel.
    - shape: Image shape; only shape[0] (height) and shape[1] (width) are used.
    - newfilename: Name for the resulting Image. When omitted, a name of the
      form "Data2Img_<number>" is generated from the current time at call time.

    Returns an Image object.
    """
    if newfilename is None:
        # Computed per call; as a default argument it would be evaluated only
        # once, when the function is defined.
        newfilename = "Data2Img_" + str(int(time.time()*10000 % 10000))

    height, width = shape[0], shape[1]
    flat = np.asarray(data).reshape(-1)[:height * width]
    is_black = (flat == 0).reshape(height, width)
    res = np.full((height, width, 3), 255, dtype = np.uint8)
    res[is_black] = 0
    return Image(newfilename, res)
        
        
def create_training_data(path: str, thresholds: list = None):
    """
    Imports all of the .jpg/.png images in the given path, simplifies them by
    the given thresholds, and converts them to data.

    Parameters:
    - path: Directory containing the images.
    - thresholds: Thresholds passed to Image.simplify_multiple. When omitted,
      25, 50, ..., 250 are used.

    Returns list of lists of input arrays and output arrays for example:

    inputs = [img1_25, img1_50, ... , img2_25, img2_50, ...]

    outputsB = [img1B,   img1B,    ... , img2B,    img2B, ...]
    outputsG = [img1G,   img1G,    ... , img2G,    img2G, ...]
    outputsR = [img1R,   img1R,    ... , img2R,    img2R, ...]

    res = [inputs, outputsB, outputsG, outputsR]

    Each of the four entries is returned as a transposed numpy array, so
    every sample is one column.
    """
    if thresholds is None:
        thresholds = [25, 50, 75, 100, 125, 150, 175, 200, 225, 250]

    filenames = os.listdir(path)
    print(filenames)
    res = [[], [], [], []]
    for filename in filenames:
        if not filename.endswith((".jpg", ".png")):
            continue
        # os.path.join keeps this working on every platform (a hard-coded
        # backslash only forms a valid path on Windows).
        img = Image(os.path.join(path, filename))
        B_data, G_data, R_data = img.convert_data()

        # Every simplified variant is paired with the same color targets.
        for s in img.simplify_multiple(thresholds):
            res[0].append(s.convert_simplified_data())
            res[1].append(B_data)
            res[2].append(G_data)
            res[3].append(R_data)

    return [np.array(column).T for column in res]


def combine_colors(B: np.ndarray, G: np.ndarray, R: np.ndarray, shape: tuple):
    """
    Merges the three per-channel arrays into one image.

    The channels are handed to data_to_image in B, G, R order together with
    the target shape; its result is returned unchanged.
    """
    channels = [B, G, R]
    return data_to_image(channels, shape)



### Creating training data from the training directory (`Birds/train/ABBOTTS BABBLER/`)

In [None]:
# Build the training set from every .jpg/.png in this folder, using the
# default thresholds of create_training_data.
res = create_training_data("Birds/train/ABBOTTS BABBLER/")
print("Training data created")

In [None]:
# Unpack the training set: simplified inputs plus the B, G and R targets.
inputs, b, g, r = res

## Initializing Neural Networks

In [None]:
# Alternative (disabled): build the training data from a user-supplied folder.
#a = input("Enter folder path: ")
#a = "train"
#res = create_training_data(a)
#print("Training data created")

# One independent network per color channel.
nnB = NeuralNetwork()
nnG = NeuralNetwork()
nnR = NeuralNetwork()
print("Neural Networks created")

# 3 layers each; the layer input/output size is the number of values per
# sample (rows of `inputs`), the hidden size is left at its default.
nnB.init_params(3, inputs.shape[0])
nnG.init_params(3, inputs.shape[0])
nnR.init_params(3, inputs.shape[0])
print("Weights and biases initialized")

Checking that the shapes of the weights, biases and inputs match as intended

In [None]:
# For each of the 3 layers of nnB, shows "W.shape * input.shape + b.shape = result.shape".
# The activation function is not applied between layers here; it does not change shapes.
f"{nnB.weights[0].shape} * {inputs.shape} + {nnB.biases[0].shape} = {(np.dot(nnB.weights[0], inputs) + nnB.biases[0]).shape}", f"{nnB.weights[1].shape} * {(np.dot(nnB.weights[0], inputs) + nnB.biases[0]).shape} + {nnB.biases[1].shape} = {(np.dot(nnB.weights[1], np.dot(nnB.weights[0], inputs) + nnB.biases[0]) + nnB.biases[1]).shape}", f"{nnB.weights[2].shape} * {(np.dot(nnB.weights[1], np.dot(nnB.weights[0], inputs) + nnB.biases[0]) + nnB.biases[1]).shape} + {nnB.biases[2].shape} = {(np.dot(nnB.weights[2], np.dot(nnB.weights[1], np.dot(nnB.weights[0], inputs) + nnB.biases[0]) + nnB.biases[1]) + nnB.biases[2]).shape}"

## Testing before training, should normally create random noise

In [None]:
import matplotlib.pyplot as plt

def show_rgb_image(image, title=None, conversion=cv2.COLOR_BGR2RGB):
    """
    Plots an image with matplotlib, without axis ticks.

    The image is first passed through cv2.cvtColor with `conversion`
    (BGR -> RGB by default). `title` is shown above the plot unless it is None.
    """
    converted = cv2.cvtColor(image, conversion)
    plt.imshow(converted)

    # Empty tick lists hide the tick marks on both axes.
    no_ticks = []
    plt.xticks(no_ticks)
    plt.yticks(no_ticks)

    if title is not None:
        plt.title(title)
    plt.show()

In [None]:
# This cell runs before any training, so the status message says so
# (the previous text claimed the networks were already trained).
print("Neural networks initialized (untrained)")

print("Test run")

# test.png is used as the network input, test.jpg as the expected result.
test = Image("test.png")
expected = Image("test.jpg")
testdata = test.convert_simplified_data()
B = nnB.predict(testdata)
G = nnG.predict(testdata)
R = nnR.predict(testdata)

result = combine_colors(B, G, R, test.shape)
# Spot check one pixel: the combined image value next to the raw B, G, R
# network outputs at the same flat index (y * width + x).
x, y = 85, 153
result.image[y,x], B[y*result.image.shape[1] + x], G[y*result.image.shape[1] + x], R[y*result.image.shape[1] + x]
#result.show()

In [None]:
# Input, expected result and untrained network output, shown one after another.
show_rgb_image(test.image, "Input")
show_rgb_image(expected.image, "Expected result")
show_rgb_image(result.image, "Untrained result, random noise")

In [None]:
# Mean of the first-layer weights of nnB and mean of the training inputs.
np.mean(nnB.weights[0]), np.mean(inputs)

In [None]:
# Mean of the first-layer product W0 . inputs of nnB (bias not added).
np.mean(np.dot(nnB.weights[0], inputs))

## Training

In [None]:
# Running total of epochs trained so far; the training cell adds to it.
total_e = 0

In [None]:
# e: epochs for this run, l: learning rate (passed positionally to train).
e = 10
l = 0.5

# Targets are multiplied by 255 because convert_data scales them to 0..1,
# while the network's sigmoid outputs values between 0 and 255.
lossB = nnB.train(inputs, b*255, e, l)
print("Blue trained")
lossG = nnG.train(inputs, g*255, e, l)
print("Green trained")
lossR = nnR.train(inputs, r*255, e, l)
print("Red trained")

# Accumulates across repeated runs of this cell.
total_e += e

# Average of the three per-channel final losses.
print(f"Loss: {(lossB + lossG + lossR) / 3}")

## Testing the trained model

In [None]:
print("Neural networks trained")
    
print("Test run")

# test.png is used as the network input, test.jpg as the expected result.
test = Image("test.png")
expected = Image("test.jpg")
testdata = test.convert_simplified_data()
B = nnB.predict(testdata)
G = nnG.predict(testdata)
R = nnR.predict(testdata)

result = combine_colors(B, G, R, test.shape)
# Spot check one pixel: the combined image value next to the raw B, G, R
# network outputs at the same flat index (y * width + x).
x, y = 85, 153
result.image[y,x], B[y*result.image.shape[1] + x], G[y*result.image.shape[1] + x], R[y*result.image.shape[1] + x]
#result.show()

In [None]:
# Mean predicted value per channel (B, G, R).
np.mean(B), np.mean(G), np.mean(R)

In [None]:
# Mean value per channel of the expected image, for comparison with the predictions.
np.mean(expected.image[:,:,0]), np.mean(expected.image[:,:,1]), np.mean(expected.image[:,:,2])

In [None]:
# Mean over all pixels and channels of the expected image.
np.mean(expected.image)

In [None]:
# Input, expected result and trained network output, shown one after another;
# the last title reports the accumulated epochs and the learning rate used.
show_rgb_image(test.image, "Input")
show_rgb_image(expected.image, "Expected result")
show_rgb_image(result.image, f"Trained result, {total_e} epochs, {l} learning rate")