In [1054]:
import pandas as pd
import numpy as np
import pathlib
from main import BASEPATH, IMAGESPATH, IMAGEFILENAME, BLOCKFILENAME
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

In [1055]:
#   INITIAL DATA PREPARATION CELL
trainPath = BASEPATH + IMAGESPATH
#print(trainPath) # C:\Users\longw\Desktop\G Drive\Praca_inżynierska\Engineering_Thesis\backups\images\
    
# find the newest directory within backups/images/
withinImages= pathlib.Path(trainPath).glob('*')
dataPaths = [file for file in withinImages if file.is_dir()]
newestDir = str(dataPaths[-1]) + '\\'
#print(newestDir) # C:\Users\longw\Desktop\G Drive\Praca_inżynierska\Engineering_Thesis\backups\images\220117_21214749\

dfImages = pd.read_csv(newestDir + IMAGEFILENAME, index_col=None, header=None, names=['image row', 'image col', 'R_images','G_images','B_images'])
dfBlocks = pd.read_csv(newestDir + BLOCKFILENAME, index_col=None, header=None, names=['R_blocks','G_blocks','B_blocks', 'Path'])

# new dataframe columns, the element will be changed once the block is assigned to the part of an image
dfBlocks['assigned row'] = 0 
dfBlocks['assigned col'] = 0

# deleting unnecessary data if the image's edges were rounded
dfImages = dfImages[0:len(dfBlocks)]

In [1056]:
def Normalizer(tensor: torch.tensor, df: pd.Index):
    rTensor = tensor[0,:].clone().detach()
    rTensor = rTensor / rTensor.sum().expand_as(rTensor)
    rTensor = rTensor / torch.norm(rTensor)
    gTensor = tensor[1,:].clone().detach()
    gTensor = gTensor / gTensor.sum().expand_as(gTensor)
    gTensor = gTensor / torch.norm(gTensor)
    bTensor = tensor[2,:].clone().detach()
    bTensor = bTensor / bTensor.sum().expand_as(bTensor)
    bTensor = bTensor / torch.norm(bTensor)
    tensor = torch.tensor((dfImages.index, rTensor, bTensor, bTensor))
    return tensor

In [1057]:
# Define the tensors based on the dataframes
torchImages = torch.tensor((dfImages['R_images'], dfImages['G_images'], dfImages['B_images']))
torchBlocks = torch.tensor((dfBlocks['R_blocks'], dfBlocks['G_blocks'], dfBlocks['B_blocks']))
# Normalize the data and add indexes
torchImages = Normalizer(torchImages, dfImages.index)
torchBlocks = Normalizer(torchBlocks, dfImages.index)

In [1058]:
# to find the correct block, we will need to calculate the three-dimensional Euclidean distance
# the mathematical formula for that would be: pow((x2-x1)**2+(y2-y1)**2+(z2-z1)**2, 0.5)
def euclidian_distance_vector_single_block(block: tuple, images: pd.DataFrame):
    largeDistanceListWithIndexes = []
    largeDistanceList = []
    y = []
    for index in range(0, images.index[-1]+1):
        x = (((images['R_images'][index]-block[0])**2 + (images['G_images'][index]-block[1])**2 + (images['B_images'][index]-block[2])**2)**0.5)
        y.append(x)
        y.append(index)
        largeDistanceListWithIndexes.append(y)
        largeDistanceList.append(x)
        y = []
    return largeDistanceListWithIndexes, largeDistanceList

In [1059]:
singleBlockDistanceWithIndexes = []
singleBlockDistance = []
for n in range(0, dfBlocks.index[-1]+1):
    x, y = euclidian_distance_vector_single_block((dfBlocks['R_blocks'][n], dfBlocks['G_blocks'][n], dfBlocks['B_blocks'][n]), dfImages)
    singleBlockDistanceWithIndexes.append(x)
    singleBlockDistance.append(y)

In [1060]:
meanBlockDistance = []
for n in range(0, dfBlocks.index[-1]+1):
    x = np.mean(singleBlockDistance[n])
    meanBlockDistance.append(x)

In [1061]:
def find_closest(array, mean: float):
    newArray = []
    for n in range(0, len(array)-1):
        if len(array[n]) == 2:
            newArray.append(array[n][0])
        else:
            newArray.append(-1)
    array2 = np.asarray(newArray)
    idx = (np.abs(array2 - mean)).argmin()
    return array[idx]

In [1062]:
def removeElement(distanceList: list, index: int):
    for n in range(0, len(distanceList)):
        newList = distanceList[n]
        newList[index].pop(0)
        newList[index].pop(0)
        distanceList[n] = newList
        
    return distanceList

In [1063]:
closestBlocks = []
for n in range(0, dfBlocks.index[-1]):
    x = find_closest(singleBlockDistanceWithIndexes[n], meanBlockDistance[n])
    y = [x[0], x[1], n]
    if n != dfBlocks.index[-1]-1:
        closestBlocks.append(y)
    singleBlockDistanceWithIndexes = removeElement(singleBlockDistanceWithIndexes, x[1])
    
for n in singleBlockDistanceWithIndexes[dfBlocks.index[-1]]:
    if n:
        closestBlocks.append([n[0], n[1], dfBlocks.index[-1]])
#print(closestBlocks)

In [1064]:
for n in closestBlocks:
    dfBlocks['assigned row'][n[2]] = dfImages['image row'][n[1]]
    dfBlocks['assigned col'][n[2]] = dfImages['image col'][n[1]]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfBlocks['assigned row'][n[2]] = dfImages['image row'][n[1]]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfBlocks['assigned col'][n[2]] = dfImages['image col'][n[1]]


In [1065]:
dfBlocksColumns = dfBlocks.columns.tolist()
dfBlocksColumns = dfBlocksColumns[0:3] + dfBlocksColumns[4:] + dfBlocksColumns[3:4]
dfBlocks = dfBlocks[dfBlocksColumns]
dfBlocks.to_csv(newestDir + 'blocksAssignedToImages.csv', index=False)