# Image Processing Lab №7
Classification

In [1]:
import os
import ast
import string
import numpy as np
import pandas as pd
from PIL import Image
from IPython.display import display
from tqdm import tqdm

import matplotlib.pyplot as plt

from pathlib import Path

In [2]:
def open_image(image_name=None, images_dir=None):
    if images_dir:
        image_list = []
        for path in images_dir.iterdir():
            image = Image.open(path)
            image_list.append(image)
        return image_list

    image_folder = Path('input/'+image_name)
    try:
        image = Image.open(image_folder)
    except FileNotFoundError:
        print(f'Image file not found: {image_folder}')
    
    return image

pth = Path('input/')

In [3]:
def display_image(img):
    display(Image.fromarray(img.astype(np.uint8)))

In [4]:
def save_image(path: str, img: np.array):
    Image.fromarray(img.astype(np.uint8)).save(path)

In [5]:
def calculate_weight(img):
    return np.where(img == 0, True, False).sum()

In [6]:
def calculate_center_of_gravity(img_inp):
    img = np.where(img_inp == 0, 1, 0)
    x, y = 0, 0
    for i in range(img_inp.shape[0]):
        for j in range(img_inp.shape[1]):
            x += i * img[i, j]
            y += j * img[i, j]

    w = calculate_weight(img_inp)

    return x / w, y / w

In [7]:
def calculate_moments_of_inertia(img_inp, x_mean, y_mean):
    img = np.where(img_inp == 0, 1, 0)
    x, y = 0, 0
    for i in range(img_inp.shape[0]):
        for j in range(img_inp.shape[1]):
            x += np.power((j - y_mean), 2) * img[i, j]
            y += np.power((i - x_mean), 2) * img[i, j]
    
    return x, y

In [8]:
def euclidian_distance(x: np.array, y: np.array):
    return np.sqrt(np.power(x-y, 2).sum())

In [9]:
FILE_LIST = os.listdir('input/')

INITITAL_STRING = 'when i was a young boy, my father took me into the city'

In [10]:
stats_reference = pd.read_csv('statistics.csv')

stats_reference

Unnamed: 0,Symbol,Weight Quarter_1,Weight Density Quarter_1,Weight Quarter_2,Weight Density Quarter_2,Weight Quarter_3,Weight Density Quarter_3,Weight Quarter_4,Weight Density Quarter_4,Center of Gravity coordinate_x,Center of Gravity coordinate_y,Relative Center of Gravity coordinate_x,Relative Center of Gravity coordinate_y,Axial Moment of Inertia_x,Axial Moment of Inertia_y,Relative Axial Moment of Inertia_x,Relative Axial Moment of Inertia_y
0,a,606,0.467954,757,0.584556,837,0.646332,826,0.637838,38.686054,34.961005,0.516247,0.492188,1096803.0,1463876.0,1.441766e-06,1.30294e-06
1,b,1034,0.602915,492,0.28688,1037,0.592571,921,0.526286,55.499713,28.706085,0.55612,0.401537,1739233.0,2431523.0,1.155645e-06,5.977334e-07
2,c,652,0.533989,511,0.4062,649,0.531532,555,0.441176,36.388255,30.700042,0.484771,0.450001,867073.0,1246476.0,1.480295e-06,1.248895e-06
3,d,499,0.290962,1032,0.601749,900,0.514286,1025,0.585714,55.273148,40.498264,0.553808,0.572439,1696604.0,2412386.0,1.150927e-06,8.432764e-07
4,e,758,0.602544,783,0.622417,720,0.572337,608,0.483307,36.072848,32.883235,0.48045,0.475869,1023192.0,1206882.0,1.42462e-06,1.298653e-06
5,f,555,0.550595,671,0.665675,490,0.47619,294,0.285714,42.702985,20.59403,0.434406,0.477903,143160.7,1471312.0,2.57286e-06,1.240793e-06
6,g,894,0.525882,1061,0.606286,884,0.52,1138,0.650286,48.31657,37.271813,0.477945,0.533409,1745633.0,3408740.0,1.014841e-06,7.828568e-07
7,h,955,0.641801,487,0.327285,787,0.518104,785,0.516787,51.416722,26.683809,0.525174,0.421046,1407524.0,1920625.0,1.4216e-06,7.3777e-07
8,i,343,0.997093,344,1.0,344,1.0,344,1.0,42.52,7.505455,0.488471,0.433697,29183.71,847203.2,2.245724e-05,3.964059e-06
9,j,113,0.152497,798,1.0,262,0.353576,728,0.912281,58.45818,17.316675,0.508479,0.627564,61923.36,2140248.0,6.170325e-06,1.827794e-06


In [11]:
stats_reference.set_index('Symbol', inplace=True)

In [12]:
stats = pd.DataFrame()

for file in FILE_LIST:
    if '_preprocessed' not in file:
        continue

    img = np.array(open_image(file))
    img = np.where(img < 128, 0, 255)

    M, N = img.shape[0], img.shape[1]

    statistics = {}
    x_q, y_q = M // 2, N // 2

    statistics['file_name'] = file
    
    w = calculate_weight(img[:x_q, :y_q])
    statistics['Weight Quarter_1'] = w
    statistics['Weight Density Quarter_1'] = w / (x_q * y_q)

    w = calculate_weight(img[:x_q, y_q:])
    statistics['Weight Quarter_2'] = w
    statistics['Weight Density Quarter_2'] = w / (x_q * (N - y_q))

    w = calculate_weight(img[x_q:, :y_q])
    statistics['Weight Quarter_3'] = w
    statistics['Weight Density Quarter_3'] = w / ((M - x_q) * y_q)

    w = calculate_weight(img[x_q:, y_q:])
    statistics['Weight Quarter_4'] = w
    statistics['Weight Density Quarter_4'] = w / ((M - x_q) * (N - y_q))

    statistics['Center of Gravity coordinate_x'], statistics['Center of Gravity coordinate_y'] = \
        calculate_center_of_gravity(img)
    
    statistics['Relative Center of Gravity coordinate_x'], statistics['Relative Center of Gravity coordinate_y'] = \
        (statistics['Center of Gravity coordinate_x'] - 1) / (M - 1), (statistics['Center of Gravity coordinate_y'] - 1) / (N - 1)
    
    statistics['Axial Moment of Inertia_x'], statistics['Axial Moment of Inertia_y'] = \
        calculate_moments_of_inertia(img, statistics['Center of Gravity coordinate_x'], statistics['Center of Gravity coordinate_y'])
    
    statistics['Relative Axial Moment of Inertia_x'], statistics['Relative Axial Moment of Inertia_y'] = \
        statistics['Center of Gravity coordinate_x'] / (np.power(M, 2) * np.power(N, 2)), statistics['Center of Gravity coordinate_y'] / (np.power(M, 2) * np.power(N, 2))


    stats = pd.concat((stats, pd.DataFrame([statistics])), ignore_index=True)

In [13]:
stats.set_index('file_name', inplace=True)

In [14]:
stats

Unnamed: 0_level_0,Weight Quarter_1,Weight Density Quarter_1,Weight Quarter_2,Weight Density Quarter_2,Weight Quarter_3,Weight Density Quarter_3,Weight Quarter_4,Weight Density Quarter_4,Center of Gravity coordinate_x,Center of Gravity coordinate_y,Relative Center of Gravity coordinate_x,Relative Center of Gravity coordinate_y,Axial Moment of Inertia_x,Axial Moment of Inertia_y,Relative Axial Moment of Inertia_x,Relative Axial Moment of Inertia_y
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
row1_38_preprocessed.bmp,386,0.508564,372,0.490119,389,0.512516,155,0.204216,29.506144,20.178187,0.438556,0.426182,155130.7,458689.450845,3.201171e-06,2e-06
row1_19_preprocessed.bmp,251,0.54329,314,0.634343,238,0.5,169,0.331373,30.31893,13.73251,0.444226,0.454733,30792.45,341611.131687,8.030975e-06,4e-06
row1_27_preprocessed.bmp,390,0.53719,168,0.231405,528,0.705882,340,0.454545,37.530856,15.284712,0.553498,0.332203,205754.4,432071.142356,4.318506e-06,2e-06
row1_4_preprocessed.bmp,198,1.0,199,0.861472,204,1.0,204,0.857143,33.007453,5.509317,0.484961,0.375776,9635.18,299971.95528,4.350866e-05,7e-06
row1_33_preprocessed.bmp,249,0.573733,257,0.552688,169,0.377232,272,0.566667,30.87434,14.610348,0.481844,0.486084,31719.22,310780.046463,9.24955e-06,4e-06
row1_12_preprocessed.bmp,345,0.657143,338,0.614545,275,0.52381,275,0.5,22.854826,20.596918,0.446017,0.466593,277382.7,257123.013788,4.944256e-06,4e-06
row1_28_preprocessed.bmp,557,0.636571,519,0.593143,400,0.457143,425,0.485714,22.35192,34.198317,0.435753,0.481135,998094.2,402699.565492,1.824647e-06,3e-06
row1_16_preprocessed.bmp,386,0.508564,372,0.490119,389,0.512516,155,0.204216,29.506144,20.178187,0.438556,0.426182,155130.7,458689.450845,3.201171e-06,2e-06
row1_37_preprocessed.bmp,366,0.554545,392,0.565657,397,0.583824,442,0.619048,35.134627,17.262367,0.517191,0.406559,202773.1,551164.055103,4.656054e-06,2e-06
row1_23_preprocessed.bmp,295,0.842857,162,0.432,275,0.785714,0,0.0,20.565574,8.930328,0.399297,0.283226,40355.45,152209.852459,9.781486e-06,4e-06


In [15]:
for idx_1, row_1 in tqdm(stats.iterrows(), total=stats.shape[0]):
    distances = []
    for idx_2, row_2 in stats_reference.iterrows():
        distances.append((idx_2, 1 / (1 + euclidian_distance(np.array([row_1['Weight Density Quarter_1'],
                                                      row_1['Weight Density Quarter_2'],
                                                      row_1['Weight Density Quarter_3'],
                                                      row_1['Weight Density Quarter_4'],
                                                      row_1['Relative Center of Gravity coordinate_x'],
                                                      row_1['Relative Center of Gravity coordinate_y'],
                                                      row_1['Relative Axial Moment of Inertia_x'],
                                                      row_1['Relative Axial Moment of Inertia_y']]),
                                            np.array([row_2['Weight Density Quarter_1'],
                                                      row_2['Weight Density Quarter_2'],
                                                      row_2['Weight Density Quarter_3'],
                                                      row_2['Weight Density Quarter_4'],
                                                      row_2['Relative Center of Gravity coordinate_x'],
                                                      row_2['Relative Center of Gravity coordinate_y'],
                                                      row_2['Relative Axial Moment of Inertia_x'],
                                                      row_2['Relative Axial Moment of Inertia_y']])
                                            )))
                        )
    distances = sorted(distances, key=lambda x: x[1], reverse=True)

    stats.loc[idx_1, 'Hypothesis'] = str(distances)

100%|██████████| 39/39 [00:00<00:00, 714.71it/s]


In [16]:
stats.sort_index()

Unnamed: 0_level_0,Weight Quarter_1,Weight Density Quarter_1,Weight Quarter_2,Weight Density Quarter_2,Weight Quarter_3,Weight Density Quarter_3,Weight Quarter_4,Weight Density Quarter_4,Center of Gravity coordinate_x,Center of Gravity coordinate_y,Relative Center of Gravity coordinate_x,Relative Center of Gravity coordinate_y,Axial Moment of Inertia_x,Axial Moment of Inertia_y,Relative Axial Moment of Inertia_x,Relative Axial Moment of Inertia_y,Hypothesis
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
row1_0_preprocessed.bmp,696,0.490486,717,0.493802,661,0.45212,658,0.43984,31.754026,42.885066,0.46597,0.487036,1483852.0,923342.70571,9.345675e-07,1e-06,"[('v', 0.9438066395893265), ('x', 0.9270919509..."
row1_10_preprocessed.bmp,315,0.525,324,0.5184,338,0.541667,345,0.530769,25.378971,23.93646,0.487579,0.477843,280896.7,287951.135401,4.063886e-06,4e-06,"[('o', 0.9757619902119321), ('w', 0.9249226163..."
row1_11_preprocessed.bmp,264,0.52381,264,0.5,331,0.630476,365,0.663636,25.979575,21.547386,0.520408,0.489223,274203.3,252668.489379,5.851982e-06,5e-06,"[('u', 0.973613435670138), ('a', 0.90424917372..."
row1_12_preprocessed.bmp,345,0.657143,338,0.614545,275,0.52381,275,0.5,22.854826,20.596918,0.446017,0.466593,277382.7,257123.013788,4.944256e-06,4e-06,"[('n', 0.9789274104025067), ('m', 0.9368225563..."
row1_13_preprocessed.bmp,415,0.530691,486,0.595588,432,0.55243,551,0.675245,33.616242,24.970276,0.48681,0.521093,389070.3,741699.542994,3.291059e-06,2e-06,"[('g', 0.9567604378141485), ('u', 0.8967941122..."
row1_14_preprocessed.bmp,483,0.617647,239,0.292892,471,0.602302,439,0.53799,38.178922,19.28125,0.554909,0.397418,370631.9,541057.754902,3.737749e-06,2e-06,"[('b', 0.9780586278598412), ('h', 0.9060676413..."
row1_15_preprocessed.bmp,324,0.5184,317,0.5072,347,0.533846,336,0.516923,25.344411,23.97432,0.486888,0.468864,282151.1,288996.94864,3.897641e-06,4e-06,"[('o', 0.9738444055641872), ('w', 0.9309908411..."
row1_16_preprocessed.bmp,386,0.508564,372,0.490119,389,0.512516,155,0.204216,29.506144,20.178187,0.438556,0.426182,155130.7,458689.450845,3.201171e-06,2e-06,"[('y', 0.9237038241884793), ('p', 0.8567527419..."
row1_17_preprocessed.bmp,557,0.636571,521,0.578889,400,0.457143,425,0.472222,22.331056,34.235943,0.435328,0.474799,1000655.0,403487.434577,1.771954e-06,3e-06,"[('m', 0.9745851616232916), ('n', 0.9168831191..."
row1_18_preprocessed.bmp,386,0.508564,372,0.490119,389,0.512516,155,0.204216,29.506144,20.178187,0.438556,0.426182,155130.7,458689.450845,3.201171e-06,2e-06,"[('y', 0.9237038241884793), ('p', 0.8567527419..."


In [17]:
PREDICTED_STRING = ''

for row in ('row1', 'row2'):
    for i in range(40):
        try:
            PREDICTED_STRING += ast.literal_eval(stats.loc[row+'_'+str(i)+'_preprocessed.bmp', 'Hypothesis'])[0][0]
        except:
            continue

In [18]:
PREDICTED_STRING

'vhenlwasayoungboymyfdhertookmektothecuy'

In [19]:
import string
punct = string.punctuation

for p in string.punctuation:
    if p in INITITAL_STRING:
        INITITAL_STRING = INITITAL_STRING.replace(p, '')

INITITAL_STRING = ''.join(INITITAL_STRING.split())

In [26]:
print(PREDICTED_STRING)
print(INITITAL_STRING)

vhenlwasayoungboymyfdhertookmektothecuy
wheniwasayoungboymyfathertookmeintothecity


In [21]:
def levenshteinDistanceDP(token1, token2):
    distances = np.zeros((len(token1) + 1, len(token2) + 1))

    for t1 in range(len(token1) + 1):
        distances[t1][0] = t1

    for t2 in range(len(token2) + 1):
        distances[0][t2] = t2
        
    a = 0
    b = 0
    c = 0
    
    for t1 in range(1, len(token1) + 1):
        for t2 in range(1, len(token2) + 1):
            if (token1[t1-1] == token2[t2-1]):
                distances[t1][t2] = distances[t1 - 1][t2 - 1]
            else:
                a = distances[t1][t2 - 1]
                b = distances[t1 - 1][t2]
                c = distances[t1 - 1][t2 - 1]
                
                if (a <= b and a <= c):
                    distances[t1][t2] = a + 1
                elif (b <= a and b <= c):
                    distances[t1][t2] = b + 1
                else:
                    distances[t1][t2] = c + 1
                    
    return distances[len(token1)][len(token2)]

In [22]:
print(levenshteinDistanceDP(PREDICTED_STRING, INITITAL_STRING))

8.0


In [23]:
stats.to_csv('output/statistics.csv', sep=',', index=True)

In [24]:
for i in range(len(INITITAL_STRING)):
    if PREDICTED_STRING[i] != INITITAL_STRING[i]:
        print('Predicted: ', PREDICTED_STRING[i], 'Should be: ', INITITAL_STRING[i])
    else:
        print('okay')

Predicted:  v Should be:  w
okay
okay
okay
Predicted:  l Should be:  i
okay
okay
okay
okay
okay
okay
okay
okay
okay
okay
okay
okay
okay
okay
okay
Predicted:  d Should be:  a
Predicted:  h Should be:  t
Predicted:  e Should be:  h
Predicted:  r Should be:  e
Predicted:  t Should be:  r
Predicted:  o Should be:  t
okay
Predicted:  k Should be:  o
Predicted:  m Should be:  k
Predicted:  e Should be:  m
Predicted:  k Should be:  e
Predicted:  t Should be:  i
Predicted:  o Should be:  n
okay
Predicted:  h Should be:  o
Predicted:  e Should be:  t
Predicted:  c Should be:  h
Predicted:  u Should be:  e
Predicted:  y Should be:  c


IndexError: string index out of range