In [1]:
from ultralytics import YOLO
import cv2
import supervision as sv
import numpy as np
import torch
import math
import plotly.io as pio
from PIL import Image
from segment_anything import SamAutomaticMaskGenerator, SamPredictor, sam_model_registry
from skimage.metrics import structural_similarity as SSIM
import torchvision.transforms as transforms
import lpips
from statistics import mean
from ignite.metrics import FID
from torchvision.transforms import functional as F
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
import random
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
model = YOLO('/home/nikita/Desktop/diploma/Diploma/models/yolov8n-face.pt')  # load a pretrained YOLOv8n detection model
#model.train(data='coco128.yaml', epochs=3)  # train the model
#model('https://ultralytics.com/images/bus.jpg')  # predict on an image

In [4]:
sam = sam_model_registry["vit_l"](checkpoint="/home/nikita/Desktop/diploma/Diploma/models/sam_vit_l_0b3195.pth").to(device=device)
#predictor = SamPredictor(sam)
mask_generator = SamAutomaticMaskGenerator(sam)
mask_annotator = sv.MaskAnnotator()

In [13]:
image = Image.open('/home/nikita/Desktop/diploma/norm/sample/s3_mask.jpg')

In [14]:
model.task='detect'
results = model.predict(image, imgsz=image.size,
                        save=False,
                        device=device,
                        hide_conf=True, hide_labels=True, conf=0.25, line_width=1)


0: 1280x1280 1 face, 10.1ms
Speed: 10.9ms preprocess, 10.1ms inference, 1.3ms postprocess per image at shape (1, 3, 1280, 1280)


In [15]:
arr_of_boxes=[]
boxes = results[0].boxes
n=boxes.shape[0]


for i in range(n):
    #     # вот тут надо понять как все же картинку центрировать
    # arr[0] *= 0.95  # сдвиг влево
    # arr[2] *= 0.95
    arr=boxes.xyxy[i].cpu().numpy()
    
    if arr[2]-arr[0] > arr[3]-arr[1]:
        arr[3]=math.ceil(arr[1]+(arr[2]-arr[0]))
    else:
        arr[2]=math.ceil(arr[0]+(arr[3]-arr[1]))

    # arr[:2] *= 0.9
    # arr[-2:] *= 1.1
    
    arr[0]-=(arr[2]-arr[0])*0.3
    # arr[2]+=(arr[2]-arr[0])*0.1
    arr[1]-=(arr[3]-arr[1])*0.15
    arr[3]+=(arr[3]-arr[1])*0.15

    arr_of_boxes.append(arr)

In [16]:
arr_of_box_images=[]
for i in range(n):
    im=image.crop(arr_of_boxes[i])
    x=min(im.size)
    cropped_image = cv2.cvtColor(np.array(im.resize((x, x))), cv2.COLOR_BGR2RGB)
    arr_of_box_images.append(cropped_image)
    # cropped_image.save(f'/home/nikita/Desktop/diploma/inference/sample/sample_{i}.png')

len(arr_of_box_images)

1

In [None]:
for i in range(n):
    image_bgr = arr_of_box_images[i]
    sam_result = mask_generator.generate(image_bgr)
    #detections = sv.Detections.from_sam(sam_result=sam_result)
    #annotated_image = mask_annotator.annotate(scene=image_bgr.copy(), detections=detections)
    masks = [mask['segmentation'] for mask in sorted(sam_result, key=lambda x: x['area'], reverse=True)]
    sv.plot_images_grid(
    images=masks[:7],
    grid_size=(1, 7),
    size=(16, 16)
    )
    print('Выберите часть изображения, которую хотите исправить:')
    j=int(input())
    _, binary_mask = cv2.threshold(np.uint8(masks[j-1])*255, 127, 255, cv2.THRESH_BINARY)

    cv2.imwrite(f'/home/nikita/Desktop/diploma/inference/image/sample_{i}.png', image_bgr)
    cv2.imwrite(f'/home/nikita/Desktop/diploma/inference/mask/sample_{i}.png', binary_mask)
    

Далее необходимо запустить python file bat_sample.py для получения итоговых реконструкций

Считаем метрики

In [3]:
lpips_metric = lpips.LPIPS(net='alex')
baseline_minimax_metrics = [] # [PSNR_max, SSIM_max, LPIPS_min]
baseline_mean_metrics = [] # [PSNR_mean, SSIM_mean, LPIPS_mean]


for i in range(100):
    psnr_arr = [] #[sample_1, sample_2, sample_3, sample_4]
    ssim_arr = [] #[sample_1, sample_2, sample_3, sample_4]
    lpips_arr = []  #[sample_1, sample_2, sample_3, sample_4]
    img1=cv2.imread(f'/home/nikita/Desktop/diploma/inference/image/{i}.png')
    img1 = cv2.resize(img1, (256, 256))
    img1_ten = transforms.ToTensor()(img1)
    
    for j in range(4):
        img2=cv2.imread(f'/home/nikita/Desktop/diploma/inference/base_res/{i}_{j}.png')
        img2_ten = transforms.ToTensor()(img2)
        # print("PSNR:", cv2.PSNR(img1, img2))
        psnr_arr.append(cv2.PSNR(img1, img2))
        # print("SSIM:", metrics.structural_similarity(img1, img2, multichannel=True, channel_axis=2))
        ssim_arr.append(SSIM(img1, img2, multichannel=True, channel_axis=2))
        # print("LPIPS:", lpips_metric(img1_ten, img2_ten).item())
        lpips_arr.append(lpips_metric(img1_ten, img2_ten).item())

    baseline_mean_metrics.append([sum(psnr_arr) / len(psnr_arr), sum(ssim_arr) / len(ssim_arr), sum(lpips_arr) / len(lpips_arr)])
    baseline_minimax_metrics.append([max(psnr_arr),max(ssim_arr), min(lpips_arr)])             


Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /home/nikita/miniconda3/envs/dev/lib/python3.10/site-packages/lpips/weights/v0.1/alex.pth


In [2]:
lpips_metric = lpips.LPIPS(net='alex')
trained_minimax_metrics = [] # [PSNR_max, SSIM_max, LPIPS_min, FID_score]
trained_mean_metrics = [] # [PSNR_mean, SSIM_mean, LPIPS_mean, FID_score]


for i in range(100):
    psnr_arr = [] #[sample_1, sample_2, sample_3, sample_4]
    ssim_arr = [] #[sample_1, sample_2, sample_3, sample_4]
    lpips_arr = []  #[sample_1, sample_2, sample_3, sample_4]
    img1=cv2.imread(f'/home/nikita/Desktop/diploma/inference/image/{i}.png')
    img1 = cv2.resize(img1, (256, 256))
    img1_ten = transforms.ToTensor()(img1)
    
    for j in range(4):
        img2=cv2.imread(f'/home/nikita/Desktop/diploma/inference/midle_res/{i}_{j}.png')
        img2_ten = transforms.ToTensor()(img2)
        # print("PSNR:", cv2.PSNR(img1, img2))
        psnr_arr.append(cv2.PSNR(img1, img2))
        # print("SSIM:", metrics.structural_similarity(img1, img2, multichannel=True, channel_axis=2))
        ssim_arr.append(SSIM(img1, img2, multichannel=True, channel_axis=2))
        # print("LPIPS:", lpips_metric(img1_ten, img2_ten).item())
        lpips_arr.append(lpips_metric(img1_ten, img2_ten).item())

    trained_mean_metrics.append([sum(psnr_arr) / len(psnr_arr), sum(ssim_arr) / len(ssim_arr), sum(lpips_arr) / len(lpips_arr)])
    trained_minimax_metrics.append([max(psnr_arr),max(ssim_arr), min(lpips_arr)])             


Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]




Loading model from: /home/nikita/miniconda3/envs/dev/lib/python3.10/site-packages/lpips/weights/v0.1/alex.pth


In [23]:
lpips_metric = lpips.LPIPS(net='alex')
trained_minimax_metrics = [] # [PSNR_max, SSIM_max, LPIPS_min, FID_score]
trained_mean_metrics = [] # [PSNR_mean, SSIM_mean, LPIPS_mean, FID_score]


for i in range(100):
    psnr_arr = [] #[sample_1, sample_2, sample_3, sample_4]
    ssim_arr = [] #[sample_1, sample_2, sample_3, sample_4]
    lpips_arr = []  #[sample_1, sample_2, sample_3, sample_4]
    img1=cv2.imread(f'/home/nikita/Desktop/diploma/inference/image/{i}.png')
    img1 = cv2.resize(img1, (256, 256))
    img1_ten = transforms.ToTensor()(img1)
    
    for j in range(4):
        img2=cv2.imread(f'/home/nikita/Desktop/diploma/inference/final_res/{i}_{j}.png')
        img2_ten = transforms.ToTensor()(img2)
        # print("PSNR:", cv2.PSNR(img1, img2))
        psnr_arr.append(cv2.PSNR(img1, img2))
        # print("SSIM:", metrics.structural_similarity(img1, img2, multichannel=True, channel_axis=2))
        ssim_arr.append(SSIM(img1, img2, multichannel=True, channel_axis=2))
        # print("LPIPS:", lpips_metric(img1_ten, img2_ten).item())
        lpips_arr.append(lpips_metric(img1_ten, img2_ten).item())

    trained_mean_metrics.append([sum(psnr_arr) / len(psnr_arr), sum(ssim_arr) / len(ssim_arr), sum(lpips_arr) / len(lpips_arr)])
    trained_minimax_metrics.append([max(psnr_arr),max(ssim_arr), min(lpips_arr)])             


Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /home/nikita/miniconda3/envs/dev/lib/python3.10/site-packages/lpips/weights/v0.1/alex.pth


In [4]:
df = pd.DataFrame({'PSNR baseline mean': pd.Series([round(x[0], 3) for x in baseline_mean_metrics]),
                    'PSNR trained model mean': pd.Series([x[0] for x in trained_mean_metrics]),
                    'PSNR baseline max': pd.Series([x[0] for x in baseline_minimax_metrics]),
                    'PSNR trained model max': pd.Series([x[0] for x in trained_minimax_metrics]),
                    'SSIM baseline mean': pd.Series([round(x[1], 3) for x in baseline_mean_metrics]),
                    'SSIM trained model mean': pd.Series([x[1] for x in trained_mean_metrics]),
                    'SSIM baseline max': pd.Series([x[1] for x in baseline_minimax_metrics]),
                    'SSIM trained model max': pd.Series([x[1] for x in trained_minimax_metrics]),
                    'LPIPS baseline mean': pd.Series([round(x[2], 3) for x in baseline_mean_metrics]),
                    'LPIPS trained model mean': pd.Series([x[2] for x in trained_mean_metrics]),
                    'LPIPS baseline max': pd.Series([x[2] for x in baseline_minimax_metrics]),
                    'LPIPS trained model max': pd.Series([x[2] for x in trained_minimax_metrics])}).reset_index(drop=True).reset_index().rename(columns={'index': 'num'})

In [3]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Bar(
        x=['полное среднее', 'наилучшее сгенерированное'],
        y=[df['PSNR baseline mean'].mean(), df['PSNR baseline max'].mean()],
        name='BAT-Fill',
        text=[round(val, 2) for val in [df['PSNR baseline mean'].mean(), df['PSNR baseline max'].mean()]],
        textposition='auto',
        marker_color='orange'
        ),
    secondary_y=False
)

fig.add_trace(
    go.Bar(
        x=['полное среднее', 'наилучшее сгенерированное'],
        y=[df['PSNR trained model mean'].mean(), df['PSNR trained model max'].mean()],
        name='masked VAE-BAT-Fill',
        text=[round(val, 2) for val in [df['PSNR trained model mean'].mean(), df['PSNR trained model max'].mean()]],
        textposition='auto',
        marker_color='rgb(69,0,102)'
        ),
    secondary_y=False
)

fig.update_layout(
        title={
            'text': "PSNR",
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 64}},
        legend={
            'x': 0.75,
            'y': 1.5,
            'font': {'size': 32}},
        font=dict(
            family="Courier New, monospace",
            size=32
        )
        )
# fig.update_xaxes(title='Усреднение меры на 4 изображениях', showticklabels=False, tickfont=dict(size=32))
fig.update_yaxes(title='Значение меры', type='log')
fig.show()

print('Среднеквадратичное отклонение для базовой модели', round(np.std(df['PSNR baseline mean']), 2),'-для полного среднего, ', round(np.std(df['PSNR baseline max']), 2), '-для максимального')
print('Среднеквадратичное отклонение для дообученной модели', round(np.std(df['PSNR trained model mean']), 2),'-для полного среднего, ', round(np.std(df['PSNR trained model max']), 2), '-для максимального')
print('Прирост качества модели:', round((df['PSNR trained model mean'].mean() - df['PSNR baseline mean'].mean())/df['PSNR baseline mean'].mean(), 3)*100,'% - для полного среднего, ' ,
       round((df['PSNR trained model max'].mean() - df['PSNR baseline max'].mean())/df['PSNR baseline max'].mean(), 3)*100, '% - для максимального')

Среднеквадратичное отклонение для базовой модели 1.98 -для полного среднего,  2.2 -для максимального
Среднеквадратичное отклонение для дообученной модели 2.16 -для полного среднего,  2.26 -для максимального
Прирост качества модели: 4.8 % - для полного среднего,  4.5 % - для максимального


In [4]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Bar(
        x=['полное среднее', 'наилучшее сгенерированное'],
        y=[df['SSIM baseline mean'].mean(), df['SSIM baseline max'].mean()],
        name='BAT-Fill',
        text=[round(val, 3) for val in [df['SSIM baseline mean'].mean(), df['SSIM baseline max'].mean()]],
        textposition='auto',
        marker_color='orange'
        ),
    secondary_y=False
)

fig.add_trace(
    go.Bar(
        x=['полное среднее', 'наилучшее сгенерированное'],
        y=[df['SSIM trained model mean'].mean(), df['SSIM trained model max'].mean()],
        name='masked VAE-BAT-Fill',
        text=[round(val, 3) for val in [df['SSIM trained model mean'].mean(), df['SSIM trained model max'].mean()]],
        textposition='auto',
        marker_color='rgb(69,0,102)'
        ),
    secondary_y=False
)

fig.update_layout(
        title={
            'text': "SSIM",
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 64}},
        legend={
            'x': 0.75,
            'y': 1.5,
            'font': {'size': 32}},
        font=dict(
            family="Courier New, monospace",
            size=32
        )
        )
# fig.update_xaxes(title='Усреднение меры на 4 изображениях', showticklabels=False, tickfont=dict(size=32))
fig.update_yaxes(title='Значение меры', type='log')
fig.show()

print('Среднеквадратичное отклонение базовой модели', round(np.std(df['SSIM baseline mean']), 3),' - для полного среднего, ', round(np.std(df['SSIM baseline max']), 3), ' - для наилучшего сгенерированного')
print('Среднеквадратичное отклонение дообученной модели', round(np.std(df['SSIM trained model mean']), 3),'-для полного среднего, ', round(np.std(df['SSIM trained model max']), 3), ' - для наилучшего сгенерированного')
print('Прирост качества модели:', round((df['SSIM trained model mean'].mean() - df['SSIM baseline mean'].mean())/df['SSIM baseline mean'].mean(), 3)*100,'$\%$ - для полного среднего, ' ,
       round((df['SSIM trained model max'].mean() - df['SSIM baseline max'].mean())/df['SSIM baseline max'].mean(), 3)*100, '$\%$ - для наилучшего сгенерированного')

Среднеквадратичное отклонение базовой модели 0.024  - для полного среднего,  0.025  - для наилучшего сгенерированного
Среднеквадратичное отклонение дообученной модели 0.024 -для полного среднего,  0.024  - для наилучшего сгенерированного
Прирост качества модели: 1.7000000000000002 $\%$ - для полного среднего,  1.6 $\%$ - для наилучшего сгенерированного


In [5]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Bar(
        x=['полное среднее', 'наилучшее сгенерированное'],
        y=[df['LPIPS baseline mean'].mean(), df['LPIPS baseline max'].mean()],
        name='BAT-Fill',
        text=[round(val, 3) for val in [df['LPIPS baseline mean'].mean(), df['LPIPS baseline max'].mean()]],
        textposition='auto',
        marker_color='orange'
        ),
    secondary_y=False
)

fig.add_trace(
    go.Bar(
        x=['полное среднее', 'наилучшее сгенерированное'],
        y=[df['LPIPS trained model mean'].mean(), df['LPIPS trained model max'].mean()],
        name='masked VAE-BAT-Fill',
        text=[round(val, 3) for val in [df['LPIPS trained model mean'].mean(), df['LPIPS trained model max'].mean()]],
        textposition='auto',
        marker_color='rgb(69,0,102)'
        ),
    secondary_y=False
)

fig.update_layout(
        title={
            'text': "LPIPS",
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 64}},
        legend={
            'x': 0.75,
            'y': 1.5,
            'font': {'size': 32}},
        font=dict(
            family="Courier New, monospace",
            size=32
        )
        )
# fig.update_xaxes(title='Усреднение меры на 4 изображениях', showticklabels=False, tickfont=dict(size=32))
fig.update_yaxes(title='Значение меры', type='log')
fig.show()

print('Среднеквадратичное отклонение для базовой модели', round(np.std(df['LPIPS baseline mean']), 3),' - для полного среднего, ', round(np.std(df['LPIPS baseline max']), 3), ' - для наилучшего сгенерированного')
print('Среднеквадратичное отклонение для дообученной модели', round(np.std(df['LPIPS trained model mean']), 3),' - для полного среднего, ', round(np.std(df['LPIPS trained model max']), 3), ' - для наилучшего сгенерированного')
print('Прирост качества модели:', round((df['LPIPS baseline mean'].mean() - df['LPIPS trained model mean'].mean())/df['LPIPS baseline mean'].mean(), 3)*100,'$\%$ - для полного среднего, ' ,
       round((df['LPIPS baseline max'].mean() - df['LPIPS trained model max'].mean())/df['LPIPS baseline max'].mean(), 3)*100, '$\%$ - для наилучшего сгенерированного')

Среднеквадратичное отклонение для базовой модели 0.015  - для полного среднего,  0.015  - для наилучшего сгенерированного
Среднеквадратичное отклонение для дообученной модели 0.015  - для полного среднего,  0.014  - для наилучшего сгенерированного
Прирост качества модели: 19.8 $\%$ - для полного среднего,  18.2 $\%$ - для наилучшего сгенерированного


Старое

In [None]:
df = pd.DataFrame({'PSNR baseline mean': pd.Series([round(x[0], 3) for x in baseline_mean_metrics]).rolling(window=4).mean()[3::4],
                    'PSNR trained model mean': pd.Series([x[0] for x in trained_mean_metrics]).rolling(window=4).mean()[3::4],
                    'PSNR baseline max': pd.Series([x[0] for x in baseline_minimax_metrics]).rolling(window=4).max()[3::4],
                    'PSNR trained model max': pd.Series([x[0] for x in trained_minimax_metrics]).rolling(window=4).max()[3::4],
                    'SSIM baseline mean': pd.Series([round(x[1], 3) for x in baseline_mean_metrics]).rolling(window=4).mean()[3::4],
                    'SSIM trained model mean': pd.Series([x[1] for x in trained_mean_metrics]).rolling(window=4).mean()[3::4],
                    'SSIM baseline max': pd.Series([x[1] for x in baseline_minimax_metrics]).rolling(window=4).max()[3::4],
                    'SSIM trained model max': pd.Series([x[1] for x in trained_minimax_metrics]).rolling(window=4).max()[3::4],
                    'LPIPS baseline mean': pd.Series([round(x[2], 3) for x in baseline_mean_metrics]).rolling(window=4).mean()[3::4],
                    'LPIPS trained model mean': pd.Series([x[2] for x in trained_mean_metrics]).rolling(window=4).mean()[3::4],
                    'LPIPS baseline max': pd.Series([x[2] for x in baseline_minimax_metrics]).rolling(window=4).min()[3::4],
                    'LPIPS trained model max': pd.Series([x[2] for x in trained_minimax_metrics]).rolling(window=4).min()[3::4]
                    }).reset_index(drop=True).reset_index().rename(columns={'index': 'num'})