-
Notifications
You must be signed in to change notification settings - Fork 124
/
Copy pathutils.py
105 lines (83 loc) · 3.34 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
# Per-channel mean and standard deviation used to normalize 3-channel images.
# NOTE(review): these look like the usual ImageNet statistics for
# torchvision's pretrained models -- confirm against the model in use.
cnn_normalization_mean = [0.485, 0.456, 0.406]
cnn_normalization_std = [0.229, 0.224, 0.225]
# Transform that applies the per-channel normalization above to a tensor image.
tensor_normalizer = transforms.Normalize(mean=cnn_normalization_mean, std=cnn_normalization_std)
# Small constant added to the variance in mean_std() before the square root.
epsilon = 1e-5
def preprocess_image(image, target_width=None):
    """Convert a PIL.Image into a normalized 4-D tensor.

    Args:
        image: The PIL.Image to convert.
        target_width: When truthy, the image is first passed through
            ``transforms.Resize(target_width)`` and
            ``transforms.CenterCrop(target_width)``.  When falsy (the
            default ``None``), the image keeps its own size.

    Returns:
        The output of ``ToTensor`` followed by ``tensor_normalizer``, with a
        leading batch axis added by ``unsqueeze(0)``.
    """
    # Steps shared by both code paths.
    steps = [transforms.ToTensor(), tensor_normalizer]
    if target_width:
        # Resize/crop must run on the PIL image, i.e. before ToTensor.
        steps = [
            transforms.Resize(target_width),
            transforms.CenterCrop(target_width),
        ] + steps
    pipeline = transforms.Compose(steps)
    normalized = pipeline(image)
    return normalized.unsqueeze(0)
def image_to_tensor(image, target_width=None):
    """Convert an OpenCV image (0-255 range, BGR order) into a normalized 4-D tensor.

    Args:
        image: OpenCV image array in BGR channel order.
        target_width: Forwarded unchanged to ``preprocess_image``.

    Returns:
        Whatever ``preprocess_image`` returns for the RGB version of ``image``.
    """
    # PIL expects RGB, so swap the channel order before wrapping the array.
    rgb_pixels = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return preprocess_image(Image.fromarray(rgb_pixels), target_width)
def read_image(path, target_width=None):
    """Load the image at ``path`` and return a normalized 4-D tensor.

    The file is always decoded to 3-channel RGB so that grayscale, palette
    or RGBA files do not break the 3-channel normalization applied by
    ``preprocess_image``.

    Args:
        path: Location of the image file, as accepted by ``Image.open``.
        target_width: Forwarded unchanged to ``preprocess_image``.

    Returns:
        Whatever ``preprocess_image`` returns for the loaded RGB image.
    """
    # Image.open is lazy and keeps the file open; convert() loads the pixel
    # data into a new image, after which the context manager releases the
    # underlying file handle.
    with Image.open(path) as source:
        image = source.convert('RGB')
    return preprocess_image(image, target_width)
def recover_image(tensor):
    """Turn a normalized 4-D tensor back into a displayable image.

    Args:
        tensor: 4-D tensor (it may live on the GPU) whose second axis holds
            the 3 normalized colour channels.

    Returns:
        A 3-D ``numpy.uint8`` array in the 0-255 range, channels last (RGB
        order), for the FIRST image of the batch only.
    """
    pixels = tensor.detach().cpu().numpy()
    std = np.array(cnn_normalization_std).reshape((1, 3, 1, 1))
    mean = np.array(cnn_normalization_mean).reshape((1, 3, 1, 1))
    # Undo the normalization, move channels last and rescale to 0-255.
    pixels = (pixels * std + mean).transpose(0, 2, 3, 1) * 255.
    # Round to nearest before the integer cast: a plain astype() truncates,
    # so floating-point error in the normalize/denormalize round trip
    # (e.g. 127.99999) would darken pixels by one level.
    return np.rint(pixels).clip(0, 255).astype(np.uint8)[0]
def recover_tensor(tensor):
    """Undo the channel normalization on a tensor and clamp it to [0, 1].

    Args:
        tensor: Tensor broadcastable against a ``(1, 3, 1, 1)`` shape.

    Returns:
        ``tensor * std + mean`` clamped to the ``[0, 1]`` range, computed on
        the device of ``tensor``.
    """
    device = tensor.device
    # Shape the statistics as (1, 3, 1, 1) so they broadcast over the batch
    # and spatial axes.
    std = torch.tensor(cnn_normalization_std, device=device).view(1, 3, 1, 1)
    mean = torch.tensor(cnn_normalization_mean, device=device).view(1, 3, 1, 1)
    return torch.clamp(tensor * std + mean, 0, 1)
def imshow(tensor, title=None):
    """Draw the image stored in a 4-D tensor (which may live on the GPU).

    Args:
        tensor: Normalized 4-D tensor, converted with ``recover_image``.
        title: Optional plot title; skipped when ``None``.
    """
    picture = recover_image(tensor)
    # The shape of the recovered image is echoed to stdout.
    print(picture.shape)
    plt.imshow(picture)
    if title is None:
        return
    plt.title(title)
def mean_std(features):
    """Compute the per-channel mean and standard deviation of feature maps.

    The original note says the input is the four features computed by VGG16
    and that the result has length 1920.

    Args:
        features: Iterable of tensors whose first two axes are treated as
            (batch, channel); all remaining axes are flattened together.

    Returns:
        A 2-D tensor with one row per batch element.  For each input tensor
        in turn, the row holds ``[mean, std]`` pairs, one pair per channel.
    """
    per_layer = []
    for fmap in features:
        batch, channels = fmap.shape[:2]
        flat = fmap.view(batch, channels, -1)
        mu = flat.mean(-1)
        # epsilon keeps the square root away from an exactly-zero variance.
        sigma = torch.sqrt(flat.var(-1) + epsilon)
        # Stacking on a new last axis interleaves the statistics as
        # [mean, std, mean, std, ...] once flattened per batch element.
        paired = torch.stack([mu, sigma], dim=-1)
        per_layer.append(paired.view(batch, -1))
    return torch.cat(per_layer, dim=-1)
class Smooth:
    """Moving average over the most recent values added with ``+=``.

    The very first value added fills the whole window, so the average starts
    at that value instead of being pulled towards the initial zeros.
    """

    def __init__(self, windowsize=100):
        """Create an empty window holding ``windowsize`` float32 slots."""
        self.window_size = windowsize
        self.data = np.zeros((windowsize, 1), dtype=np.float32)
        # Total number of values added so far (not wrapped to the window).
        self.index = 0

    def __iadd__(self, x):
        """Record ``x`` in the next slot, overwriting the oldest entry."""
        if self.index == 0:
            # First sample: seed every slot with it.
            self.data[...] = x
        slot = self.index % self.window_size
        self.data[slot] = x
        self.index += 1
        return self

    def __float__(self):
        """Return the mean of the window as a Python float."""
        return float(np.mean(self.data))

    def __format__(self, f):
        """Format the current average using the float format spec ``f``."""
        return format(float(self), f)