# utils.py

import random
import re
import time
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
# from omegaconf import OmegaConf
from torch import distributions as pyd
from torch.distributions.utils import _standard_normal
from torch.distributions import Distribution, Normal
class eval_mode:
    def __init__(self, *models):
        self.models = models

    def __enter__(self):
        self.prev_states = []
        for model in self.models:
            self.prev_states.append(model.training)
            model.train(False)

    def __exit__(self, *args):
        for model, state in zip(self.models, self.prev_states):
            model.train(state)
        return False

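# Minimal usage sketch for eval_mode; the _demo_* helper below is illustrative,
# not part of the original API. The context manager switches any number of
# modules out of training mode and restores each module's previous mode on exit.
def _demo_eval_mode():
    encoder, actor = nn.Linear(4, 8), nn.Linear(8, 2)
    with eval_mode(encoder, actor):
        assert not encoder.training and not actor.training
    assert encoder.training and actor.training  # previous modes restored
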
def set_seed_everywhere(seed):
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)

def chain(*iterables):
    for it in iterables:
        yield from it

def soft_update_params(net, target_net, tau):
    for param, target_param in zip(net.parameters(), target_net.parameters()):
        target_param.data.copy_(tau * param.data +
                                (1 - tau) * target_param.data)


def hard_update_params(net, target_net):
    for param, target_param in zip(net.parameters(), target_net.parameters()):
        target_param.data.copy_(param.data)

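# Illustrative sketch (not part of the original file): soft_update_params is a
# Polyak average of the online network into the target network; with tau=1.0 it
# degenerates to the hard copy performed by hard_update_params.
def _demo_target_updates():
    net, target = nn.Linear(3, 3), nn.Linear(3, 3)
    soft_update_params(net, target, tau=0.005)  # target <- 0.005*net + 0.995*target
    hard_update_params(net, target)             # target <- net exactly
    for p, tp in zip(net.parameters(), target.parameters()):
        assert torch.allclose(p, tp)
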
def to_torch(xs, device):
    return tuple(torch.as_tensor(x, device=device) for x in xs)

def weight_init(m):
    """Custom weight init for Conv2D and Linear layers."""
    if isinstance(m, nn.Linear):
        nn.init.orthogonal_(m.weight.data)
        if hasattr(m.bias, 'data'):
            m.bias.data.fill_(0.0)
    elif isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
        gain = nn.init.calculate_gain('relu')
        nn.init.orthogonal_(m.weight.data, gain)
        if hasattr(m.bias, 'data'):
            m.bias.data.fill_(0.0)

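# Illustrative sketch (not part of the original file): weight_init is meant to
# be passed to nn.Module.apply, which visits every submodule recursively.
def _demo_weight_init():
    actor = nn.Sequential(nn.Linear(32, 64), nn.ReLU(), nn.Linear(64, 4))
    actor.apply(weight_init)  # orthogonal weights, zero biases on every Linear
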
def param_norm(params, norm_type=2.0):
    total_norm = torch.norm(
        torch.stack([torch.norm(p.detach(), norm_type) for p in params]),
        norm_type)
    return total_norm.item()

class Until:
    def __init__(self, until, action_repeat=1):
        self._until = until
        self._action_repeat = action_repeat

    def __call__(self, step):
        if self._until is None:
            return True
        until = self._until // self._action_repeat
        return step < until


class Every:
    def __init__(self, every, action_repeat=1):
        self._every = every
        self._action_repeat = action_repeat

    def change_every(self, freq):
        self._every *= freq

    def __call__(self, step):
        if self._every is None:
            return False
        every = self._every // self._action_repeat
        if step % every == 0:
            return True
        return False

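# Illustrative sketch (not part of the original file): Until and Every are loop
# predicates over agent steps, with budgets given in environment steps. Here we
# train for 1000 env steps at action_repeat=2 and evaluate every 100 env steps.
def _demo_until_every():
    train_until = Until(1000, action_repeat=2)  # true while step < 500
    eval_every = Every(100, action_repeat=2)    # true when step % 50 == 0
    step = 0
    while train_until(step):
        if eval_every(step):
            pass  # run evaluation here
        step += 1
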
class Timer:
    def __init__(self):
        self._start_time = time.time()
        self._last_time = time.time()

    def reset(self):
        elapsed_time = time.time() - self._last_time
        self._last_time = time.time()
        total_time = time.time() - self._start_time
        return elapsed_time, total_time

    def total_time(self):
        return time.time() - self._start_time

class TruncatedNormal(pyd.Normal):
    def __init__(self, loc, scale, low=-1.0, high=1.0, eps=1e-6):
        super().__init__(loc, scale, validate_args=False)
        self.low = low
        self.high = high
        self.eps = eps

    def _clamp(self, x):
        clamped_x = torch.clamp(x, self.low + self.eps, self.high - self.eps)
        # Straight-through estimator: the forward pass uses the clamped value,
        # while the gradient flows through the unclamped x.
        x = x - x.detach() + clamped_x.detach()
        return x

    def sample(self, clip=None, sample_shape=torch.Size()):
        shape = self._extended_shape(sample_shape)
        eps = _standard_normal(shape,
                               dtype=self.loc.dtype,
                               device=self.loc.device)
        eps *= self.scale
        if clip is not None:
            eps = torch.clamp(eps, -clip, clip)
        x = self.loc + eps
        return self._clamp(x)

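# Illustrative sketch (not part of the original file): TruncatedNormal is the
# usual DrQ-v2-style exploration distribution. `clip` bounds the noise before
# the sample is clamped into [low, high], and the straight-through _clamp keeps
# the sample differentiable w.r.t. loc.
def _demo_truncated_normal():
    loc = torch.zeros(2, requires_grad=True)
    dist = TruncatedNormal(loc, torch.full((2,), 0.2))
    action = dist.sample(clip=0.3)     # noise clipped to [-0.3, 0.3]
    assert action.abs().le(1.0).all()  # sample clamped into [-1, 1]
    action.sum().backward()            # gradient flows back to loc
    assert loc.grad is not None
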
class TanhTransform(pyd.transforms.Transform):
    domain = pyd.constraints.real
    codomain = pyd.constraints.interval(-1.0, 1.0)
    bijective = True
    sign = +1

    def __init__(self, cache_size=1):
        super().__init__(cache_size=cache_size)

    @staticmethod
    def atanh(x):
        # Uses the numerically clamped module-level atanh defined below.
        return atanh(x)
        # return 0.5 * (x.log1p() - (-x).log1p())

    def __eq__(self, other):
        return isinstance(other, TanhTransform)

    def _call(self, x):
        return x.tanh()

    def _inverse(self, y):
        # We do not clamp to the boundary here, as that may degrade the
        # performance of certain algorithms; use `cache_size=1` instead.
        return self.atanh(y)

    def log_abs_det_jacobian(self, x, y):
        # Numerically stable formula; see details at
        # https://github.com/tensorflow/probability/commit/ef6bb176e0ebd1cf6e25c6b5cecdd2428c22963f#diff-e120f70e92e6741bca649f04fcd907b7
        return 2. * (math.log(2.) - x - F.softplus(-2. * x))

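# Derivation of the log-det-Jacobian formula above: for y = tanh(x),
#   log|dy/dx| = log(1 - tanh(x)^2)
#              = log(4 * exp(-2x) / (1 + exp(-2x))^2)
#              = 2 * (log 2 - x - log(1 + exp(-2x)))
#              = 2 * (log 2 - x - softplus(-2x)).
# The softplus form avoids evaluating 1 - tanh(x)^2 directly, which underflows
# to 0 for moderately large |x| and would make the log-prob -inf.
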
class SquashedNormal(pyd.transformed_distribution.TransformedDistribution):
    def __init__(self, loc, scale):
        self.loc = loc
        self.scale = scale
        self.base_dist = pyd.Normal(loc, scale)
        transforms = [TanhTransform()]
        super().__init__(self.base_dist, transforms)

    @property
    def mean(self):
        mu = self.loc
        for tr in self.transforms:
            mu = tr(mu)
        return mu

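# Illustrative sketch (not part of the original file): SquashedNormal supports
# reparameterized sampling and exact log-probs via the transform machinery,
# with the tanh Jacobian correction applied automatically.
def _demo_squashed_normal():
    dist = SquashedNormal(torch.zeros(3), torch.ones(3))
    a = dist.rsample()               # differentiable sample in (-1, 1)
    logp = dist.log_prob(a).sum(-1)  # includes the tanh Jacobian term
    assert a.abs().le(1.0).all() and logp.dim() == 0
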
class SquashedNormal2:
    def __init__(self, loc, scale):
        self.loc = loc
        self.scale = scale
        self.normal = pyd.normal.Normal(loc, scale)

    def sample(self, sample_shape=torch.Size()):
        # Gradients will and should *not* pass through this operation.
        z = self.normal.sample(sample_shape=sample_shape).detach()
        return torch.tanh(z)

    def rsample(self, sample_shape=torch.Size()):
        # Gradients will and should pass through this operation.
        z = self.normal.rsample(sample_shape=sample_shape)
        return torch.tanh(z)

    def log_prob(self, action):
        # Invert the tanh to recover the pre-squash value (clamped atanh).
        pre_tanh_value = atanh(action)
        logp_pi = self.normal.log_prob(pre_tanh_value).sum(dim=-1)
        if torch.isnan(logp_pi).any():
            print("gaussian error:", logp_pi, self.loc, (self.scale >= 0).all())
        # Tanh correction: log(1 - tanh(z)^2) = 2 * (log 2 - z - softplus(-2z)).
        logp_pi -= (2 * (np.log(2) - pre_tanh_value -
                         F.softplus(-2 * pre_tanh_value))).sum(dim=-1)
        return logp_pi

    @property
    def mean(self):
        return torch.tanh(self.loc)

def weight_init_he_relu(m, activation='relu'):
    """Kaiming uniform init for linear layers."""
    if isinstance(m, nn.Linear):
        nn.init.kaiming_uniform_(m.weight.data, mode='fan_in', nonlinearity=activation)
        if hasattr(m.bias, 'data'):
            m.bias.data.fill_(0.01)


def weight_init_he_lrelu(m, activation='leaky_relu'):
    """Kaiming uniform init for linear layers."""
    if isinstance(m, nn.Linear):
        nn.init.kaiming_uniform_(m.weight.data, mode='fan_in', nonlinearity=activation)
        if hasattr(m.bias, 'data'):
            m.bias.data.fill_(0.01)

def fanin_init(tensor):
    size = tensor.size()
    if len(size) == 2:
        fan_in = size[0]
    elif len(size) > 2:
        fan_in = np.prod(size[1:])
    else:
        raise Exception("Shape must have at least 2 dimensions.")
    bound = 1. / np.sqrt(fan_in)
    return tensor.data.uniform_(-bound, bound)

def weight_init_cql(m):
    """Fan-in uniform init for linear layers (as used in CQL)."""
    if isinstance(m, nn.Linear):
        fanin_init(m.weight)
        if hasattr(m.bias, 'data'):
            m.bias.data.fill_(0.1)

def grad_norm(params, norm_type=2.0):
    params = [p for p in params if p.grad is not None]
    total_norm = torch.norm(
        torch.stack([torch.norm(p.grad.detach(), norm_type) for p in params]),
        norm_type)
    return total_norm.item()

def schedule(schdl, step):
    try:
        return float(schdl)
    except ValueError:
        match = re.match(r'linear\((.+),(.+),(.+)\)', schdl)
        if match:
            init, final, duration = [float(g) for g in match.groups()]
            mix = np.clip(step / duration, 0.0, 1.0)
            return (1.0 - mix) * init + mix * final
        match = re.match(r'step_linear\((.+),(.+),(.+),(.+),(.+)\)', schdl)
        if match:
            init, final1, duration1, final2, duration2 = [
                float(g) for g in match.groups()
            ]
            if step <= duration1:
                mix = np.clip(step / duration1, 0.0, 1.0)
                return (1.0 - mix) * init + mix * final1
            else:
                mix = np.clip((step - duration1) / duration2, 0.0, 1.0)
                return (1.0 - mix) * final1 + mix * final2
    raise NotImplementedError(schdl)

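# Illustrative sketch (not part of the original file): schedule() accepts a
# plain float or a spec string, e.g. a linear decay from 1.0 to 0.1 over 100k
# steps, or a two-phase step_linear(init, final1, duration1, final2, duration2).
def _demo_schedule():
    assert schedule('0.2', 0) == 0.2
    assert schedule('linear(1.0,0.1,100000)', 0) == 1.0
    assert abs(schedule('linear(1.0,0.1,100000)', 100000) - 0.1) < 1e-6
    assert schedule('step_linear(1.0,0.5,100,0.1,100)', 100) == 0.5
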
class RandomShiftsAug(nn.Module):
    def __init__(self, pad):
        super().__init__()
        self.pad = pad

    def forward(self, x):
        x = x.float()
        n, c, h, w = x.size()
        assert h == w
        padding = tuple([self.pad] * 4)
        x = F.pad(x, padding, 'replicate')  # (n, c, h + 2*pad, w + 2*pad)
        eps = 1.0 / (h + 2 * self.pad)
        arange = torch.linspace(-1.0 + eps,
                                1.0 - eps,
                                h + 2 * self.pad,
                                device=x.device,
                                dtype=x.dtype)[:h]
        arange = arange.unsqueeze(0).repeat(h, 1).unsqueeze(2)
        base_grid = torch.cat([arange, arange.transpose(1, 0)], dim=2)
        base_grid = base_grid.unsqueeze(0).repeat(n, 1, 1, 1)
        shift = torch.randint(0,
                              2 * self.pad + 1,
                              size=(n, 1, 1, 2),
                              device=x.device,
                              dtype=x.dtype)
        shift *= 2.0 / (h + 2 * self.pad)
        grid = base_grid + shift
        return F.grid_sample(x,
                             grid,
                             padding_mode='zeros',
                             align_corners=False)

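# Illustrative sketch (not part of the original file): RandomShiftsAug is the
# DrQ-v2-style image augmentation. It replicate-pads each square frame by
# `pad` pixels and resamples it at a random integer shift per batch element.
def _demo_random_shifts():
    aug = RandomShiftsAug(pad=4)
    obs = torch.rand(8, 9, 84, 84)  # (batch, stacked channels, h, w), h == w
    assert aug(obs).shape == obs.shape
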
class RMS(object):
    """Running mean and std."""
    def __init__(self, device, epsilon=1e-4, shape=(1,)):
        self.M = torch.zeros(shape).to(device)
        self.S = torch.ones(shape).to(device)
        self.n = epsilon

    def __call__(self, x):
        bs = x.size(0)
        delta = torch.mean(x, dim=0) - self.M
        new_M = self.M + delta * bs / (self.n + bs)
        new_S = (self.S * self.n + torch.var(x, dim=0) * bs +
                 torch.square(delta) * self.n * bs /
                 (self.n + bs)) / (self.n + bs)
        self.M = new_M
        self.S = new_S
        self.n += bs
        return self.M, self.S

class PBE(object):
    """Particle-based entropy based on kNN, normalized by a running mean."""
    def __init__(self, rms, knn_clip, knn_k, knn_avg, knn_rms, device):
        self.rms = rms
        self.knn_rms = knn_rms
        self.knn_k = knn_k
        self.knn_avg = knn_avg
        self.knn_clip = knn_clip
        self.device = device

    def __call__(self, rep):
        source = target = rep
        b1, b2 = source.size(0), target.size(0)
        # Pairwise L2 distances: (b1, 1, c) - (1, b2, c) -> (b1, b2, c) -> (b1, b2)
        sim_matrix = torch.norm(source[:, None, :].view(b1, 1, -1) -
                                target[None, :, :].view(1, b2, -1),
                                dim=-1,
                                p=2)
        # For each of the b1 rows, keep the knn_k smallest distances,
        # i.e. the k nearest neighbors within the batch.
        reward, _ = sim_matrix.topk(self.knn_k,
                                    dim=1,
                                    largest=False,  # smallest distances first
                                    sorted=True)  # shape (b1, knn_k)
        if not self.knn_avg:  # use only the k-th nearest neighbor
            reward = reward[:, -1]  # k-th smallest distance
            reward = reward.reshape(-1, 1)  # (b1, 1)
            reward /= self.rms(reward)[0] if self.knn_rms else 1.0  # normalize
            reward = torch.maximum(
                reward - self.knn_clip,
                torch.zeros_like(reward).to(self.device)
            ) if self.knn_clip >= 0.0 else reward  # (b1, 1)
        else:  # average over all k nearest neighbors
            reward = reward.reshape(-1, 1)  # (b1 * k, 1)
            reward /= self.rms(reward)[0] if self.knn_rms else 1.0
            reward = torch.maximum(
                reward - self.knn_clip,
                torch.zeros_like(reward).to(
                    self.device)) if self.knn_clip >= 0.0 else reward
            reward = reward.reshape((b1, self.knn_k))  # (b1, k)
            reward = reward.mean(dim=1, keepdim=True)  # (b1, 1)
        reward = torch.log(reward + 1.0)
        return reward

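# Illustrative sketch (not part of the original file): PBE turns a batch of
# representations into a kNN particle-entropy intrinsic reward, with the
# running-mean normalizer supplied via RMS.
def _demo_pbe():
    device = torch.device('cpu')
    rms = RMS(device)
    pbe = PBE(rms, knn_clip=0.0, knn_k=5, knn_avg=True, knn_rms=True,
              device=device)
    rep = torch.randn(64, 16)  # batch of 64 feature vectors
    reward = pbe(rep)          # intrinsic reward, one scalar per sample
    assert reward.shape == (64, 1)
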
class ConstantSchedule(object):
    def __init__(self, value):
        """Value remains constant over time.

        Parameters
        ----------
        value: float
            Constant value of the schedule
        """
        self._v = value

    def value(self, t):
        """See Schedule.value"""
        return self._v


def linear_interpolation(l, r, alpha):
    return l + alpha * (r - l)

class PiecewiseSchedule(object):
    def __init__(self, endpoints, interpolation=linear_interpolation, outside_value=None):
        """From OpenAI baselines."""
        idxes = [e[0] for e in endpoints]
        assert idxes == sorted(idxes)
        self._interpolation = interpolation
        self._outside_value = outside_value
        self._endpoints = endpoints

    def value(self, t):
        # See Schedule.value
        for (l_t, l), (r_t, r) in zip(self._endpoints[:-1], self._endpoints[1:]):
            if l_t <= t and t < r_t:
                alpha = float(t - l_t) / (r_t - l_t)
                return self._interpolation(l, r, alpha)
        # t does not belong to any of the pieces, so doom.
        assert self._outside_value is not None
        return self._outside_value

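# Illustrative sketch (not part of the original file): PiecewiseSchedule
# linearly interpolates between (t, value) endpoints and falls back to
# outside_value beyond the last endpoint.
def _demo_piecewise_schedule():
    sched = PiecewiseSchedule([(0, 1.0), (100, 0.1)], outside_value=0.1)
    assert sched.value(0) == 1.0
    assert abs(sched.value(50) - 0.55) < 1e-8  # halfway between 1.0 and 0.1
    assert sched.value(1000) == 0.1            # past the last endpoint
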
class TanhNormal(Distribution):
    """
    Represent distribution of X where
        X ~ tanh(Z)
        Z ~ N(mean, std)

    Note: this is not very numerically stable.
    """
    def __init__(self, normal_mean, normal_std, epsilon=1e-6):
        """
        :param normal_mean: Mean of the normal distribution
        :param normal_std: Std of the normal distribution
        :param epsilon: Numerical stability epsilon when computing log-prob.
        """
        self.normal_mean = normal_mean
        self.normal_std = normal_std
        self.normal = Normal(normal_mean, normal_std)
        self.epsilon = epsilon

    def sample_n(self, n, return_pre_tanh_value=False):
        z = self.normal.sample_n(n)
        if return_pre_tanh_value:
            return torch.tanh(z), z
        else:
            return torch.tanh(z)

    def log_prob(self, value, pre_tanh_value=None):
        """
        :param value: some value, x
        :param pre_tanh_value: arctanh(x)
        :return:
        """
        if pre_tanh_value is None:
            # artanh(x): recover the pre-squash value
            pre_tanh_value = torch.log((1 + value) / (1 - value)) / 2
        # Tanh change-of-variables correction: subtract log(1 - tanh(z)^2 + eps).
        # See https://garage.readthedocs.io/en/v2020.06.1/_modules/garage/torch/distributions/tanh_normal.html#TanhNormal.log_prob
        return self.normal.log_prob(pre_tanh_value) - torch.log(1 - value * value + self.epsilon)

    def sample(self, return_pretanh_value=False):
        """
        Gradients will and should *not* pass through this operation.
        See https://github.com/pytorch/pytorch/issues/4620 for discussion.
        """
        z = self.normal.sample().detach()
        if return_pretanh_value:
            return torch.tanh(z), z
        else:
            return torch.tanh(z)

    def rsample(self, return_pretanh_value=False):
        """
        Sampling in the reparameterization case.
        """
        # Reparameterization trick: mean + std * standard-normal noise, drawn
        # on the same device as the distribution's parameters.
        z = self.normal_mean + self.normal_std * torch.randn_like(self.normal_std)
        if return_pretanh_value:
            return torch.tanh(z), z
        else:
            return torch.tanh(z)

def atanh(x):
    one_plus_x = (1 + x).clamp(min=1e-6)
    one_minus_x = (1 - x).clamp(min=1e-6)
    return 0.5 * torch.log(one_plus_x / one_minus_x)

def total_parameters(qfs):
    if not isinstance(qfs, list):
        qfs = [qfs]
    n = 0
    for qfs_member in qfs:
        n += sum(p.numel() for p in qfs_member.parameters())
    return n

def decay_linear(t, init, minimum, total_steps):
    fraction = min(float(t) / total_steps, 1.0)
    return init + fraction * (minimum - init)

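# Minimal smoke test (illustrative, not part of the original file): exercises
# a few of the utilities above when the module is run directly.
if __name__ == "__main__":
    _demo_eval_mode()
    _demo_target_updates()
    _demo_schedule()
    _demo_piecewise_schedule()
    # decay_linear interpolates from init toward minimum over total_steps.
    print("decay_linear(50, 1.0, 0.1, 100) =", decay_linear(50, 1.0, 0.1, 100))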