In [2]:
from torch.autograd.function import Function
import torch
import numpy as np

# 1. 自己编写function以探究autograd

In [5]:
class Exp(Function):                    # 此层计算e^x

    @staticmethod
    def forward(ctx, x, coeff):                # 模型前向
        result = x.exp()
        # 保存backward需要的内容，正向计算的结果被保存在saved_tensors元组中，注意此处仅能保存tensor类型变量，
        ctx.save_for_backward(result)
        # 其余类型变量（Int等），可直接赋予ctx作为成员变量，也可以达到保存效果
        ctx.coeff = coeff

        return result


    @staticmethod
    def backward(ctx, grad_output):     # 梯度反传
        result, = ctx.saved_tensors     # 取出forward中保存的result
        # 计算梯度并返回，注意乘系数
        # 返回值是2个：因为要和forward的输入个数和顺序保持一致
        return grad_output * result*ctx.coeff, None

# 尝试使用
x = torch.tensor([1.], requires_grad=True)  # 需要设置tensor的requires_grad属性为True，才会进行梯度反传
ret = Exp.apply(x,0.1)                          # 使用apply方法调用自定义autograd function
print(ret)                                  # tensor([2.7183], grad_fn=<ExpBackward>)
ret.backward()                              # 反传梯度，得到乘了系数的结果
print(x.grad)                               # tensor([0.27183])


tensor([2.7183], grad_fn=<ExpBackward>)
tensor([0.2718])


# 2. mask的用法
Here's a simple example of a custom loss function that computes the mean squared error (MSE) between two tensors, **but only for the elements that are greater than a certain threshold**:

In [88]:
# 原始版本
def custom_loss_fn_1(x, y, threshold=0.5):
    loss = torch.tensor([0.],requires_grad=True)
    mask = (y > threshold).int()
    mse = (diff*diff).mean()
    for i in range(len(x)):
        diff = x[i] - y[i]
        mse = (diff * diff).mean()
        print(mse)
        loss = loss + mse

    masked_mse = loss/len(x) * mask         # 作mask
    return masked_mse

x = torch.tensor([0.2, 0.6, 0.8], requires_grad=True)
y = torch.tensor([0.3, 0.7, 1.8])

loss = custom_loss_fn_1(x, y, threshold=0.5)
loss.backward(torch.ones_like(loss))        # torch.ones_like(loss)相当于sum

print(x.grad)

tensor(0.0100, grad_fn=<MeanBackward0>)
tensor(0.0100, grad_fn=<MeanBackward0>)
tensor(1.0000, grad_fn=<MeanBackward0>)
tensor([-0.1333, -0.1333, -1.3333])


In [85]:
# loss?
def custom_loss_fn(x, y, threshold=0.5):
    loss = torch.tensor([])
    mask = (x > threshold).int()
    for i in range(len(x)):
        diff = x[i] - y[i]
        mse = (diff * diff).mean()
        loss = torch.cat([loss,mse])
        # loss = loss + mse

    masked_mse = loss/len(x) * mask         # 作mask
    return masked_mse

x = torch.tensor([0.2, 0.6, 0.8], requires_grad=True)
y = torch.tensor([0.3, 0.7, 0.9], requires_grad=True)

loss = custom_loss_fn(x, y, threshold=0.5)
loss.backward(torch.ones_like(loss))        # torch.ones_like(loss)相当于sum

print(x.grad)
print(y.grad)

RuntimeError: zero-dimensional tensor (at position 1) cannot be concatenated

In [36]:
def custom_loss_fn(x, y, threshold=0.5):
        diff = x - y
        mask = (x > threshold).int()
        mse = (diff * diff).mean()
        # mask的作用是：只保留部分值的loss贡献，不是所有的
        # 用mask？mask = (x > T).int()
        masked_mse = mse * mask         # 作mask
        print(masked_mse)
        return masked_mse

x = torch.tensor([0.2, 0.6, 0.8], requires_grad=True)
y = torch.tensor([0.3, 0.7, 0.9], requires_grad=True)

loss = custom_loss_fn(x, y, threshold=0.5)
loss.backward(torch.ones_like(loss))        # torch.ones_like(loss)相当于sum

print(x.grad)
print(y.grad)

tensor([0.0000, 0.0100, 0.0100], grad_fn=<MulBackward0>)
tensor([-0.1333, -0.1333, -0.1333])
tensor([0.1333, 0.1333, 0.1333])


# 3. logistic_function的两个写法？

In [47]:
def logistic_function(x):
#     x = np.float64(x)
    return 1.0 / (1.0 + torch.exp(-x))

In [48]:
def logistic_function_2(x):
    return .5 * (1 + torch.tanh(.5 * x))


In [53]:
input = torch.tensor([1.,2.,-100.,-200.,-500.,-10000.])
ans1 = [logistic_function(i) for i in input]
ans2 = [logistic_function_2(i) for i in input]

In [54]:
print(ans1)
print(ans2)

[tensor(0.7311), tensor(0.8808), tensor(0.), tensor(0.), tensor(0.), tensor(0.)]
[tensor(0.7311), tensor(0.8808), tensor(0.), tensor(0.), tensor(0.), tensor(0.)]


# 4. Standardize with sklearn

In [65]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

data = np.array([1.,2.,3.])
data_st = scaler.fit_transform(data.reshape(-1, 1))

data_st,(data-data.mean())/data.std()

(array([[-1.22474487],
        [ 0.        ],
        [ 1.22474487]]),
 array([-1.22474487,  0.        ,  1.22474487]))

## 4.1其他标准化方式

In [73]:
import numpy as np
import math

'''
around(arr,decimals=?)？表示保留多少位小数
'''


class Datanorm:
    def __init__(self):
        self.arr = np.array([3.8880657774004e-05,3.911735146961352e-05,3.935547427214615e-05,3.9595034658223214e-05,3.983604115299071e-05,4.0078502330276094e-05,4.032242681230033e-05,4.056782327105409e-05,4.08147004273422e-05,4.106306705171421e-05,4.131293196472811e-05,4.156430403710224e-05,4.181719218953348e-05,4.2071605393624246e-05,4.232755267180993e-05,4.258504309750736e-05,4.284408579615e-05,4.310468994411414e-05,4.336686477008516e-05,4.3630619554980406e-05,4.389596363209313e-05,4.416290638734643e-05,4.4431457259435226e-05,4.470162574152024e-05,4.49734213784841e-05,4.524685377039804e-05,4.552193257077433e-05,4.579866748825603e-05,4.607706828453511e-05,4.6357144778146974e-05,4.663890684105586e-05,4.6922364402114375e-05,4.720752744453417e-05,4.74944060086777e-05,4.77830101906356e-05,4.807335014335304e-05,4.836543607587029e-05,4.865927825533354e-05,4.895488700578967e-05,4.9252272708642935e-05,4.9551445803886e-05,4.985241678966738e-05,5.015519622241176e-05,5.045979471716097e-05,5.076622294868913e-05,5.107449165095444e-05,5.13846116168823e-05,5.1696593700473794e-05,5.201044881558893e-05,5.23261879363906e-05,5.264382209800877e-05,5.296336239676013e-05,5.328481999058783e-05,5.360820609861397e-05,5.3933532002462666e-05,5.426080904625261e-05,5.4590048636366816e-05,5.492126224210687e-05,5.52544613956814e-05,5.558965769407513e-05,5.592686279637704e-05,5.626608842697511e-05,5.660734637409948e-05,5.695064849135307e-05,5.729600669647328e-05,5.764343297330226e-05,5.799293937087752e-05,5.834453800440261e-05,5.869824105510951e-05,5.905406077078314e-05,5.941200946628446e-05,5.977209952340767e-05,6.013434339195295e-05,6.049875358847408e-05,6.08653426986752e-05,6.123412337604427e-05,6.16051083430293e-05,6.197831039088511e-05,6.235374238062464e-05,6.273141724230907e-05,6.311134797654816e-05,6.349354765389741e-05,6.387802941535917e-05,6.426480647354526e-05,6.465389211129438e-05,6.504529968382584e-05,6.543904261790584e-05,6.583513441244929e-05,6.62335886385666e-05,6.663441894093485e-05,6.703763903673512e-05,6.744326271646695e-05,6.785130384520212e-05,6.826177636118454e-05,6.867469427774272e-05,6.909007168243771e-05,6.95079227381976e-05,6.992826168323431e-05,7.035110283117927e-05,7.077646057221066e-05,7.120434937274237e-05,7.16347837750005e-05,7.206777839935804e-05,7.250334794269167e-05,7.294150717960724e-05,7.338227096322074e-05,7.38256542243914e-05,7.427167197216658e-05,7.4720339295659e-05,7.517167136227857e-05,7.562568341905265e-05,7.608239079339123e-05,7.654180889263553e-05,7.700395320470764e-05,7.746883929776427e-05,7.793648282227482e-05,7.840689950934554e-05,7.888010517224037e-05,7.935611570602337e-05,7.98349470890732e-05,8.031661538172729e-05,8.080113672734937e-05,8.128852735350396e-05,8.177880357081213e-05,8.227198177423027e-05,8.276807844322277e-05,8.326711014182155e-05,8.376909351912329e-05,8.427404530967306e-05,8.478198233351428e-05,8.529292149645568e-05,8.580687979165691e-05,8.632387429790562e-05,8.684392218119638e-05,8.736704069465353e-05,8.789324717944183e-05,8.842255906446199e-05,8.895499386648338e-05,8.949056919115397e-05,9.002930273290542e-05,9.057121227518492e-05,9.11163156911234e-05,9.166463094364986e-05,9.221617608549162e-05,9.277096926005128e-05,9.332902870128916e-05,9.38903727344823e-05,9.4455019775659e-05,9.502298833311972e-05,9.55942970065337e-05,9.616896448845128e-05,9.674700956317252e-05,9.732845110891091e-05,9.791330809566393e-05,9.850159958802754e-05,9.909334474382817e-05,9.968856281494916e-05,0.0001002872731477,0.000100889495183,0.0001014952484567,0.0001021045525997,0.0001027174273385,0.0001033338924955,0.0001039539679888,0.0001045776738334,0.0001052050301402,0.0001058360571181,0.0001064707750718,0.0001071092044048,0.0001077513656168,0.0001083972793075,0.0001090469661728,0.0001097004470087,0.0001103577427091,0.0001110188742675,0.0001116838627763,0.0001123527294278,0.0001130254955142,0.0001137021824271,0.0001143828116596,0.0001150674048043,0.0001157559835551,0.000116448569707,0.0001171451851564,0.000117845851901,0.0001185505920403,0.0001192594277762,0.0001199723814122,0.000120689475355,0.0001214107321131,0.0001221361742989,0.0001228658246271,0.0001235997059163,0.0001243378410881,0.0001250802531684,0.0001258269652865,0.0001265780006761,0.0001273333826754,0.0001280931347269,0.0001288572803776,0.0001296258432799,0.0001303988471908,0.0001311763159728,0.0001319582735935,0.0001327447441263,0.0001335357517505,0.0001343313207507,0.0001351314755178,0.0001359362405493,0.0001367456404485,0.0001375596999249,0.0001383784437956,0.0001392018969834,0.0001400300845179,0.0001408630315366,0.0001417007632832,0.0001425433051086,0.0001433906824714,0.0001442429209374,0.0001451000461793,0.0001459620839781,0.0001468290602218,0.0001477010009062,0.0001485779321349,0.0001494598801193,0.0001503468711787,0.0001512389317399,0.0001521360883383,0.0001530383676163,0.0001539457963253,0.0001548584013237,0.0001557762095784,0.0001566992481644,0.0001576275442649,0.0001585611251698,0.0001595000182788,0.0001604442510983,0.0001613938512428,0.0001623488464349,0.0001633092645052,0.0001642751333915,0.0001652464811397,0.0001662233359031,0.0001672057259428,0.0001681936796273,0.0001691872254317,0.0001701863919399,0.0001711912078415,0.0001722017019333,0.00017321790312,0.0001742398404117,0.0001752675429259,0.0001763010398859,0.0001773403606222,0.0001783855345699,0.0001794365912716,0.0001804935603739,0.0001815564716301,0.0001826253548981,0.0001837002401408,0.0001847811574259,0.0001858681369256,0.0001869612089165,0.0001880604037784,0.0001891657519955,0.0001902772841549,0.0001913950309471,0.0001925190231647,0.0001936492917028,0.0001947858675591,0.0001959287818319,0.0001970780657217,0.0001982337505295,0.0001993958676562,0.000200564448604,0.0002017395249733,0.0002029211284646,0.000204109290877,0.0002053040441076,0.0002065054201513,0.0002077134511005,0.0002089281691444,0.0002101496065676,0.0002113777957514,0.0002126127691721,0.0002138545593997,0.000215103199099,0.0002163587210283,0.0002176211580382,0.0002188905430711,0.0002201669091617,0.0002214502894352,0.0002227407171065,0.00022403822548,0.0002253428479496,0.0002266546179965,0.0002279735691891,0.0002292997351824,0.0002306331497171,0.0002319738466188
])
        self.x_max = self.arr.max()
        self.x_min = self.arr.min()
        self.x_mean = self.arr.mean()
        self.x_std = self.arr.std()

    def Min_MaxNorm(self):
        arr = np.around(((self.arr - self.x_min) / (self.x_max - self.x_min)), decimals=4)
        print("Min_Max标准化:{}".format(arr))

    def Z_ScoreNorm(self):
        arr = np.around((self.arr - self.x_mean) / self.x_std, decimals=4)
        print("Z_Score标准化:{}".format(arr))

    def Decimal_ScalingNorm(self):
        power = 1
        maxValue = self.x_max
        while maxValue / 10 >= 1.0:
            power += 1
            maxValue /= 10
        arr = np.around((self.arr / pow(10, power)), decimals=4)
        print(f"小数定标,power = {power},标准化:{arr}")

    def MeanNorm(self):
        first_arr = np.around((self.arr-self.x_mean) / (self.x_max - self.x_min), decimals=4)
        second_arr = np.around((self.arr - self.x_mean)/self.x_max, decimals=4)
        print("均值归一法：\n公式一:{}\n公式二:{}".format(first_arr, second_arr))

    def Vector(self):
        arr = np.around((self.arr/self.arr.sum()), decimals=4)
        print("向量归一法:{}".format(arr))

    def exponeential(self):

        first_arr = np.around(np.log10(self.arr) / np.log10(self.x_max), decimals=4)
        #second_arr = np.around(np.exp(self.arr)/sum(np.exp(self.arr)), decimals=4)
        #three_arr = np.around(1/(1+np.exp(-1*self.arr)), decimals=4)
        # print("lg函数:{}\nSoftmax函数:{}\nSigmoid函数:{}\n".format(first_arr,second_arr,three_arr))
        print("lg函数:{}\n".format(first_arr))


if __name__ == "__main__":

    a = Datanorm()
    a.Min_MaxNorm()
    a.Z_ScoreNorm()     # seems the best
    #a.Decimal_ScalingNorm()
    a.MeanNorm()
    #a.Vector()
    # a.exponeential()


Min_Max标准化:[0.     0.0012 0.0025 0.0037 0.0049 0.0062 0.0075 0.0087 0.01   0.0113
 0.0126 0.0139 0.0152 0.0165 0.0179 0.0192 0.0205 0.0219 0.0232 0.0246
 0.026  0.0274 0.0287 0.0301 0.0316 0.033  0.0344 0.0358 0.0373 0.0387
 0.0402 0.0416 0.0431 0.0446 0.0461 0.0476 0.0491 0.0506 0.0522 0.0537
 0.0553 0.0568 0.0584 0.06   0.0616 0.0631 0.0648 0.0664 0.068  0.0696
 0.0713 0.0729 0.0746 0.0763 0.078  0.0797 0.0814 0.0831 0.0848 0.0865
 0.0883 0.09   0.0918 0.0936 0.0954 0.0972 0.099  0.1008 0.1026 0.1045
 0.1063 0.1082 0.1101 0.112  0.1139 0.1158 0.1177 0.1196 0.1216 0.1235
 0.1255 0.1275 0.1295 0.1315 0.1335 0.1355 0.1375 0.1396 0.1417 0.1437
 0.1458 0.1479 0.15   0.1522 0.1543 0.1564 0.1586 0.1608 0.163  0.1652
 0.1674 0.1696 0.1719 0.1741 0.1764 0.1787 0.181  0.1833 0.1856 0.1879
 0.1903 0.1927 0.195  0.1974 0.1998 0.2023 0.2047 0.2072 0.2096 0.2121
 0.2146 0.2171 0.2196 0.2222 0.2247 0.2273 0.2299 0.2325 0.2351 0.2377
 0.2404 0.243  0.2457 0.2484 0.2511 0.2538 0.2566 0.2593 0.2621 0.

# 5. bp hook

In [6]:
import torch  # torch version is 1.11.0
def save_grad(name):
    def hook(grad):
        print(f"name={name}, grad={grad}")
    return hook

def bp_hook(module, grad_input, grad_output):
    param_grad = list(module.parameters())[0].grad
    print(f'gradient of the module inside bp_hook: {param_grad}')

# net = torch.nn.Linear(4, 1, bias=False)
# # net.register_full_backward_hook(bp_hook)
# data = torch.ones(1, 4)
# # net.register_full_backward_hook(bp_hook)
# output = net(data)
# # net.register_full_backward_hook(bp_hook)
# net.register_hook(save_grad("net"))
# output.backward(retain_graph=True)
# # net.register_full_backward_hook(bp_hook)

# print(f'gradient of the module outside bp_hook: {list(net.parameters())[0].grad}')

In [11]:
alpha = torch.tensor([1.,2.,3.],requires_grad=True)
labda = torch.tensor([5.,6.,7.],requires_grad=True)
condtn = torch.tensor([1.,1.,0.])


def loss_fn(condtn,alpha,labda):
    if condtn>0:
        return torch.min(alpha*labda,torch.tensor([10.]))
        # 被torch.min截断的grad=0(常数的导数当然是0，得不到更新)
        # 因此没有必要计算？alpha*x的
    else:
        return alpha

loss = torch.tensor([0.],requires_grad=True)
for i in range(3):
    loss = loss_fn(condtn[i],alpha[i],labda[i]) + loss

alpha.register_hook(save_grad("alpha"))
labda.register_hook(save_grad("labda"))

loss.backward()


name=labda, grad=tensor([1., 0., 0.])
name=alpha, grad=tensor([5., 0., 1.])


# 6. del对于bp的影响

In [17]:
def custom_loss_fn(x, y):
        diff = x - y
        # mse = (diff * diff).mean()
        mse = diff
        # 设置中间变量，与x有关
        print(f"mse:{mse.detach()}")
        middle = x*3
        mse = middle + mse
        print(f"mse:{mse.detach()}")
        # 以下del都不影响
        del middle
        del diff
        del y
        del x
        return mse

x = torch.tensor([0.2, 0.6, 0.8], requires_grad=True)
y = torch.tensor([0.3, 0.7, 0.9], requires_grad=True)       # target data

loss = custom_loss_fn(x, y)
loss.backward(torch.ones_like(loss))        # torch.ones_like(loss)相当于sum
print(x.grad)

mse:tensor([-0.1000, -0.1000, -0.1000])
mse:tensor([0.5000, 1.7000, 2.3000])
tensor([4., 4., 4.])
