-
Notifications
You must be signed in to change notification settings - Fork 1
/
min_norm_solvers.py
87 lines (78 loc) · 3.32 KB
/
min_norm_solvers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import numpy as np
import torch
class MinNormSolver:
MAX_ITER = 250
STOP_CRIT = 1e-5
def _min_norm_element_from2(v1v1, v1v2, v2v2):
"""
Analytical solution for min_{c} |cx_1 + (1-c)x_2|_2^2
d is the distance (objective) optimzed
v1v1 = <x1,x1>
v1v2 = <x1,x2>
v2v2 = <x2,x2>
"""
if v1v2 >= v1v1:
# Case: Fig 1, third column
gamma = 0.999
cost = v1v1
return gamma, cost
if v1v2 >= v2v2:
# Case: Fig 1, first column
gamma = 0.001
cost = v2v2
return gamma, cost
# Case: Fig 1, second column
gamma = -1.0 * ((v1v2 - v2v2) / (v1v1 + v2v2 - 2 * v1v2))
cost = v2v2 + gamma * (v1v2 - v2v2)
return gamma, cost
def _min_norm_2d(vecs, dps):
"""
Find the minimum norm solution as combination of two points
This is correct only in 2D
ie. min_c |\sum c_i x_i|_2^2 st. \sum c_i = 1 , 1 >= c_1 >= 0 for all i, c_i + c_j = 1.0 for some i, j
"""
dmin = 1e8
for i in range(len(vecs)):
for j in range(i + 1, len(vecs)):
if (i, j) not in dps:
dps[(i, j)] = 0.0
for k in range(len(vecs[i])):
dps[(i, j)] += torch.mul(vecs[i][k], vecs[j][k]).sum().data.cpu()
dps[(j, i)] = dps[(i, j)]
if (i, i) not in dps:
dps[(i, i)] = 0.0
for k in range(len(vecs[i])):
dps[(i, i)] += torch.mul(vecs[i][k], vecs[i][k]).sum().data.cpu()
if (j, j) not in dps:
dps[(j, j)] = 0.0
for k in range(len(vecs[i])):
dps[(j, j)] += torch.mul(vecs[j][k], vecs[j][k]).sum().data.cpu()
c, d = MinNormSolver._min_norm_element_from2(dps[(i, i)], dps[(i, j)], dps[(j, j)])
if d < dmin:
dmin = d
sol = [(i, j), c, d]
return sol, dps
def find_min_norm_element(vecs):
"""
Given a list of vectors (vecs), this method finds the minimum norm element in the convex hull
as min |u|_2 st. u = \sum c_i vecs[i] and \sum c_i = 1.
It is quite geometric, and the main idea is the fact that if d_{ij} = min |u|_2 st u = c x_i + (1-c) x_j; the solution lies in (0, d_{i,j})
Hence, we find the best 2-task solution, and then run the projected gradient descent until convergence
"""
# Solution lying at the combination of two points
dps = {}
init_sol, dps = MinNormSolver._min_norm_2d(vecs, dps)
n = len(vecs)
sol_vec = np.zeros(n)
sol_vec[init_sol[0][0]] = init_sol[1]
sol_vec[init_sol[0][1]] = 1 - init_sol[1]
if n < 3:
# This is optimal for n=2, so return the solution
return sol_vec, init_sol[2]
def gradient_normalizers(grads, losses, normalization_type):
# gradient normalization method supposed in MGDA
gn = []
if normalization_type == 'ours':
for t in range(len(grads)):
gn.append(np.sqrt(np.sum([gr.pow(2).sum().data.cpu() for gr in grads[t]])) / losses[t])
return gn