In this code we create functions that generate matrices with specific structures. We take inspiration from the paper "Analyzing Multi-Head Self-Attention: Specialized Heads Do the Heavy Lifting, the Rest Can Be Pruned" (https://arxiv.org/abs/1905.09418).


In [1]:
import numpy as np
import scipy


In [78]:
def creatediagonal(d):
  """Return a d x d diagonal matrix with random diagonal entries.

  Args:
    d: Number of rows/columns of the square matrix (an integer).

  Returns:
    A (d, d) array whose diagonal is drawn uniformly from [0, 1) and whose
    off-diagonal entries are all zero.
  """
  # Draw the d diagonal values and embed them in a square matrix in one step.
  return np.diag(np.random.rand(d))


def createsparse(d, perc=0.1):
  """Return a random matrix in which only a fraction of entries is non-zero.

  Args:
    d: Sequence whose first two items are the number of rows and columns.
    perc: Approximate fraction of entries that are KEPT (non-zero). Every
      entry is kept independently with probability ``perc``, so the default
      0.1 gives a matrix that is roughly 90% zeros.

  Returns:
    A (d[0], d[1]) array with kept entries drawn uniformly from [0, 1) and
    all other entries set to 0.
  """
  sparse_matrix = np.random.rand(d[0], d[1])
  random_mask = np.random.rand(d[0], d[1])
  # Zero out every position whose mask draw exceeds the keep-probability.
  sparse_matrix[random_mask > perc] = 0
  # The original ended with a bare `return`, so callers always received None.
  return sparse_matrix

#Here we create a matrix in which each input attends to a few other inputs chosen at random.
#For every column we have a few big (>0.5) elements and the others are zero.

def structured(d, perc=0.1, sup=0.9):
  """Return a matrix with a few large random entries and zeros elsewhere.

  Each position is selected independently with probability ``1 - sup``;
  selected positions get a value drawn uniformly from [0.5, 1) and all
  other positions are 0.

  Args:
    d: Sequence whose first two items are the number of rows and columns.
    perc: Unused; kept only so existing calls that pass it keep working.
    sup: Threshold on a uniform [0, 1) draw. A position is selected when its
      draw exceeds ``sup``, so a larger ``sup`` means fewer non-zero entries.

  Returns:
    A (d[0], d[1]) float array.
  """
  n_rows, n_cols = d[0], d[1]
  structured_matrix = np.zeros((n_rows, n_cols))
  # One uniform draw per position decides which entries are non-zero.
  selected = np.random.rand(n_rows, n_cols) > sup
  # np.random.rand accepts only integer dimensions (no low/high/size), so the
  # original call raised TypeError; uniform() gives the intended [0.5, 1) range.
  structured_matrix[selected] = np.random.uniform(
      0.5, 1, size=np.count_nonzero(selected))
  return structured_matrix


#create a banded matrix

def context(d, w):
  """Return a banded matrix with random values inside the band.

  Args:
    d: Sequence whose first two items are the number of rows and columns.
    w: Window size; entry (i, j) is filled when j lies in [i - w, i + w]
      (clipped to the valid column range).

  Returns:
    A (d[0], d[1]) array with band entries drawn uniformly from [0.5, 1)
    and zeros everywhere else.
  """
  banded = np.zeros((d[0], d[1]))
  # Enumerate the band cells in row-major order, clipping the window at the
  # left and right edges of the matrix.
  band_cells = [
      (row, col)
      for row in range(d[0])
      for col in range(max(0, row - w), min(row + w + 1, d[1]))
  ]
  for row, col in band_cells:
    banded[row, col] = np.random.uniform(0.5, 1)  # other ranges are possible
  return banded

def addnoise(M, noise=0.1):
  """Return a noisy copy of M with values clipped to [0, 1].

  Args:
    M: 2-D array to perturb; it is not modified.
    noise: Scale of the perturbation relative to the largest entry of M.

  Returns:
    A new array equal to M plus standard-normal noise multiplied by
    ``M.max() * noise``, with results above 1 set to 1 and below 0 set to 0.
  """
  rows, cols = M.shape[0], M.shape[1]
  # Gaussian perturbation scaled by the matrix maximum and the noise level.
  perturbation = np.random.randn(rows, cols) * M.max() * noise
  # Clamp the sum back into the [0, 1] range.
  return np.clip(M + perturbation, 0, 1)

def add_sparsity(M, perc=0.1):
  """Return a copy of M with random positions overwritten by large values.

  Despite its name, this function does not zero anything out: each position
  is chosen independently with probability ``perc`` and its value is replaced
  by a draw from the uniform distribution on [0.5, 1).

  Args:
    M: Array (or array-like) to perturb. It is left untouched; the original
      implementation wrote into it, silently changing the caller's matrix.
    perc: Probability that any given position is overwritten.

  Returns:
    A new float array with the same shape as M.
  """
  # Work on a float copy so the caller's matrix is not mutated and the
  # fractional replacement values are not truncated by an integer dtype.
  result = np.array(M, dtype=float)
  sparsity_mask = np.random.rand(*result.shape) < perc  # boolean mask
  result[sparsity_mask] = np.random.uniform(
      0.5, 1, size=np.count_nonzero(sparsity_mask))
  return result

def random_matrix(d):
  """Return a dense matrix of uniform random values.

  Args:
    d: Sequence whose first two items are the number of rows and columns.

  Returns:
    A (d[0], d[1]) array with entries drawn uniformly from [0, 1).
  """
  n_rows, n_cols = d[0], d[1]
  return np.random.rand(n_rows, n_cols)




In [81]:
# Build an 8x8 banded matrix with window size 2 and show it with 2-decimal precision.
A=context([8,8],2)
np.set_printoptions(precision=2)
display(A)  # presumably the notebook's rich-display helper; it is not defined in this file

array([[0.95, 0.99, 0.56, 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.69, 0.86, 0.78, 0.64, 0.  , 0.  , 0.  , 0.  ],
       [0.88, 0.95, 0.9 , 0.63, 0.6 , 0.  , 0.  , 0.  ],
       [0.  , 0.53, 0.79, 0.92, 0.56, 0.77, 0.  , 0.  ],
       [0.  , 0.  , 0.85, 0.92, 0.89, 0.51, 0.74, 0.  ],
       [0.  , 0.  , 0.  , 0.66, 0.66, 0.51, 0.71, 0.7 ],
       [0.  , 0.  , 0.  , 0.  , 0.69, 0.81, 0.65, 0.82],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.53, 0.53, 0.57]])

In [83]:
# Overwrite a random subset of positions (default perc=0.1) with values in [0.5, 1) and print the result.
B=add_sparsity(A)
print(B)

[[0.95 0.99 0.95 0.55 0.86 0.   0.   0.  ]
 [0.69 0.86 0.78 0.64 0.   0.   0.   0.  ]
 [0.88 0.95 0.92 0.58 0.6  0.   0.   0.  ]
 [0.88 0.53 0.79 0.92 0.56 0.77 0.   0.  ]
 [0.   0.   0.85 0.69 0.75 0.51 0.74 0.73]
 [0.   0.   0.64 0.66 0.66 0.51 0.9  0.7 ]
 [0.   0.94 0.   0.   0.69 0.81 0.65 0.55]
 [0.   0.   0.   0.   0.   0.53 0.53 0.83]]


In [85]:
# Perturb B with Gaussian noise (default noise=0.1, scaled by B.max()), clip to [0, 1], and print the result.
C=addnoise(B)
print(C)

[[0.94 0.82 0.89 0.63 0.93 0.08 0.   0.15]
 [0.52 0.73 0.75 0.64 0.   0.04 0.   0.2 ]
 [0.82 0.99 1.   0.65 0.57 0.02 0.   0.  ]
 [0.92 0.28 0.92 0.9  0.63 0.75 0.   0.05]
 [0.14 0.   1.   0.66 0.65 0.46 0.82 0.51]
 [0.02 0.   0.56 0.61 0.6  0.61 0.89 0.73]
 [0.   1.   0.   0.11 0.79 0.69 0.57 0.56]
 [0.   0.02 0.1  0.   0.   0.31 0.55 0.73]]
