In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
from keras.constraints import maxnorm, nonneg, unit_norm
import gc
from argparse import ArgumentParser
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Dense, LSTM, GRU, Conv1D, Activation, Lambda, Permute, Conv2D, Flatten
from tensorflow.keras.callbacks import ReduceLROnPlateau
from itertools import permutations

from math import comb
from itertools import combinations
from sklearn.linear_model import Ridge
import time
from numpy import linalg as LA
from sklearn.metrics import mean_squared_error
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

2023-05-09 17:00:39.814408: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


## Utils

In [2]:
def poly_Zk_Zn(x, indices):
    """Evaluate the cyclic polynomial sum_j v_j * v_{j+1}**2 on selected columns.

    The columns of ``x`` (split along axis 1) listed in ``indices`` are taken
    in the given order as v_0 ... v_{n-1}; consecutive pairs contribute
    ``v_j * v_{j+1} ** 2`` and the last term wraps around to ``v_{n-1} * v_0 ** 2``.

    Args:
        x: array/tensor whose axis 1 enumerates the variables.
        indices: sequence of column positions to use, in cyclic order.

    Returns:
        Tensor with one value per row of ``x``.
    """
    # Split into per-column tensors, then keep only the requested ones.
    columns = tf.gather(tf.unstack(x, axis=1), indices)
    n_selected = len(indices)

    total = 0
    for pos in range(n_selected - 1):
        total = total + columns[pos] * columns[pos + 1] ** 2
    # Wrap-around term closing the cycle.
    total = total + columns[n_selected - 1] * columns[0] ** 2
    return total

def apply_layers(x, layers):
    """Feed ``x`` through every callable in ``layers``, in order.

    Args:
        x: initial value.
        layers: iterable of one-argument callables.

    Returns:
        The output of the last callable, or ``x`` itself if ``layers`` is empty.
    """
    result = x
    for layer in layers:
        result = layer(result)
    return result

def sigmaPi(fin, m, n, p):
    """Sum over ``m`` matrices of the product of the diagonal of ``fin @ p``.

    As called from ``GroupInvariance.call`` ``fin`` has shape
    (batch, n, num_features, n) and ``p`` is the (m, n, n) stack produced by
    ``prepare_permutation_matices``; the result then has shape
    (batch, num_features).

    Args:
        fin: rank-4 tensor of per-input features.
        m: number of matrices in ``p`` (used as the tiling factor).
        n: unused here; kept for signature compatibility.
        p: stack of matrices right-multiplied onto ``fin``.
    """
    swapped = tf.transpose(fin, (0, 2, 1, 3))
    # Insert an axis for the m matrices and replicate the features along it.
    expanded = tf.expand_dims(swapped, axis=2)
    replicated = tf.tile(expanded, (1, 1, m, 1, 1))
    permuted = tf.matmul(replicated, p)
    diagonals = tf.linalg.diag_part(permuted)
    products = tf.reduce_prod(diagonals, axis=3)   # "Pi": product along each diagonal
    return tf.reduce_sum(products, axis=2)         # "Sigma": sum over the m matrices

def prepare_permutation_matices(perm, n, m):
    """Build a stack of ``m`` float32 n x n permutation matrices.

    Every slice starts as the identity. For the i-th entry ``rows`` of
    ``perm``, row ``rows[j]`` of slice ``i`` is overwritten with the j-th unit
    vector. Slices without a matching entry in ``perm`` stay the identity.

    Args:
        perm: iterable of index sequences, each of length ``n``.
        n: matrix size.
        m: number of slices in the returned stack.

    Returns:
        ``np.ndarray`` of shape (m, n, n) and dtype float32.
    """
    identity = np.eye(n, dtype=np.float32)
    stacked = np.repeat(identity[np.newaxis, :, :], m, axis=0)
    for slot, rows in enumerate(perm):
        stacked[slot, rows, :] = identity
    return stacked
            
def get_matrix(d):
    """Return the two stacked (d*(d-1), d) matrices used for the paired inputs.

    ``M1`` is the d x d identity repeated ``d - 1`` times. ``M2`` stacks the
    powers P, P^2, ..., P^(d-1) of the cyclic shift matrix
    ``P = np.roll(I, 1, axis=-1)``.

    Args:
        d: input dimension.

    Returns:
        Tuple ``(M1, M2)`` of float arrays.
    """
    identity = np.eye(d)
    M1 = np.vstack([identity] * (d - 1))

    shift = np.roll(identity, 1, axis=-1)
    powers = [shift]
    while len(powers) < d - 1:
        powers.append(powers[-1] @ shift)
    M2 = np.vstack(powers)
    return M1, M2

## Model

In [3]:
class GroupInvariance(tf.keras.Model):
    """Keras model combining per-coordinate features through ``sigmaPi``.

    Each input coordinate is embedded by a small MLP, the embeddings are
    reshaped to (batch, n, num_features, n), reduced by ``sigmaPi`` with the
    permutation matrices derived from ``perm``, and passed to a dense head
    producing one value per sample.
    """

    def __init__(self, perm, num_features):
        """
        Args:
            perm: sequence of permutations (index sequences of equal length).
            num_features: number of feature channels fed to ``sigmaPi``.
        """
        super().__init__()
        dense = tf.keras.layers.Dense
        tanh = tf.keras.activations.tanh

        self.num_features = num_features
        self.n = len(perm[0])   # size of each permutation
        self.m = len(perm)      # number of permutations
        self.p = prepare_permutation_matices(perm, self.n, self.m)

        # Per-coordinate embedding; the last layer emits n * num_features values.
        self.features = [
            dense(16, activation=tanh),
            dense(64, activation=tanh),
            dense(self.n * self.num_features, activation=tf.keras.activations.sigmoid),
        ]

        # Head applied to the sigmaPi output.
        self.fc = [
            dense(32, activation=tf.keras.activations.relu, use_bias=False),
            dense(1),
        ]

    def call(self, inputs):
        """Forward pass: embed each coordinate, reduce with sigmaPi, apply the head."""
        per_coord = tf.expand_dims(inputs, axis=2)
        embedded = apply_layers(per_coord, self.features)
        grouped = tf.reshape(embedded, (-1, self.n, self.num_features, self.n))
        pooled = sigmaPi(grouped, self.m, self.n, self.p)
        return apply_layers(pooled, self.fc)

## Data utils

In [4]:
def get_data(x, train_indices):
    """Build cyclically shifted, swapped and randomly permuted variants of ``x``.

    Relies on the notebook-level globals ``d`` (matrix size) and ``k``
    (2*k random permutations are drawn).

    Args:
        x: presumably a single sample of length ``d`` (it is left-multiplied by
            d x d matrices and indexed as ``y[train_indices]``) - TODO confirm.
        train_indices: coordinates that are permuted; all others are left alone.
            The pairing of rows with ``p_indices`` below walks ``i`` in
            ascending order, so it matches a cyclic shift only when
            ``train_indices`` is sorted ascending.

    Returns:
        Tuple of three arrays:
        ``c_train_ds`` - P x, P^2 x, ..., P^(n-1) x with n = len(train_indices);
        ``d_train_ds`` - S x, P S x, ..., P^(n-2) S x, where S swaps the first
        two selected coordinates;
        ``p_train_ds`` - 2*k copies of ``x`` with the selected coordinates
        rearranged by distinct random permutations.
    """
    # P is a d x d 0/1 matrix: identity outside train_indices, and inside it
    # the j-th selected row picks the coordinate p_indices[j] (indices rolled by one).
    P = np.zeros((d, d))
    p_indices = np.roll(train_indices, 1)
    j = 0
    for i in range(d):
        if i in train_indices:
            P[i][p_indices[j]] = 1
            j += 1
        else:
            P[i][i] = 1

    # Successive powers of P applied to x.
    c_train_ds = []
    P1 = P
    for i in range(len(train_indices)-1):
        c_train_ds.append(np.dot(P1, x))
        P1 = P@P1

    # Start from the identity with rows id0 and id1 exchanged, then keep
    # left-multiplying by P.
    d_train_ds = []
    P1 = np.eye(d)
    id0 = train_indices[0]
    id1 = train_indices[1]
    P1[[id0,id1]] = P1[[id1,id0]]

    for i in range(len(train_indices)-1):
        d_train_ds.append(np.dot(P1, x))
        P1 = P@P1        

    # Enumerate every permutation of train_indices and sample 2*k of them
    # without replacement (uses the global NumPy RNG).
    p_indices = np.array(list(permutations(train_indices)))
    random_indices = list(np.random.choice(len(p_indices), size=2*k, replace=False))
    new_indices = p_indices[random_indices]

    p_train_ds = []
    for i in range(2*k):
        y = x.copy()
        y[train_indices] = y[new_indices[i]]
        p_train_ds.append(y)

    return np.array(c_train_ds), np.array(d_train_ds), np.array(p_train_ds)


def get_data_v1(x, train_indices):
    """Batched variant of ``get_data``: samples are the rows of ``x``.

    Relies on the notebook-level globals ``d`` (matrix size) and ``k``
    (2*k random permutations are drawn).

    Args:
        x: 2-D array with one sample per row and ``d`` columns (it is
            right-multiplied by d x d matrices and indexed as
            ``y[:, train_indices]``).
        train_indices: columns that are permuted; all others are left alone.
            The pairing of rows with ``p_indices`` below walks ``i`` in
            ascending order, so it matches a cyclic shift only when
            ``train_indices`` is sorted ascending.

    Returns:
        Tuple of three 2-D arrays, each a row-wise concatenation of blocks the
        size of ``x``: n-1 cyclically shifted blocks, n swap-then-shift blocks
        (n = len(train_indices)), and 2*k randomly permuted blocks.
    """
    # P is a d x d 0/1 matrix: identity outside train_indices, and inside it
    # the j-th selected row picks the coordinate p_indices[j] (indices rolled by one).
    P = np.zeros((d, d))
    p_indices = np.roll(train_indices, 1)
    j = 0
    for i in range(d):
        if i in train_indices:
            P[i][p_indices[j]] = 1
            j += 1
        else:
            P[i][i] = 1

    
    # Transposed so the matrix can be applied on the right of row-vector samples.
    P = P.T
    c_train_ds = [np.dot(x, P)]
    for i in range(len(train_indices)-2):
        c_train_ds.append(np.dot(c_train_ds[-1], P))

    
    # Identity with rows id0 and id1 exchanged, followed by repeated shifts.
    P1 = np.eye(d)
    id0 = train_indices[0]
    id1 = train_indices[1]
    P1[[id0,id1]] = P1[[id1,id0]]
    d_train_ds = [np.dot(x, P1)]
    for i in range(len(train_indices)-1):
        d_train_ds.append(np.dot(d_train_ds[-1], P))      

    # Enumerate every permutation of train_indices and sample 2*k of them
    # without replacement (uses the global NumPy RNG).
    p_indices = np.array(list(permutations(train_indices)))
    random_indices = list(np.random.choice(len(p_indices), size=2*k, replace=False))
    new_indices = p_indices[random_indices]

    p_train_ds = []
    for i in range(2*k):
        y = x.copy()
        y[:,train_indices] = y[:,new_indices[i]]
        p_train_ds.append(y)

    return np.concatenate(c_train_ds, axis=0), np.concatenate(d_train_ds, axis=0), np.concatenate(p_train_ds, axis=0)


def create_data(d, k, batch_size, true_indices, aux = True):
    """Sample uniform train/validation inputs and label them with ``poly_Zk_Zn``.

    The number of rows comes from the notebook-level globals ``ts`` and ``vs``
    multiplied by ``batch_size``. The ``k`` argument is not used in this
    function; ``get_data_v1`` reads the global ``k`` instead.

    Args:
        d: number of input columns.
        k: unused (see above).
        batch_size: multiplier applied to ``ts`` / ``vs``.
        true_indices: columns entering the target polynomial.
        aux: when equal to ``True``, the first five rows of each split are
            augmented with the variants produced by ``get_data_v1`` and appended.

    Returns:
        ``(train_ds, train_y, val_ds, val_y)`` as NumPy arrays.
    """
    train_ds = np.random.rand(ts * batch_size, d)
    val_ds = np.random.rand(vs * batch_size, d)
    indices = np.array(true_indices).astype(np.int64)

    # Additional data: permuted copies of a handful of rows from each split.
    if aux == True:
        train_extra = get_data_v1(train_ds[0:5], true_indices)
        train_ds = np.vstack((train_ds, *train_extra))

        val_extra = get_data_v1(val_ds[0:5], true_indices)
        val_ds = np.vstack((val_ds, *val_extra))

    train_y = poly_Zk_Zn(train_ds, indices).numpy()
    val_y = poly_Zk_Zn(val_ds, indices).numpy()
    shapes = [train_ds.shape, train_y.shape, val_ds.shape, val_y.shape]
    print("Shape info:", shapes)
    return train_ds, train_y, val_ds, val_y


## Create data

In [5]:
# Seed the global NumPy RNG so the sampled indices and data are repeatable.
np.random.seed(1024)
# ts / vs are read as globals by create_data: rows = ts (or vs) * batch_size.
ts = 64
vs = 480
# d is the input dimension; several helper functions read it as a global.
d = 10
# Candidate subset sizes (the divisors of d = 10).
divisors = [1, 2, 5, 10]
# Number of coordinates in the target polynomial; get_data / get_data_v1 read
# it as a global (the original note indicates it used to come from args.k).
k = 5   #args.k

# Draw k distinct coordinates, sorted ascending, as the ground-truth subset.
true_indices = np.sort(np.random.choice(list(range(0, d)), k, replace=False)).tolist()
# batch_size=1 and aux=True: augmented rows are appended to both splits.
train_ds, train_y, val_ds, val_y = create_data(d, k, 1, true_indices,True)
print('k:',k)
print('True Indices:',true_indices)

2023-05-09 17:01:04.157673: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2023-05-09 17:01:04.198753: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: 
pciBusID: 0000:17:00.0 name: RTX A6000 computeCapability: 8.6
coreClock: 1.8GHz coreCount: 84 deviceMemorySize: 47.54GiB deviceMemoryBandwidth: 715.34GiB/s
2023-05-09 17:01:04.198808: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
2023-05-09 17:01:04.203116: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11
2023-05-09 17:01:04.203206: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11
2023-05-09 17:01:04.204565: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10
2023-0

Shape info: [(159, 10), (159,), (575, 10), (575,)]
k: 5
True Indices: [0, 2, 3, 6, 7]


## Regularizers

In [6]:
def my_regularizer(x):
    """Scaled mean column entropy of ``x`` after normalising each column to sum to 1.

    No positivity guard is applied: the logarithm is taken of the normalised
    entries as they are, so non-positive entries do not give a finite result.

    Args:
        x: weight tensor; axis 0 is the axis that is normalised and summed over.

    Returns:
        ``1e-5`` times the mean (over the remaining axes) of the entropies.
    """
    column_totals = tf.reduce_sum(x, axis=0)
    probs = x / column_totals
    neg_p_log_p = -probs * tf.math.log(probs)
    column_entropy = tf.reduce_sum(neg_p_log_p, axis=0)
    return 1e-5 * tf.reduce_mean(column_entropy)

# NOTE(review): lambda_val is not referenced anywhere in the visible notebook - confirm it is still needed.
lambda_val = 1e-02
# Shared L2 kernel regulariser passed to the Dense layers of the discovery models.
l2_reg = tf.keras.regularizers.l2(1e-5)

## Model

In [7]:
class CyclicGroupInvarianceDiscover(tf.keras.Model):
    """Two-branch model over d*(d-1) paired inputs with sum pooling.

    The input goes through ``linear1``; two linear maps (``inputs1`` and
    ``inputs2``) each expand it to d*(d-1) values, and both expansions go
    through the same ``linear2``. The two results are paired element-wise,
    embedded by a shared MLP, summed over the pair axis and passed to a dense
    head.

    Other cells in this notebook address the first four layers positionally as
    ``model.layers[0..3]``, so the attribute assignment order in ``__init__``
    must stay linear1, inputs1, inputs2, linear2.
    """

    def __init__(self, d):
        """
        Args:
            d: input dimension.
        """
        super().__init__()
        tanh = tf.keras.activations.tanh
        n_pairs = d * (d - 1)

        self.d = d

        self.linear1 = Dense(d, activation=None)
        self.inputs1 = Dense(n_pairs, use_bias=True, activation=None)
        self.inputs2 = Dense(n_pairs, use_bias=True, activation=None)
        self.linear2 = Dense(n_pairs, activation=None)

        # Shared embedding applied to every (in1, in2) pair.
        self.features = [
            Dense(width, tanh, kernel_regularizer=l2_reg) for width in (16, 32, 32)
        ]

        # Sum pooling over the pair axis.
        self.Add = tf.keras.layers.Lambda(
            lambda x: tf.reduce_sum(x, axis=1),
            output_shape=(lambda shape: (shape[0], shape[2])),
        )

        # Regression head.
        self.fc = [
            Dense(32, tanh, kernel_regularizer=l2_reg),
            Dense(32, tanh, kernel_regularizer=l2_reg),
            Dense(1, kernel_regularizer=l2_reg),
        ]

    def call(self, inputs):
        """Forward pass; the shape notes assume a (B, d) input batch."""
        selected = self.linear1(inputs)                                           # (B, d)
        branch_a = tf.expand_dims(self.linear2(self.inputs1(selected)), axis=2)   # (B, d*(d-1), 1)
        branch_b = tf.expand_dims(self.linear2(self.inputs2(selected)), axis=2)   # (B, d*(d-1), 1)

        pairs = tf.concat((branch_a, branch_b), axis=-1)                          # (B, d*(d-1), 2)
        embedded = apply_layers(pairs, self.features)                             # (B, d*(d-1), 32)
        pooled = self.Add(embedded)                                               # (B, 32)
        return apply_layers(pooled, self.fc)                                      # (B, 1)



In [None]:
class CyclicGroupInvarianceDiscoverPart(tf.keras.Model):
    """Wider variant of the two-branch pair model that prints its branch tensors.

    The input goes through ``linear1``; ``inputs1`` and ``inputs2`` each
    expand it to d*(d-1) values, and both expansions go through the same
    ``linear2``. The paired values are embedded by a shared MLP
    (16 -> 64 -> 128), summed over the pair axis and passed to a dense head.
    ``call`` prints both branch tensors for inspection.

    Other cells in this notebook address the first four layers positionally as
    ``model.layers[0..3]``, so the attribute assignment order in ``__init__``
    must stay linear1, inputs1, inputs2, linear2.
    """

    def __init__(self, d):
        """
        Args:
            d: input dimension.
        """
        super().__init__()
        tanh = tf.keras.activations.tanh
        n_pairs = d * (d - 1)
        self.d = d

        self.linear1 = Dense(d, activation=None)
        self.inputs1 = Dense(n_pairs, use_bias=True, activation=None)
        self.inputs2 = Dense(n_pairs, use_bias=True, activation=None)
        self.linear2 = Dense(n_pairs, activation=None)

        # Shared embedding applied to every (in1, in2) pair.
        self.features = [
            Dense(width, tanh, kernel_regularizer=l2_reg) for width in (16, 64, 128)
        ]

        # Sum pooling over the pair axis.
        self.Add = tf.keras.layers.Lambda(
            lambda x: tf.reduce_sum(x, axis=1),
            output_shape=(lambda shape: (shape[0], shape[2])),
        )

        # Regression head.
        self.fc = [
            Dense(width, tanh, kernel_regularizer=l2_reg) for width in (64, 64, 32, 32)
        ] + [Dense(1, kernel_regularizer=l2_reg)]

    def call(self, inputs):
        """Forward pass; the shape notes assume a (B, d) input batch."""
        selected = self.linear1(inputs)                                           # (B, d)
        branch_a = tf.expand_dims(self.linear2(self.inputs1(selected)), axis=2)   # (B, d*(d-1), 1)
        branch_b = tf.expand_dims(self.linear2(self.inputs2(selected)), axis=2)   # (B, d*(d-1), 1)

        # Diagnostic output of the two branches.
        print("in1\n:", branch_a)
        print("in2\n:", branch_b)

        pairs = tf.concat((branch_a, branch_b), axis=-1)                          # (B, d*(d-1), 2)
        embedded = apply_layers(pairs, self.features)                             # (B, d*(d-1), 128)
        pooled = self.Add(embedded)                                               # (B, 128)
        return apply_layers(pooled, self.fc)                                      # (B, 1)

## Selection matrices

In [46]:
def new_matrix(k,d,indices,subgroup_indices):
    """Build the two fixed weight matrices (L1, L2) for a subgroup hypothesis.

    Args:
        k: number of selected coordinates.
        d: input dimension.
        indices: the selected coordinate positions (first ``k`` are used).
        subgroup_indices: 0 for Sk, 1 for D2k, 2 for Zk.

    Returns:
        ``(M3, M4)``:
        ``M3`` - for Sk a d x d matrix whose first ``k`` rows select
        ``indices``; for D2k/Zk the k x d selection block stacked ``d // k``
        times.
        ``M4`` - a d*(d-1) square matrix: the identity for Sk; for Zk an
        identity block in the top-left d x d corner; for D2k that block plus a
        second identity block in the last d rows of the first d columns.

    Raises:
        ValueError: if ``subgroup_indices`` is not 0, 1 or 2. The previous
            condition ``subgroup_indices == 1 or 2`` was always true, so an
            invalid code reached ``return`` with ``M4`` unassigned.
    """
    if subgroup_indices not in (0, 1, 2):
        raise ValueError(
            "subgroup_indices must be 0 (Sk), 1 (D2k) or 2 (Zk), got %r" % (subgroup_indices,)
        )

    if subgroup_indices == 0:
        M3 = np.zeros((d,d))
        for row,index in zip(np.arange(k),indices):
            M3[row,index] = 1                                       #L1, Sk

        M4 = np.eye(d*(d-1),d*(d-1))                                #L2, Sk
        return M3,M4

    # D2k and Zk share the first-layer matrix: the selection block repeated.
    l = d//k
    I_k = np.zeros((k,d))
    for row,index in zip(np.arange(k),indices):
        I_k[row,index] = 1
    M3 = np.vstack((I_k,)*l)                                        #L1, D2k or Zk

    I4 = np.eye(d)
    if subgroup_indices == 1:
        M4 = np.vstack((I4,np.zeros(((d**2)-3*d,d)),I4))
        M4 = np.hstack((M4,np.zeros(((d*(d-1),(d**2)-2*d)))))       #L2, D2k
    else:
        M4 = np.vstack((I4,np.zeros(((d**2)-2*d,d))))
        M4 = np.hstack((M4,np.zeros((d*(d-1),(d**2)-2*d))))         #L2, Zk

    return M3,M4

def new_matrix_all(k,d,indices,subgroup_indices):
    """Build the two fixed weight matrices (L1, L2) with a full d x d first layer.

    Args:
        k: number of selected coordinates.
        d: input dimension.
        indices: the selected coordinate positions (first ``k`` are used).
        subgroup_indices: 0 for Sk, 1 for D2k, 2 for Zk. Only consulted when
            ``k > 1``.

    Returns:
        ``(M3, M4)``: ``M3`` is d x d with row ``r`` selecting ``indices[r]``
        for the first ``k`` rows. ``M4`` is a d*(d-1) square 0/1 matrix: the
        identity for Sk, a sparse row-selection pattern for Zk/D2k, and a
        single 1 at ``[0, 0]`` when ``k <= 1``.

    Raises:
        ValueError: if ``k > 1`` and ``subgroup_indices`` is not 0, 1 or 2.
            The previous condition ``subgroup_indices == 1 or 2`` was always
            true, so an invalid code silently produced the Zk matrix.
    """
    n_pairs = d*(d-1)

    M3 = np.zeros((d,d))
    for row,index in zip(np.arange(k),indices):
        M3[row,index] = 1

    if(k>1):
        if subgroup_indices not in (0, 1, 2):
            raise ValueError(
                "subgroup_indices must be 0 (Sk), 1 (D2k) or 2 (Zk), got %r" % (subgroup_indices,)
            )

        if subgroup_indices == 0:
            M4 = np.eye(n_pairs,n_pairs)                                #L2, Sk

        else:    #D2k or Zk
            M4 = np.zeros((n_pairs,n_pairs))
            M4[0:k-1,0:k-1] = np.eye(k-1)
            key_loc_Zk = ((d-k)*d) + k - 1
            M4[k-1, key_loc_Zk] = 1
            if subgroup_indices == 1:
                # Absolute column bounds: the former negative slice
                # ``-d+1:-d+k`` has stop == 0 when k == d, which selects no
                # columns and makes the assignment fail.
                col_start = n_pairs - d + 1
                M4[k:(2*k)-1, col_start:col_start + k - 1] = np.eye(k-1)
                key_loc_D2k = ((k-2)*d)
                M4[(2*k)-1, key_loc_D2k] = 1
    else:
        M4 =  np.zeros((n_pairs,n_pairs))
        M4[0,0] = 1

    return M3,M4


def generate_matrix(k_,subgroup_indices):
    """Sample ``k_`` distinct coordinates and build their ``new_matrix`` pair.

    Uses the notebook-level global ``d`` and the global NumPy RNG.

    Args:
        k_: number of coordinates to draw from ``range(d)``.
        subgroup_indices: subgroup code forwarded to ``new_matrix``.

    Returns:
        ``(L1, L2, train_indices)`` where ``train_indices`` is the sorted draw.
    """
    candidates = list(range(0, d))
    chosen = np.sort(np.random.choice(candidates, k_, replace=False))
    first_layer, second_layer = new_matrix(len(chosen), d, chosen, subgroup_indices)
    return first_layer, second_layer, chosen

def generate_matrix_given_indices(train_indices,subgroup_indices):
    """Build the ``new_matrix`` pair for an explicit set of coordinates.

    Uses the notebook-level global ``d``.

    Args:
        train_indices: selected coordinate positions.
        subgroup_indices: subgroup code forwarded to ``new_matrix``.

    Returns:
        ``(L1, L2)``.
    """
    n_selected = len(train_indices)
    first_layer, second_layer = new_matrix(n_selected, d, train_indices, subgroup_indices)
    return first_layer, second_layer

def generate_matrix_all(k_,subgroup_indices):
    """Sample ``k_`` distinct coordinates and build their ``new_matrix_all`` pair.

    Uses the notebook-level global ``d`` and the global NumPy RNG.

    Args:
        k_: number of coordinates to draw from ``range(d)``.
        subgroup_indices: subgroup code forwarded to ``new_matrix_all``.

    Returns:
        ``(L1, L2, train_indices)`` where ``train_indices`` is the sorted draw.
    """
    candidates = list(range(0, d))
    chosen = np.sort(np.random.choice(candidates, k_, replace=False))
    first_layer, second_layer = new_matrix_all(len(chosen), d, chosen, subgroup_indices)
    return first_layer, second_layer, chosen

def generate_matrix_given_indices_all(train_indices,subgroup_indices):
    """Build the ``new_matrix_all`` pair for an explicit set of coordinates.

    Uses the notebook-level global ``d``.

    Args:
        train_indices: selected coordinate positions.
        subgroup_indices: subgroup code forwarded to ``new_matrix_all``.

    Returns:
        ``(L1, L2)``.
    """
    n_selected = len(train_indices)
    first_layer, second_layer = new_matrix_all(n_selected, d, train_indices, subgroup_indices)
    return first_layer, second_layer

def get_loss(M5,M6,epochs=100, batch_size=4):
    """Reset ``Model_discover``, install the given matrices and train it.

    Works entirely on notebook-level globals: ``Model_discover``,
    ``initial_weights``, ``d``, the train/validation arrays and ``callback``.

    Args:
        M5: matrix whose transpose becomes the kernel of ``layers[0]``.
        M6: matrix whose transpose becomes the kernel of ``layers[3]``.
        epochs: number of training epochs.
        batch_size: mini-batch size.

    Returns:
        The Keras ``History`` object returned by ``fit``.
    """
    # Start every evaluation from the same stored weights.
    Model_discover.set_weights(initial_weights)

    model_layers = Model_discover.layers
    model_layers[0].set_weights([M5.T, np.zeros(d)])             # zero bias
    model_layers[3].set_weights([M6.T, np.zeros(d*(d-1))])       # zero bias

    started = time.time()
    history = Model_discover.fit(
        train_ds,
        train_y,
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        validation_data=(val_ds, val_y),
        callbacks=[callback],
    )
    elapsed = time.time() - started
    print('Time:', elapsed)

    return history

## Analyse model

In [None]:
# Drop any previous analysis model before building a new one. Only the
# "name not defined yet" case is handled; other errors are allowed to surface.
try:
    del Model_discover_part
    tf.keras.backend.clear_session()
    gc.collect()
except NameError:
    print("model hasn't been yet defined")

Model_discover_part = CyclicGroupInvarianceDiscoverPart(d)
M1, M2 = get_matrix(d)
adam = Adam(learning_rate=1e-3)
# Calling the model once creates the layer weights so they can be overwritten below.
sample_output = Model_discover_part(val_ds)

Model_discover_part.layers[0].trainable = False                    # Linear1
Model_discover_part.layers[1].trainable = False
Model_discover_part.layers[2].trainable = False
Model_discover_part.layers[3].trainable = False                    # Linear 2

Model_discover_part.compile(optimizer=adam, loss='mae')
# NOTE(review): this reuses the global name `initial_weights` that get_loss()
# applies to Model_discover; re-run the "Set trainable params" cell before
# calling get_loss() if this cell was executed after it.
initial_weights=Model_discover_part.get_weights()
Model_discover_part.set_weights(initial_weights)

# inputs1 / inputs2 are created with use_bias=True, so set_weights needs both
# a kernel and a bias; passing the kernel alone is rejected by Keras.
bias_inputs = np.zeros(d*(d-1))
Model_discover_part.layers[1].set_weights([M1.T, bias_inputs])
Model_discover_part.layers[2].set_weights([M2.T, bias_inputs])
# summary() prints by itself and returns None; wrapping it in print() adds a stray "None".
Model_discover_part.summary()


#True Indices: [0, 2, 3, 6, 7]
train_indices = np.array([0, 2, 3, 6, 7]).astype(np.int32)
M5, M6  = generate_matrix_given_indices_all(train_indices,2)       # 2 -> Zk


bias1 = np.zeros(d)
bias2 = np.zeros(d*(d-1))
Model_discover_part.layers[0].set_weights([M5.T, bias1])
Model_discover_part.layers[3].set_weights([M6.T, bias2])

## Checkpoints

In [None]:
# Checkpoint file, relative to the notebook's working directory.
filepath = 'Pavan_May_7/saved_model.h5'
# Passed to Model_discover.fit() inside get_loss(). Saves weights only, and
# only when the monitored quantity improves (no `monitor` is given, so the
# Keras default applies).
callback = tf.keras.callbacks.ModelCheckpoint(filepath,
                                                save_best_only=True,
                                                save_weights_only=True,)



## Create and build model

In [8]:
# Drop any previous model before building a new one. Only the "name not
# defined yet" case is handled; a bare `except:` would also hide unrelated
# failures (and KeyboardInterrupt) behind the same message.
try:
    del Model_discover
    tf.keras.backend.clear_session()
    gc.collect()
except NameError:
    print("model hasn't been yet defined")

Model_discover = CyclicGroupInvarianceDiscover(d)
M1_cc, M2_cc = get_matrix(d)
adam = Adam(learning_rate=1e-3)
# Calling the model once creates the layer weights so later cells can overwrite them.
sample_output = Model_discover(val_ds)

model hasn't been yet defined


2023-05-09 17:01:21.381445: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11
2023-05-09 17:01:22.084410: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11
2023-05-09 17:01:22.084468: I tensorflow/stream_executor/cuda/cuda_blas.cc:1838] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


## Set trainable params

In [9]:
# Freeze the four structural layers; their weights are set explicitly rather
# than learned.
Model_discover.layers[0].trainable = False                    # Linear1
Model_discover.layers[1].trainable = False
Model_discover.layers[2].trainable = False
Model_discover.layers[3].trainable = False                    # Linear 2

Model_discover.compile(optimizer=adam, loss='mae')


bias_l1 = np.zeros(d*(d-1))
bias_l2 = np.zeros(d*(d-1))

# Fixed pairing matrices from get_matrix(d), with zero biases.
Model_discover.layers[1].set_weights([M1_cc.T, bias_l1])
Model_discover.layers[2].set_weights([M2_cc.T, bias_l2])

# Snapshot used by get_loss() to reset the model before every training run.
initial_weights=Model_discover.get_weights()
Model_discover.set_weights(initial_weights)

# summary() prints by itself and returns None; wrapping it in print() adds a stray "None".
Model_discover.summary()

Model: "cyclic_group_invariance_discover"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                multiple                  110       
_________________________________________________________________
dense_1 (Dense)              multiple                  990       
_________________________________________________________________
dense_2 (Dense)              multiple                  990       
_________________________________________________________________
dense_3 (Dense)              multiple                  8190      
_________________________________________________________________
dense_4 (Dense)              multiple                  48        
_________________________________________________________________
dense_5 (Dense)              multiple                  544       
_________________________________________________________________
dense_6 (Dense)              multi

## Contexts for bandit arms

In [None]:
# Every non-empty subset of range(d), ordered by size.
C = [list(subset) for size in range(1, d+1) for subset in combinations(range(d), size)]

# One 0/1 indicator row per subset.
At = np.zeros((len(C), d))
for i, idx in enumerate(C):
    At[i, idx] = 1

# Row normalisation is intentionally left disabled:
#l2_norms_rows = LA.norm(At, 2, axis=1)[:, np.newaxis]
#At = At/l2_norms_rows
# Append a constant column of ones.
At = np.hstack((At, np.ones((At.shape[0], 1))))

# Subsets whose size is one of the entries of `divisors`.
C_d = [list(subset) for size in divisors for subset in combinations(range(d), size)]

d_At = np.zeros((len(C_d), d))
for i, idx in enumerate(C_d):
    d_At[i, idx] = 1

# Row normalisation is intentionally left disabled:
#l2_norms_rows = LA.norm(d_At, 2, axis=1)[:, np.newaxis]
#d_At = d_At/l2_norms_rows
d_At = np.hstack((d_At, np.ones((d_At.shape[0], 1))))

# Tag each arm with a three-column one-hot code: [1,0,0], [0,1,0] or [0,0,1].
n_all, n_div = At.shape[0], d_At.shape[0]
s_arms = np.hstack((At, np.array([[1, 0, 0]] * n_all)))
d_arms = np.hstack((d_At, np.array([[0, 1, 0]] * n_div)))
z_arms = np.hstack((d_At, np.array([[0, 0, 1]] * n_div)))

At = np.vstack((s_arms, d_arms, z_arms))
print('At shape:',At.shape)


## TS params

In [None]:
# NOTE(review): apart from `As` and `b`, none of the names defined here are
# read by the visible run loop - confirm where they are used.
#R = 0.01
R = 0.1
epsilon = 0.5
delta = 0.5
# Width of one context row built in the contexts cell: d indicator columns,
# one constant column and three one-hot subgroup columns.
n_features = d+4
# Meaning of the subgroup codes used by new_matrix / new_matrix_all.
subgroup = {0:'Sk', 1:'D2k', 2:'Zk'}
# Initial values; B and B_inv start as the identity, f and mu_hat as zero vectors.
B = np.eye(n_features)
B_inv = np.eye(n_features)
f = np.zeros((n_features, 1))
mu_hat = np.zeros((n_features, 1))
# Number of passes of the run loop below.
arm_iterations = 450
# NOTE(review): presumably the Thompson-sampling scale R*sqrt(24/eps * n * ln(1/delta)) - confirm against the reference used.
v = R * np.sqrt(24 / epsilon * n_features * np.log(1 / delta))
contexts = At
# Filled by the run loop: one context vector and one minimum validation loss per iteration.
As = []
b = []

## Run Iterations

In [None]:
# Each iteration draws a subset size from `divisors`, samples that many
# coordinates, trains Model_discover with the matching Zk matrices and records
# the best validation loss together with the subset indicator.
for i in np.arange(arm_iterations):
    print("Iteration:",i)
    # Index the 1-element result explicitly: int() on a non-0-d array is
    # deprecated in recent NumPy. The draw itself is unchanged.
    k_=int(np.random.choice(divisors,1,replace=False)[0])
    subgroup_indices = 2                                   # Zk

    L1, L2, train_indices = generate_matrix(k_,subgroup_indices)
    # Indicator vector over all d coordinates (was hard-coded to length 10).
    contexts_t = np.zeros(d)
    contexts_t[train_indices] = 1
    train_history = get_loss(L1,L2)
    min_loss = np.min(train_history.history['val_loss'])
    reward = -min_loss

    As.append(contexts_t)
    b.append(min_loss)
    print([min_loss,train_indices])

    # Persist after every iteration so partial runs are not lost.
    np.save('c_r_Zk_As.npy',np.array(As))
    np.save('c_r_Zk_b.npy',np.array(b))


## Del model

In [None]:
# Remove the model, reset the Keras backend session and run the garbage
# collector. Raises NameError if Model_discover is not defined.
del Model_discover 
tf.keras.backend.clear_session()
gc.collect() 