In [4]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer

In [13]:
def load_data():
    train_set1 = pd.read_csv('training.csv', index_col='ID')
    train_set2 = pd.read_csv('additional_training.csv', index_col='ID')
    label_confidence = pd.read_csv('annotation_confidence.csv', index_col='ID').values
    source_set = pd.concat((train_set1, train_set2), axis=0)
    
    imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
    X_source = imputer.fit_transform(source_set.iloc[:,:-1])
    y_source = source_set.iloc[:,-1].values.astype(np.float32)
    
    # Soften the label by using the label confidence
    for i, y in enumerate(y_source):
        y_source[i] = label_confidence[i] * y_source[i] + (1-label_confidence[i])*(1-y_source[i])

    # normalize the features
    train_mean = np.mean(X_source, axis=0)
    train_std = np.std(X_source, axis=0)
    X_source = (X_source - train_mean) / train_std
    X_source = tf.convert_to_tensor(X_source, dtype=tf.float32)
    y_source = tf.reshape(tf.convert_to_tensor(y_source, dtype=tf.float32),(-1,1))

    X_target = pd.read_csv('testing.csv', index_col='ID').values
    m = np.mean(X_target, axis=0)
    s = np.std(X_target, axis=0)
    X_target = (X_target - m) / s
    X_target = tf.convert_to_tensor(X_target, dtype=tf.float32)
    
    return X_source,y_source,X_target
X_source,y_source,X_target = load_data()

In [155]:
a = np.array([[1,2],[6,7],[6,7]])
b = np.array([[1,2],[6,7]])

In [166]:
batches_x = tf.data.Dataset.from_tensor_slices((X_source)).batch(64,drop_remainder=True)
batches_y = tf.data.Dataset.from_tensor_slices((y_source)).batch(64,drop_remainder=True)
batches_test = tf.data.Dataset.from_tensor_slices((X_target)).batch(64,drop_remainder=True)

iter_x = batches_x.as_numpy_iterator()
iter_y = batches_y.as_numpy_iterator()
iter_test = batches_test.as_numpy_iterator()

In [168]:
iter_x.shape

AttributeError: '_NumpyIterator' object has no attribute 'shape'

In [145]:
iter_train = tf.data.Dataset.from_tensor_slices((X_source,y_source)).batch(64)

In [47]:
num_batches_train = int((len(X_source)/64)+0.5)
num_batches_test = int((len(X_target)/64)+0.5)

In [169]:
class ADDA():
    from functools import partial
    def __init__(self, betas = (0.5, 0.999), lr1 = 1e-4, epochs = 10,
                 lr2 = 2e-4,dropout=0.2, slope=0.1):
        #variables 
        self.es = 'es'
        self.et = 'et'
        self.c = 'c'
        self.d = 'd'
        self.lr1 = lr1
        self.lr2 = lr2
        self.betas = betas
        self.epochs = epochs
        self.dropout = dropout
        self.slope = slope
        
    class encoder():
        from functools import partial
        def __init__(self, name, slope = 0.2, dropout = 0.1):
            self.name = name
            self.slope = slope
            self.dropout = dropout
            
        def encode(self,inputs,trainable = True):
            from functools import partial
            with tf.compat.v1.variable_scope(self.name,reuse=tf.AUTO_REUSE):
                flat = tf.compat.v1.layers.flatten(inputs,name = 'flat')
                dp1 = tf.compat.v1.layers.dropout(flat,self.dropout,name = 'dp1')
                
                dense1 = tf.compat.v1.layers.dense(dp1,1024,activation=partial(tf.nn.leaky_relu, alpha=self.slope),
                                            trainable = trainable,name = 'dense1')
                
                dp2 = tf.compat.v1.layers.dropout(dense1,dropout,name = 'dp2')
                
                output = tf.compat.v1.layers.dense(dp2,512,trainable = trainable,
                                            activation = tf.nn.leaky_relu(self.slope),name = 'output')
                return output
            
    def discrimnator(self,inputs,reuse = False, trainable = True):
        with tf.compat.v1.variable_scope(self.d,reuse = tf.AUTO_REUSE):
            dense1 = tf.compat.v1.layers.dense(inputs,256,trainable = trainable,activation = tf.nn.leaky_relu(0.2),name = 'dense1')
            dense2 = tf.compat.v1.layers.dense(dense1,128,trainable = trainable,activation = tf.nn.leaky_relu(0.2),name = 'dense2')
            dense3 = tf.compat.v1.layers.dense(dense2,64,trainable = trainable,activation = tf.nn.leaky_relu(0.2),name = 'dense3')
            output = tf.compat.v1.layers.dense(dense3,1,trainable = trainable,activation = tf.nn.sigmoid,name = 'output')
            return output
        
    def classifier(self,inputs, trainable = True):
        with tf.compat.v1.variable_scope(self.c,reuse = tf.AUTO_REUSE):
            dense1 = tf.compat.v1.layers.dense(inputs,256,activation = tf.nn.leaky_relu(0.2),name = 'dense1')
            dense2 = tf.compat.v1.layers.dense(dense1,64,activation = tf.nn.leaky_relu(0.2),name = 'dense1')
            output = tf.compat.v1.layers.dense(dense2,1,activation = tf.nn.sigmoid,name = 'output')
            return output
        
    def build_encoder_s(self):
        return self.encoder(self.es,self.slope,self.dropout)
    
    def build_encoder_t(self):
        return self.encoder(self.et,self.slope,self.dropout)
        
    def classify(self,iter_x,iter_y,num_batches):
        # create encoder of source domian and classifier for source domain
        encoder_es = self.build_encoder_s().encode(iter_x)
        cls_s = self.cliassifier(encoder_es)
        
        # build loss and optimizer
        classification_loss = tf.keras.losses.BinaryCrossentropy(cls_s,iter_y)
        opt_c = tf.keras.optimizers.Adam(lr = self.lr2, beta_1 = self.betas[0],
                                               beta_2 = self.betas[1]).minimize(classification_loss)
        
        # start a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True 
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(self.epochs):
                for k in range(num_batches):
                    _,loss,acc = sess.run([opt_c,classification_loss])
                    
    def discriminate(self,iter_x,iter_test,batch_size,num_batches):
        # create encoder of target domian and discrimnator for target domain and source domain
        encoder_et = self.build_encoder_t().encode(iter_test)
        encoder_es = self.build_encoder_s().encode(iter_x,reuse = True, trainable = False)
        dis_t = self.discrimnator(encoder_et)
        dis_s = self.discrimnator(encoder_es)
        
        # domain label 1 for source, 0 for target
        valid = tf.ones_like(dis_t)
        fake = tf.zeros_like(dis_s)
        
        target_loss = tf.keras.losses.BinaryCrossentropy(dis_t,valid)
        discrimination_loss = tf.keras.losses.BinaryCrossentropy(dis_t,fake) + \
                                tf.keras.losses.BinaryCrossentropy(dis_s,valid)
        
        opt_et = tf.keras.optimizers.Adam(lr = self.lr2, beta_1 = self.betas[0],
                                               beta_2 = self.betas[1]).minimize(target_loss,var_list = self.et)
        opt_d = tf.keras.optimizers.Adam(lr = self.lr1, beta_1 = self.betas[0],beta_2 = 
                                              self.betas[1]).minimize(discrimination_loss,var_list = self.d)
        
        # start a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True 
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(self.epochs):
                for k in range(num_batches):
                    if k > 20:
                        break
                    _,loss_t,acc_t = sess.run([opt_et,target_loss])
                    _,loss_d,acc_d = sess.run([opt_d,discrimination_loss])
                    
    def predict(self,iter_test,num_batches):
        encoder_et = self.build_encoder_t().encode(iter_test,reuse = True, trainable = False)
        classifier_t = self.cliassifier(encoder_es, reuse = True, trainable = False)
        predictions = []
        # start a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True 
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            for k in range(num_batches):
                result = sess.run([classifier_t])
                predictions = np.r_[predictions,result]
                
        predictions = (predictions>0.5).astype(np.int32)
        return predictions
        

In [170]:
adda = ADDA()

adda.classify(iter_x,iter_y,num_batches_train)
adda.discriminate(iter_x,iter_test,64,num_batches_test)
predictions = adda.predict()

predictions = list(zip(1,range(predictions.shape[0]+1),predictions))
predictions = pd.DataFrame(data=predictions, columns=['ID', 'prediction'])
predictions.to_csv("submission_ADDA.csv",index=False, header=True)        

AttributeError: module 'tensorflow' has no attribute 'AUTO_REUSE'

In [153]:
flat = tf.keras.layers.Flatten(name = 'flat')(a)

In [158]:
flat

<tf.Tensor: shape=(3, 2), dtype=int64, numpy=
array([[1, 2],
       [6, 7],
       [6, 7]])>

In [156]:
flat = tf.compat.v1.layers.flatten(a,name = 'flat')

In [157]:
flat.shape

TensorShape([3, 2])

In [143]:
import dataset 

ModuleNotFoundError: No module named 'dataset'

In [None]:
data_func = dataset.get_dataset_v2(source)
x_tr,y_tr,x_te,y_te,tr_size,te_size,te_init = data_func(batch_size,training_size,testing_size)