In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np

In [2]:
# Load the first 1000 examples of the train and test splits; with
# as_supervised=True every element is a 2-tuple that the preprocessing step
# below unpacks as (sequence, label).
train, test = tfds.load(
    'genomics_ood',
    split=["train[:1000]", "test[:1000]"],
    as_supervised=True,
)

In [3]:
# Map each nucleotide letter to its integer class index (A=0, C=1, G=2, T=3).
vocab = {base: index for index, base in enumerate("ACGT")}

In [4]:


def preprocessing(dataset):
    """Turn a supervised (sequence, label) dataset into a batched, one-hot pipeline.

    Every sequence is decoded byte-by-byte through the module-level ``vocab``
    mapping, one-hot encoded with depth 4 and flattened to a single vector of
    250 * 4 entries. Labels are one-hot encoded with depth 10.

    Args:
        dataset: Iterable of ``(sequence, label)`` tensor pairs. The sequence
            tensor's ``.numpy()`` must yield bytes over the letters in ``vocab``
            (250 of them, as required by the reshape below).

    Returns:
        A ``tf.data.Dataset`` of ``(x, y)`` uint8 batches of up to 1000 examples,
        shuffled at example level.
    """
    # Materialise once: the examples are traversed twice (inputs and labels).
    examples = list(dataset)

    # Iterating over the bytes object yields ints; chr() maps them back to letters.
    x = np.array([[vocab[chr(i)] for i in sequence.numpy()] for sequence, _ in examples])
    y = np.array([label.numpy() for _, label in examples], dtype="uint8")

    y = tf.one_hot(y, 10, dtype="uint8")

    x = tf.one_hot(x, 4, dtype="uint8")
    # Flatten (examples, 250, 4) -> (examples, 1000) for the dense network.
    x = tf.reshape(x, (len(examples), 250 * 4))

    pipeline = tf.data.Dataset.from_tensor_slices((x, y))
    # Shuffle BEFORE batching so individual examples are reshuffled; shuffling
    # after batch() would only permute whole batches. The data is already in
    # memory, so the buffer can cover all of it for a uniform shuffle.
    pipeline = pipeline.shuffle(buffer_size=max(len(examples), 1))
    pipeline = pipeline.batch(1000)
    # Prefetch last so complete batches are prepared ahead of the consumer.
    pipeline = pipeline.prefetch(1)

    return pipeline

In [6]:
# NOTE: `test` here is the raw split returned by tfds.load. A later cell rebinds
# the name `test` to the evaluation function, so this cell has to run before
# that definition (or after re-running the loading cell).
train_dataset, test_dataset = preprocessing(train), preprocessing(test)

# Inspect the batches produced by the training pipeline.
for features, labels in train_dataset:
    print(features, labels)

tf.Tensor(
[[1 0 0 ... 0 0 1]
 [0 1 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 0 1 0]
 [1 0 0 ... 0 1 0]], shape=(1000, 1000), dtype=uint8) tf.Tensor(
[[0 0 0 ... 1 0 0]
 [0 0 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 1 0 0]], shape=(1000, 10), dtype=uint8)


In [8]:
# Scratch check: 10 divides evenly by 10, so the remainder is 0.
a = 10 % 10
print(a)

0


In [None]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Layer

class Model(tf.keras.Model):
    """Fully connected classifier: two sigmoid hidden layers and a softmax output.

    The base class is spelled out as ``tf.keras.Model`` so that this class,
    which is itself named ``Model``, does not depend on a same-named import.
    """

    def __init__(self):
        super().__init__()
        sigmoid = tf.keras.activations.sigmoid
        # Two hidden layers of 256 sigmoid units each.
        self.hidden_layer_1 = tf.keras.layers.Dense(units=256, activation=sigmoid)
        self.hidden_layer_2 = tf.keras.layers.Dense(units=256, activation=sigmoid)
        # 10-way softmax output; this layer is built without a bias term.
        self.output_layer = tf.keras.layers.Dense(
            units=10,
            activation=tf.keras.activations.softmax,
            use_bias=False,
        )

    def call(self, x):
        """Forward pass: feed ``x`` through both hidden layers, then the output layer."""
        hidden = self.hidden_layer_2(self.hidden_layer_1(x))
        return self.output_layer(hidden)

In [None]:
def train_step(model, input, target, loss_function, optimizer):
    """Run one optimisation step on a single batch and return its loss.

    Args:
        model: Callable model exposing ``trainable_variables``.
        input: Batch of model inputs.
        target: Batch of targets, passed to ``loss_function`` as the true values.
        loss_function: Callable invoked as ``loss_function(target, prediction)``.
        optimizer: Object exposing ``apply_gradients`` (e.g. a Keras optimizer).

    Returns:
        The loss computed for this batch before the weights were updated.
    """
    # Only the forward pass and the loss need to be recorded on the tape.
    with tf.GradientTape() as tape:
        prediction = model(input)
        loss = loss_function(target, prediction)
    # Differentiate outside the tape context: inside it, the gradient ops
    # themselves would be recorded, costing extra CPU and memory for no benefit.
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss

def test(model, test_data, loss_function):
    # test over complete test data

    test_loss_aggregator = []

    for (input, target) in test_data:
        prediction = model(input)
        sample_test_loss = loss_function(target, prediction)
        test_loss_aggregator.append(sample_test_loss.numpy())

    test_loss = np.mean(test_loss_aggregator)

    return test_loss


In [None]:
tf.keras.backend.clear_session()

### Hyperparameters
num_epochs = 50
learning_rate = 0.01
running_average_factor = 0.95

# Initialize the model.
model = Model()
# Initialize the loss: categorical cross entropy (the targets are one-hot encoded).
cce = tf.keras.losses.CategoricalCrossentropy()
# Initialize the optimizer: plain SGD with the learning rate chosen above.
optimizer = tf.keras.optimizers.SGD(learning_rate)

# Initialize lists for later visualization.
train_losses = []
test_losses = []

# Evaluate once before training so both curves start from the untrained model.
test_loss = test(model, test_dataset, cce)
test_losses.append(test_loss)

train_loss = test(model, train_dataset, cce)
train_losses.append(train_loss)

# We train for num_epochs epochs.
for epoch in range(num_epochs):
    print('Epoch: __ ' + str(epoch))

    # The datasets are deliberately NOT re-wrapped in shuffle() here: the
    # pipelines built by preprocessing() already contain a shuffle stage, which
    # reshuffles on every iteration by default. Re-assigning
    # `train_dataset = train_dataset.shuffle(...)` each epoch would stack one
    # more shuffle layer per epoch and make this cell non-idempotent.

    # Training, tracking an exponential running average of the batch losses.
    # The average is seeded with the first batch loss rather than 0; starting
    # from 0 scales the logged value toward 0 when an epoch has few batches.
    running_average = None
    for (batch_input, batch_target) in train_dataset:
        train_loss = train_step(model, batch_input, batch_target, cce, optimizer)
        if running_average is None:
            running_average = train_loss
        else:
            running_average = (running_average_factor * running_average
                               + (1 - running_average_factor) * train_loss)
    # Store a plain Python float; NaN marks an epoch that yielded no batches.
    train_losses.append(float(running_average) if running_average is not None else float("nan"))

    # Testing
    test_loss = test(model, test_dataset, cce)
    test_losses.append(test_loss)

In [None]:
# Visualize the training and test loss curves recorded by the training cell.
# Each list holds one value from before training followed by one value per
# epoch, so the x-axis counts epochs rather than individual training steps.
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
line1, = ax.plot(train_losses)
line2, = ax.plot(test_losses)
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training and test loss")
ax.legend((line1, line2), ("training", "test"))
plt.show()

In [None]:
# Inspection only: show the training dataset object as the cell's output.
train_dataset