In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt

In [2]:
times   = np.loadtxt('../data/train_test/times.csv', delimiter=',')
fluxes  = np.loadtxt('../data/train_test/fluxes.csv', delimiter=',')
targets = np.loadtxt('../data/train_test/targets.csv', delimiter=',').astype('uint8')

In [3]:
# scale and translate times so that they fall in the range [0,1]
times = (times - times[:, 0, np.newaxis]) / (times[:, -1, np.newaxis] - times[:, 0, np.newaxis])

In [4]:
# transform flux so that it has μ = 0, σ = 1
scaler = StandardScaler()
fluxes = scaler.fit_transform(fluxes)

In [5]:
x = np.stack([times,fluxes],axis=-1)
y = targets

# Implement transformer

##### _AUTOBOTS, ROLL OUT_

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [7]:
# set up our transformer. d_model=E (see below)
transformer_model = nn.Transformer(d_model=2, nhead=1, num_encoder_layers=12)

Inputs of nn.Transformer:
- d_model – the number of expected features in the encoder/decoder inputs (default=512).
- nhead – the number of heads in the multiheadattention models (default=8).
- num_encoder_layers – the number of sub-encoder-layers in the encoder (default=6).
- num_decoder_layers – the number of sub-decoder-layers in the decoder (default=6).
- dim_feedforward – the dimension of the feedforward network model (default=2048).
- dropout – the dropout value (default=0.1).
- activation – the activation function of encoder/decoder intermediate layer, relu or gelu (default=relu).
- custom_encoder – custom encoder (default=None).
- custom_decoder – custom decoder (default=None).

The transformer model expects an input shape of (S, N, E) where <br>
>S: Source sequence length (ie number of datapoints in the lightcurve) <br>
>N: batch size <br>
>E: number of features in our dataset. I think this should be 2 (flux and time) <br>

The output shape is (T, N, E) where <br>
>T: Target output shape. In our case this should be 1 (single number corresponding to probability of presence of planet)

___

Here I make a sample of input data of two identical lightcurves with 4070 datapoints each. Use torch.tensor so that it can be accepted by the network (analagous to np.array)

array([[[1, 0],
        [0, 0],
        [1, 1],
        ...,
        [0, 1],
        [1, 1],
        [1, 0]]], dtype=uint8)

In [45]:
x.shape

(931, 1624, 2)

In [83]:
src = torch.tensor(x.transpose((1,0,2)))
tgt = torch.tensor(np.stack([y,y]).reshape(1, 931, 2))

In [24]:
# For some reason the number of features in input and output has to be the same. So I have to stack target on itself so there are 
# two identical columns. Weird. If you odnt do this transformer_model isn't happy.

In [86]:
# check the shape is ok
print(src.shape)
print(tgt.shape)

torch.Size([1624, 931, 2])
torch.Size([1, 931, 2])


In [89]:
out = transformer_model(src.float(),tgt.float())
# need to put .float() otherwise it kicks up an errror - weird
# this takes awhile to run and sometimes fails - I think this is an unusually large transformer model

KeyboardInterrupt: 

In [25]:
tgt

tensor([[[0.9572, 0.9044],
         [0.2659, 0.6652]]])

In [29]:
out.train

AttributeError: 'Tensor' object has no attribute 'train'