In [29]:
import pandas as pd
import numpy as np
import tensorflow as tf

### Importation des données

In [30]:
X_train = pd.read_csv('https://raw.githubusercontent.com/Tdjaaleb/QRT-Challenge-2022/main/Data/X_train.csv', index_col=0, sep=',')
X_train.columns.name = 'date'

Y_train = pd.read_csv('https://raw.githubusercontent.com/Tdjaaleb/QRT-Challenge-2022/main/Data/Y_train.csv', index_col=0, sep=',')
Y_train.columns.name = 'date'

### Preprocessing

In [31]:
X_train_reshape = pd.concat([ X_train.T.shift(i+1).stack(dropna=False) for i in range(250) ], 1).dropna()
X_train_reshape.columns = pd.Index(range(1,251), name='timeLag')
size = (25200, 250, 1)

ar = np.ndarray(size)
for i in range(25200):
    new = pd.DataFrame(X_train_reshape.iloc[i])
    ar[i]=new

X = ar.reshape((25200, 250))

size = (504,50)
ar2 = np.ndarray(size)
for i in range(504):
    new = Y_train[f"{i+250}"]
    ar2[i] = new

Y = ar2.reshape(25200,1)

  X_train_reshape = pd.concat([ X_train.T.shift(i+1).stack(dropna=False) for i in range(250) ], 1).dropna()


### Fonctions du challenge

In [32]:
########################################### CheckOrthonormality
def checkOrthonormality(A): 
    bool = True
    D, F = A.shape   
    Error = pd.DataFrame(A.T @ A - np.eye(F)).abs()
    
    if any(Error.unstack() > 1e-6):
        bool = False
     
    return bool

########################################### metric_train
def metric_train(A, beta): 
    
    if not checkOrthonormality(A):
        return -1.0    
    
    Ypred = (X_train_reshape @ A @ beta).unstack().T         
    Ytrue = Y_train
    
    Ytrue = Ytrue.div(np.sqrt((Ytrue**2).sum()), 1)    
    Ypred = Ypred.div(np.sqrt((Ypred**2).sum()), 1)

    meanOverlap = (Ytrue * Ypred).sum().mean()
    
    return  meanOverlap

########################################### parametersTransform
def parametersTransform(A, beta, D=250, F=10):
    
    if A.shape != (D, F):
        print('A has not the good shape')
        return
    
    if beta.shape[0] != F:
        print('beta has not the good shape')
        return        
    
    output = np.hstack( (np.hstack([A.T, beta.reshape((F, 1))])).T )
    
    return output

### Gram-Schmidt Algorithm

In [33]:
#https://en.wikipedia.org/wiki/Gram%E2%80%93Schmidt_process

def gramschmidt(V):
  n, k = V.shape
  U = np.zeros((n,k))
  U[:,0] = V[:,0]/np.linalg.norm(V[:,0])
  for i in range(1,k):
    U[:,i]=V[:,i]
    for j in range(0, i):
      U[:,i]=U[:,i]-(U[:,j].T @ U[:,i]) * U[:,j]
    if np.linalg.norm(U[:,i])!=0:
      U[:,i] = U[:,i]/np.linalg.norm(U[:,i])
    else:
      U[:,i]=0
  return(U)

### Contrainte d'orthonormalité

In [34]:
#https://gist.github.com/dswah/9cee80674e2deea941430ec298f3f99e

class Orthonormal(tf.keras.constraints.Constraint):
    """approximate Orthonormal weight constraint.
    Constrains the weights incident to each hidden unit
    to be approximately orthonormal
    
    # Arguments
        beta: the strength of the constraint
        
    # References
        https://arxiv.org/pdf/1710.04087.pdf
    """

    def __init__(self, beta=0.01):
        self.beta = beta

    def __call__(self, w):
        eye = tf.linalg.matmul(w, w, transpose_b=True)
        return (1 + self.beta) * w - self.beta * tf.linalg.matmul(eye, w)

    def get_config(self):
        return {'beta': self.beta}

### Création du modèle

In [35]:
def create_model():
  model = tf.keras.Sequential()
  model.add(tf.keras.layers.Dense(10, 
                                  activation='linear',
                                  kernel_initializer=tf.keras.initializers.Orthogonal(),
                                  kernel_constraint=Orthonormal(),
                                  use_bias=False))
  model.add(tf.keras.layers.Dense(1, activation='linear', use_bias=False))
  return model

### Entraînement du modèle

In [36]:
#Paramètres
batch = 512
opti = tf.keras.optimizers.SGD(learning_rate=0.01)
loss = tf.keras.losses.CosineSimilarity(axis=0,reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE)

#Création
model = create_model()
model.build([25200,250])
print(model.summary())

model.compile(loss=loss, optimizer=opti, steps_per_execution = 100, run_eagerly=True)

#Entraînement
tensor_X = tf.convert_to_tensor(X)
tensor_Y = tf.convert_to_tensor(Y, dtype='float64')

history = model.fit(tensor_X, tensor_Y, batch_size=batch, epochs=40, shuffle=False, use_multiprocessing=True)

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_16 (Dense)            (25200, 10)               2500      
                                                                 
 dense_17 (Dense)            (25200, 1)                10        
                                                                 
Total params: 2,510
Trainable params: 2,510
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38

### Résultats et Export

In [37]:
A = model.get_weights()[0]
B = model.get_weights()[1]
At = gramschmidt(A)

print("Orthonormality : ", checkOrthonormality(At))
print("Cosine Similarity : ", metric_train(At,B))

Orthonormality :  True
Cosine Similarity :  0.1333829309170633


In [None]:
output = parametersTransform(At,B)
pd.DataFrame(output).to_csv('AnswerQRT.csv')