In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense, Lambda, Layer
from tensorflow.keras.models import Model
from tensorflow.keras.losses import mse
from tensorflow.keras import backend as K

data = pd.read_csv('data.csv')
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data[['price', 'book value', 'revenue per share', 'net income per share', 'current ratio', 'dividend yield']])

onehot = OneHotEncoder(sparse_output=False)
sectors_onehot = onehot.fit_transform(data[['sector']])

X = np.hstack((data_scaled, sectors_onehot))

X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

input_dim = X_train.shape[1]  
latent_dim = 2  

def sampling(args):
    z_mean, z_log_sigma = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim))
    return z_mean + K.exp(z_log_sigma) * epsilon

class VAELossLayer(Layer):
    def __init__(self, **kwargs):
        super(VAELossLayer, self).__init__(**kwargs)

    def call(self, inputs):
        x, x_decoded, z_mean, z_log_sigma = inputs
        xent_loss = mse(x, x_decoded)
        kl_loss = -0.5 * K.sum(1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma), axis=-1)
        total_loss = K.mean(xent_loss + kl_loss)
        self.add_loss(total_loss)  
        return x  

inputs = Input(shape=(input_dim,))
h = Dense(32, activation='relu')(inputs)
z_mean = Dense(latent_dim)(h)
z_log_sigma = Dense(latent_dim)(h)
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_sigma])

decoder_h = Dense(32, activation='relu')
decoder_mean = Dense(input_dim, activation='linear')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)

loss_layer = VAELossLayer()([inputs, x_decoded_mean, z_mean, z_log_sigma])

vae = Model(inputs, loss_layer) 

vae.compile(optimizer='rmsprop')
vae.summary()

vae.fit(X_train, shuffle=True, epochs=50, batch_size=32, validation_data=(X_test, None))


2024-04-17 09:59:09.271378: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-17 09:59:09.334118: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-17 09:59:11.728579: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-17 09:59:20.691777: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 1.2361 - val_loss: 1.0123
Epoch 2/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.0074 - val_loss: 0.9329
Epoch 3/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.9260 - val_loss: 0.8859
Epoch 4/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.8952 - val_loss: 0.8690
Epoch 5/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.8360 - val_loss: 0.8632
Epoch 6/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.8949 - val_loss: 0.8360
Epoch 7/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.8859 - val_loss: 0.8530
Epoch 8/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.8276 - val_loss: 0.8683
Epoch 9/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x7fa5dc244940>

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from statsmodels.tsa.vector_ar.var_model import VAR

data = pd.read_csv('data.csv')

model = VAR(data[['price', 'book value', 'revenue per share', 'net income per share', 'current ratio', 'dividend yield']])
results = model.fit(maxlags=15, ic='aic')

residuals = results.resid

data_aligned = data.iloc[len(data) - len(residuals):]

scaler = StandardScaler()
data_scaled = scaler.fit_transform(data_aligned[['price', 'book value', 'revenue per share', 'net income per share', 'current ratio', 'dividend yield']])
residuals_scaled = scaler.transform(residuals) 

onehot = OneHotEncoder(sparse_output=False)
sectors_onehot = onehot.fit_transform(data_aligned[['sector']])

X = np.hstack((data_scaled, residuals_scaled, sectors_onehot))

X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

input_dim = X_train.shape[1]  
latent_dim = 2 

def sampling(args):
    z_mean, z_log_sigma = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim))
    return z_mean + K.exp(z_log_sigma) * epsilon

class VAELossLayer(Layer):
    def __init__(self, **kwargs):
        super(VAELossLayer, self).__init__(**kwargs)

    def call(self, inputs):
        x, x_decoded, z_mean, z_log_sigma = inputs
        xent_loss = mse(x, x_decoded)
        kl_loss = -0.5 * K.sum(1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma), axis=-1)
        total_loss = K.mean(xent_loss + kl_loss)
        self.add_loss(total_loss)  
        return x  

inputs = Input(shape=(input_dim,))
h = Dense(32, activation='relu')(inputs)
z_mean = Dense(latent_dim)(h)
z_log_sigma = Dense(latent_dim)(h)
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_sigma])

decoder_h = Dense(32, activation='relu')
decoder_mean = Dense(input_dim, activation='linear')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)

loss_layer = VAELossLayer()([inputs, x_decoded_mean, z_mean, z_log_sigma])

vae = Model(inputs, loss_layer) 

vae.compile(optimizer='rmsprop')
vae.summary()

vae.fit(X_train, shuffle=True, epochs=50, batch_size=32, validation_data=(X_test, None))

Epoch 1/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 8.5684 - val_loss: 7.2458
Epoch 2/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 6.8273 - val_loss: 5.7014
Epoch 3/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 5.6351 - val_loss: 4.8176
Epoch 4/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 4.7229 - val_loss: 4.0432
Epoch 5/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3.8412 - val_loss: 3.4290
Epoch 6/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 3.2840 - val_loss: 2.7378
Epoch 7/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2.7453 - val_loss: 2.4745
Epoch 8/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2.3137 - val_loss: 2.0229
Epoch 9/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x7fa5a97a75e0>