In [None]:
from google.colab import drive
import pandas as pd

# Mount Google Drive so the dataset files stored there can be read.
drive.mount('/content/drive')

# Folder on the mounted drive that holds the CSV files.
path = '/content/drive/My Drive/'

# Read the three CSV files in the same order as before: train, test, labels.
train_df, test_df, test_labels_df = (
    pd.read_csv(f"{path}{file_name}")
    for file_name in ('train.csv', 'test.csv', 'test_label.csv')
)


Mounted at /content/drive


In [None]:
from sklearn.preprocessing import MinMaxScaler

# Normalize data
scaler = MinMaxScaler()
features = train_df.columns[1:]  # Excluding timestamp

# Fit the scaling range on the training split only and reuse it for the test
# split, so no statistics from the test data leak into the transform.
train_df[features] = scaler.fit_transform(train_df[features])
test_df[features] = scaler.transform(test_df[features])

# Forward-fill missing values. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') spelling; rebinding instead of inplace=True keeps the
# cell free of in-place mutation. Leading gaps (no earlier value) stay NaN.
train_df = train_df.ffill()
test_df = test_df.ffill()


In [None]:
import numpy as np

def geometric_masking(data, p=0.1):
    """Zero out random entries of ``data`` and return the masked copy.

    One geometric draw with success probability ``p`` is taken per element
    (using NumPy's global random state). An element is kept only when its
    draw is ``<= 1``; since geometric draws start at 1, that happens with
    probability ``p``, so roughly a fraction ``1 - p`` of entries is zeroed.

    Args:
        data: Array-like with a ``.shape`` attribute that supports
            element-wise multiplication by a boolean array.
        p: Success probability of the geometric draws.

    Returns:
        ``data`` multiplied element-wise by the boolean keep-mask.
    """
    draws = np.random.geometric(p, size=data.shape)
    keep = draws <= 1
    return data * keep

# Corrupted copy of the training feature values (random entries set to zero).
masked_data = geometric_masking(data=train_df[features].to_numpy())


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LayerNormalization, MultiHeadAttention, Dropout

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one pre-norm transformer encoder block to ``inputs``.

    The block consists of a self-attention sub-layer followed by a two-layer
    feed-forward sub-layer; each sub-layer is preceded by layer normalization
    and wrapped in a residual connection.

    Args:
        inputs: Keras tensor; its last dimension sets the output width.
        head_size: ``key_dim`` passed to ``MultiHeadAttention``.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden feed-forward ``Dense`` layer.
        dropout: Rate used by both ``Dropout`` layers.

    Returns:
        Keras tensor whose last dimension equals ``inputs.shape[-1]``.
    """
    # --- Self-attention sub-layer (query and value are the same tensor) ---
    normed = LayerNormalization(epsilon=1e-6)(inputs)
    attended = MultiHeadAttention(key_dim=head_size, num_heads=num_heads)(normed, normed)
    attended = Dropout(dropout)(attended)
    after_attention = attended + inputs

    # --- Feed-forward sub-layer ---
    hidden = LayerNormalization(epsilon=1e-6)(after_attention)
    hidden = Dense(ff_dim, activation="relu")(hidden)
    hidden = Dropout(dropout)(hidden)
    # Project back to the input width so the residual addition is valid.
    projected = Dense(inputs.shape[-1])(hidden)
    return projected + after_attention

# Reconstruction model: one encoder block followed by a sigmoid projection
# back to one value per feature column. The sequence axis is left unspecified.
inputs = Input(shape=(None, len(features)))
x = transformer_encoder(inputs, 64, 4, 128)
outputs = Dense(units=len(features), activation='sigmoid')(x)

model = Model(inputs=inputs, outputs=outputs)
model.compile(loss='mse', optimizer='adam')


In [None]:
# Convert data to appropriate format for training
sequence_length = 30

# Unmasked feature values, selected by column position so this cell does not
# depend on what `features` is currently bound to.
clean_data = train_df[train_df.columns[1:]].values
assert clean_data.shape == masked_data.shape, "masked and clean data must align"

# Cut both arrays into the same overlapping windows of `sequence_length` rows.
window_starts = range(len(masked_data) - sequence_length)
X_train = np.array([masked_data[i:i + sequence_length] for i in window_starts])

# The target is the clean window, not the masked input: with y_train = X_train
# the network would only learn to copy the zeroed values instead of
# reconstructing what was masked out.
y_train = np.array([clean_data[i:i + sequence_length] for i in window_starts])

model.fit(X_train, y_train, epochs=1, batch_size=32)




<keras.src.callbacks.History at 0x7aced9263d90>

In [None]:
import tensorflow.keras.backend as K

def contrastive_loss(y_true, y_pred):
    """Margin-based contrastive loss with a fixed margin of 1.

    Entries are penalised by ``y_pred ** 2`` weighted by ``y_true`` and by
    ``max(margin - y_pred, 0) ** 2`` weighted by ``1 - y_true``; the mean over
    all entries is returned.

    Args:
        y_true: Label tensor (presumably 0/1 pair labels; confirm with caller).
        y_pred: Prediction tensor (presumably a pairwise distance; confirm).

    Returns:
        Scalar tensor holding the mean loss.
    """
    margin = 1
    hinge = K.maximum(margin - y_pred, 0)
    pull_term = y_true * K.square(y_pred)
    push_term = (1 - y_true) * K.square(hinge)
    return K.mean(pull_term + push_term)


In [None]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Reshape, Flatten, Input
from tensorflow.keras.optimizers import Adam
import numpy as np

def build_generator(latent_dim, output_shape):
    """Build the (uncompiled) generator network.

    Three ``Dense -> LeakyReLU(0.2) -> BatchNormalization(momentum=0.8)``
    stages of width 128, 256 and 512 are followed by a ``tanh`` ``Dense``
    layer with ``prod(output_shape)`` units and a ``Reshape`` to
    ``output_shape``. Because of ``tanh`` the outputs lie in ``[-1, 1]``.

    Args:
        latent_dim: Length of the input noise vector.
        output_shape: Shape (without batch axis) of one generated sample.

    Returns:
        A Keras ``Sequential`` model.
    """
    net = Sequential()
    for depth, width in enumerate((128, 256, 512)):
        # Only the first Dense layer declares the input size.
        dense_kwargs = {'input_dim': latent_dim} if depth == 0 else {}
        net.add(Dense(width, **dense_kwargs))
        net.add(LeakyReLU(alpha=0.2))
        net.add(BatchNormalization(momentum=0.8))

    net.add(Dense(np.prod(output_shape), activation='tanh'))
    net.add(Reshape(output_shape))
    return net

def build_discriminator(input_shape):
    """Build and compile the discriminator network.

    The input is flattened, passed through ``Dense(512)`` and ``Dense(256)``
    (each followed by ``LeakyReLU(0.2)``) and reduced to a single sigmoid
    unit. The model is compiled with binary cross-entropy, Adam
    (learning rate 0.0002, beta_1 0.5) and an accuracy metric.

    Args:
        input_shape: Shape (without batch axis) of one input sample.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    disc_net = Sequential()
    disc_net.add(Flatten(input_shape=input_shape))
    for width in (512, 256):
        disc_net.add(Dense(width))
        disc_net.add(LeakyReLU(alpha=0.2))
    disc_net.add(Dense(1, activation='sigmoid'))

    disc_net.compile(
        optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return disc_net

# Length of the noise vector fed to the generator.
latent_dim = 100

# One generated / judged sample is a flat vector with one value per feature.
generator = build_generator(latent_dim=latent_dim, output_shape=(len(features),))
discriminator = build_discriminator(input_shape=(len(features),))

# Mark the discriminator non-trainable before wiring it into the stacked
# model (it was already compiled on its own inside build_discriminator).
discriminator.trainable = False
gan_input = Input(shape=(latent_dim,))
gan_output = discriminator(generator(gan_input))
gan = Model(inputs=gan_input, outputs=gan_output)
gan.compile(optimizer=Adam(learning_rate=0.0002, beta_1=0.5), loss='binary_crossentropy')


In [None]:
def train_gan(generator, discriminator, gan, features, epochs, batch_size, latent_dim):
    """Alternate discriminator and generator updates for ``epochs`` steps.

    Each "epoch" here is a single training step, not a pass over the data:
    the discriminator is updated on one half batch of real rows and one half
    batch of generated rows, then the stacked model is updated on a full
    batch of noise labelled as real.

    Args:
        generator: Model exposing ``predict_on_batch(noise)``.
        discriminator: Model exposing ``train_on_batch(x, y)``; the return
            value must be a sequence whose first entry is the loss.
        gan: Stacked model exposing ``train_on_batch(noise, y)``.
        features: Array of real samples, indexed by row along its first axis.
        epochs: Number of training steps to run.
        batch_size: Generator batch size; the discriminator sees
            ``batch_size // 2`` real and as many generated samples per step.
        latent_dim: Length of each noise vector.

    Raises:
        ValueError: If ``batch_size`` is below 2, which would leave the
            discriminator with an empty half batch.
    """
    half_batch = batch_size // 2
    if half_batch < 1:
        raise ValueError("batch_size must be at least 2 so that each half batch is non-empty")

    for epoch in range(epochs):
        # --- Discriminator step: random real rows vs. freshly generated rows ---
        idx = np.random.randint(0, features.shape[0], half_batch)
        real_data = features[idx]

        noise = np.random.normal(0, 1, (half_batch, latent_dim))
        # predict_on_batch runs one forward pass without the per-call setup
        # and progress logging that predict() incurs on every loop iteration.
        gen_data = generator.predict_on_batch(noise)

        real_y = np.ones((half_batch, 1))
        fake_y = np.zeros((half_batch, 1))
        d_loss_real = discriminator.train_on_batch(real_data, real_y)
        d_loss_fake = discriminator.train_on_batch(gen_data, fake_y)
        # Element-wise average of the values returned for the two half batches.
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # --- Generator step: noise labelled as real, passed through the stack ---
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        valid_y = np.ones((batch_size, 1))
        g_loss = gan.train_on_batch(noise, valid_y)

        print(f"Epoch {epoch} / {epochs}, D Loss: {d_loss[0]}, G Loss: {g_loss}")

# NOTE: this rebinds `features` from the column Index used by the earlier
# cells to a NumPy array of the feature values. Cells that rely on
# len(features) being the number of columns must run before this one.
features = train_df[train_df.columns[1:]].values

# Pass the shared latent_dim constant instead of repeating the literal, so the
# noise size always matches the generator that was built with it.
train_gan(generator, discriminator, gan, features, epochs=100, batch_size=32, latent_dim=latent_dim)


Epoch 0 / 100, D Loss: 0.6078918278217316, G Loss: 0.7971608638763428
Epoch 1 / 100, D Loss: 0.5896534621715546, G Loss: 0.8271079659461975
Epoch 2 / 100, D Loss: 0.5583745241165161, G Loss: 0.8583386540412903
Epoch 3 / 100, D Loss: 0.5177457928657532, G Loss: 0.8893975019454956
Epoch 4 / 100, D Loss: 0.5135910958051682, G Loss: 0.9094705581665039
Epoch 5 / 100, D Loss: 0.48505493998527527, G Loss: 0.9673546552658081
Epoch 6 / 100, D Loss: 0.458295613527298, G Loss: 1.0026311874389648
Epoch 7 / 100, D Loss: 0.42749227583408356, G Loss: 1.1115370988845825
Epoch 8 / 100, D Loss: 0.4055106192827225, G Loss: 1.1801156997680664
Epoch 9 / 100, D Loss: 0.3922591060400009, G Loss: 1.1926604509353638
Epoch 10 / 100, D Loss: 0.3341425806283951, G Loss: 1.2198644876480103
Epoch 11 / 100, D Loss: 0.3823211193084717, G Loss: 1.3055200576782227
Epoch 12 / 100, D Loss: 0.3231939524412155, G Loss: 1.3611245155334473
Epoch 13 / 100, D Loss: 0.317561537027359, G Loss: 1.4114713668823242
Epoch 14 / 100, 