In [1]:
%run center_of_mass_calculation.ipynb

In [2]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
import math
from tensorflow.keras import backend as K
from tqdm.notebook import trange
import pandas as pd
import numpy as np
from multiprocessing import Pool

In [3]:
BATCH_SIZE = 128

In [4]:
df = pd.read_csv("full_df.csv", header=None)

In [5]:
data = tf.constant(df.to_numpy(), dtype="float32")

In [6]:
data.shape

TensorShape([62364, 30])

In [7]:
X = tf.data.Dataset.from_tensor_slices(data).shuffle(100_000).batch(BATCH_SIZE)

In [8]:
trans_mult = tf.constant([45] * 17 + [8] * 12 + [1], dtype="float32")
trans_add = tf.constant([5] * 17 + [16] * 12 + [0], dtype="float32")

In [9]:
class Generator(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.hidden_layers = [
            #tf.keras.layers.Dense(6, activation="relu"),
            #tf.keras.layers.Dense(8, activation="relu"),
            #tf.keras.layers.Dense(10, activation="relu"),
            #tf.keras.layers.Dense(12, activation="relu"),
            #tf.keras.layers.Dense(15, activation="relu"),
            #tf.keras.layers.Dense(19, activation="relu"),
            #tf.keras.layers.Dense(23, activation="relu"),
            #tf.keras.layers.Dense(27, activation="relu"),
            tf.keras.layers.Dense(64, activation="relu"),
            tf.keras.layers.Dense(64, activation="relu"),
            tf.keras.layers.Dense(64, activation="relu"),
            tf.keras.layers.Dense(64, activation="relu"),
            tf.keras.layers.Dense(64, activation="relu"),
            tf.keras.layers.Dense(64, activation="relu"),
            tf.keras.layers.Dense(64, activation="relu"),
        ]
        self.output_layer = tf.keras.layers.Dense(30, activation="sigmoid")
        
    def call(self, x):
        for layer in self.hidden_layers:
            x = layer(x)
        x = self.output_layer(x)
        x *= trans_mult
        x += trans_add
        return x

In [10]:
class Discriminator(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.hidden_layers = [
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dense(27, activation="relu"),
            tf.keras.layers.Dense(23, activation="relu"),
            tf.keras.layers.Dense(19, activation="relu"),
            tf.keras.layers.Dense(15, activation="relu"),
            tf.keras.layers.Dense(12, activation="relu"),
            tf.keras.layers.Dense(10, activation="relu"),
            tf.keras.layers.Dense(8, activation="relu"),
            tf.keras.layers.Dense(6, activation="relu"),
            tf.keras.layers.Dense(3, activation="relu"),
        ]
        self.output_layer = tf.keras.layers.Dense(1, activation="sigmoid")
        
    def call(self, x):
        for layer in self.hidden_layers:
            x = layer(x)
        return self.output_layer(x)

In [11]:
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=False)

In [12]:
def discriminator_loss(real_output, fake_output):
    real_loss = cross_entropy(tf.ones_like(real_output), real_output)
    fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
    total_loss = real_loss + fake_loss
    return total_loss

In [13]:
def generator_loss(fake_output):
    return cross_entropy(tf.ones_like(fake_output), fake_output)

In [14]:
generator = Generator()
discriminator = Discriminator()

In [15]:
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

In [16]:
mean = tf.keras.metrics.Mean(name='mean_distance')

In [17]:
@tf.function
def train_step(real):
    rand_input = tf.random.uniform((BATCH_SIZE, 5), minval=0, maxval=20)
        
    with tf.GradientTape() as disc_tape, tf.GradientTape() as gen_tape:
        generated = generator(rand_input, training=True)
            
        real_output = discriminator(real, training=True)
        fake_output = discriminator(generated, training=True)
            
        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)
        
    avg_distance = tf.reduce_mean(tf.reduce_mean(tf.sqrt(tf.tensordot(generated, data, axes=[[1], [1]])), axis=1))
    mean(avg_distance)
            
    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

In [18]:
EPOCHS = 50

for epoch in trange(EPOCHS):
    mean.reset_states()
    for real in X:
        train_step(real)

    print(f"Average distance from average augmented sample: {mean.result()}")
        


  0%|          | 0/50 [00:00<?, ?it/s]

Average distance from average augmented sample: 128.34910583496094
Average distance from average augmented sample: 127.78792572021484
Average distance from average augmented sample: 127.98695373535156
Average distance from average augmented sample: 128.0457000732422
Average distance from average augmented sample: 130.9093475341797
Average distance from average augmented sample: 130.9961700439453


KeyboardInterrupt: 

In [19]:
rand_input = tf.random.uniform((1_000_000, 5), minval=0, maxval=20)
gener = generator(rand_input)
df = pd.DataFrame(gener.numpy())
df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
count,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,...,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0
mean,29.398424,27.479794,27.830229,24.750896,15.931058,25.623934,35.492962,23.658066,30.937046,28.235901,...,20.17942,19.7451,20.415613,18.124634,21.211578,20.465868,21.383352,20.352762,19.565907,0.469454
std,1.859983,2.473579,2.401675,2.023421,2.416225,3.019619,2.202096,3.251796,1.372978,1.60284,...,0.27696,0.565395,0.480446,0.432866,0.374182,0.41401,0.418895,0.635583,0.556536,0.051897
min,25.332926,20.19265,15.535545,19.443401,9.521097,18.73531,28.092846,12.284315,27.45668,24.01128,...,19.412477,17.365929,19.389675,16.963894,20.060184,19.663752,19.964981,18.066322,17.992443,0.322745
25%,28.344868,25.774656,27.213087,23.410205,14.214715,23.959857,33.930243,21.881259,29.95781,27.094424,...,19.977146,19.576467,20.095443,17.811342,20.96596,20.135955,21.110905,20.061458,19.182149,0.429201
50%,28.940123,27.270333,28.504473,24.517791,15.755944,24.89388,35.564974,25.037374,30.701166,28.213313,...,20.184417,19.978363,20.360368,18.089359,21.245519,20.330498,21.429522,20.575895,19.533472,0.471376
75%,29.659825,28.801563,29.292323,25.770885,17.460435,26.128322,37.105237,26.096928,31.761653,29.286872,...,20.381599,20.1106,20.626785,18.405981,21.487518,20.738752,21.689224,20.795344,19.88439,0.511836
max,39.272686,37.027264,32.45919,31.983782,26.379841,41.108273,42.095341,27.487255,37.313843,34.263142,...,20.997383,20.445215,22.534821,19.827387,22.221653,21.919996,22.63559,21.502878,21.406824,0.612736


In [None]:
full_df = pd.read_csv("full_df.csv", header=None)
full_df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
count,62364.0,62364.0,62364.0,62364.0,62364.0,62364.0,62364.0,62364.0,62364.0,62364.0,...,62364.0,62364.0,62364.0,62364.0,62364.0,62364.0,62364.0,62364.0,62364.0,62364.0
mean,25.400311,25.453717,25.396961,25.452527,25.447544,27.494896,25.808743,25.872559,25.926155,25.819324,...,20.007233,20.035347,20.324825,20.003963,20.017181,20.005974,20.017858,20.026327,20.334232,0.5111463
std,12.780554,12.800172,12.791444,12.758855,12.769628,13.020563,12.81928,12.835393,12.831937,12.817533,...,2.30339,2.283654,2.25671,2.309544,2.317694,2.316451,2.303041,2.286688,2.252501,0.06772165
min,5.000515,5.000317,5.000327,5.000982,5.000021,5.00037,5.000172,5.000966,5.00036,5.000568,...,16.000084,16.000238,16.000065,16.000114,16.0,16.0001,16.00003,16.000072,16.000126,1.565178e-08
25%,14.277977,14.364701,14.222075,14.344979,14.369988,16.207468,14.647534,14.702544,14.712956,14.674438,...,18.017013,18.080188,18.458934,17.997635,17.995074,17.997494,18.025174,18.066345,18.477078,0.4812423
50%,24.434613,24.533262,24.360536,24.520503,24.430065,27.479076,25.063921,25.045355,25.198195,25.023787,...,20.002737,20.037742,20.454145,20.007626,20.026952,20.006498,20.023398,20.025013,20.483981,0.5117821
75%,35.985794,36.055445,36.049779,36.039434,36.076413,38.783558,36.492987,36.616639,36.7089,36.627257,...,22.005584,22.010333,22.27768,22.019618,22.032996,22.012519,22.008905,21.998136,22.281278,0.5455824
max,49.995995,49.999706,49.99863,49.999737,49.999886,49.999897,49.997826,49.999725,49.99978,49.997124,...,23.999933,24.0,23.99998,23.99968,23.999931,23.999924,23.999886,23.999989,23.999952,0.7844752


In [71]:
def process(a):
    for index in trange(gener.shape[0]):
        _, mass = calculate_center_of_mass(gener[index])
        if 4 <= mass <= 8:
            good_params.append(gener[index].numpy())
        print(index)

In [None]:
with Pool(10) as p:
    result = p.map(process, [
        gener[:100_000],
        gener[100_000:200_000],
        gener[200_000:300_000],
        gener[300_000:400_000],
        gener[400_000:500_000],
        gener[500_000:600_000],
        gener[700_000:800_000],
        gener[900_000:900_000],
        gener[900_000:1_000_000],
    ])
    
result

2022-10-02 21:51:21.149735: F tensorflow/stream_executor/cuda/cuda_driver.cc:152] Failed setting context: CUDA_ERROR_NOT_INITIALIZED: initialization error
2022-10-02 21:51:21.195975: F tensorflow/stream_executor/cuda/cuda_driver.cc:152] Failed setting context: CUDA_ERROR_NOT_INITIALIZED: initialization error
2022-10-02 21:51:21.238480: F tensorflow/stream_executor/cuda/cuda_driver.cc:152] Failed setting context: CUDA_ERROR_NOT_INITIALIZED: initialization error
2022-10-02 21:51:21.280568: F tensorflow/stream_executor/cuda/cuda_driver.cc:152] Failed setting context: CUDA_ERROR_NOT_INITIALIZED: initialization error
2022-10-02 21:51:21.332621: F tensorflow/stream_executor/cuda/cuda_driver.cc:152] Failed setting context: CUDA_ERROR_NOT_INITIALIZED: initialization error
2022-10-02 21:51:21.448833: F tensorflow/stream_executor/cuda/cuda_driver.cc:152] Failed setting context: CUDA_ERROR_NOT_INITIALIZED: initialization error
2022-10-02 21:51:21.699420: F tensorflow/stream_executor/cuda/cuda_dri

In [20]:
good_params = []
for index in trange(gener.shape[0]):
    _, mass = calculate_center_of_mass(gener[index])
    if 4 <= mass <= 8:
        good_params.append(gener[index].numpy())

  0%|          | 0/1000000 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [21]:
len(good_params)

66838

In [22]:
adf = pd.DataFrame(np.array(good_params))

In [23]:
adf.to_csv("augmentation.csv", header=False, index=False)