In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; every data path used in this notebook
# lives under '/content/drive/My Drive/'.
drive.mount('/content/drive')

In [None]:
import sys

import h5py
import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.stats
from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout, Concatenate
from keras.layers import BatchNormalization, Activation
from keras.layers.advanced_activations import LeakyReLU
from keras.models import Sequential, Model
from keras.optimizers import Adam

In [None]:
# Load the two normalized feature tables from Google Drive: `gen` is the
# conditioning input of the GAN, `recon` is the target it learns to produce.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
# can execute arbitrary code -- only load files from a trusted source.
gen, recon = (
    np.load(_path, allow_pickle=True)
    for _path in (
        '/content/drive/My Drive/traing_data_set/normalized_gen_pt_eta_phi_mass_7jet.npy',
        '/content/drive/My Drive/traing_data_set/normalized_pt_eta_phi_mass_3jet.npy',
    )
)

In [None]:
# Use the first N_TRAIN rows for training and everything after them for testing.
# The raw `gen` / `recon` arrays are intentionally NOT overwritten, so this cell
# can be re-run without first reloading the data.
N_TRAIN = 1000000
gen_train, gen_test = np.split(gen, [N_TRAIN], axis=0)
recon_train, recon_test = np.split(recon, [N_TRAIN], axis=0)

In [None]:
# Display the shape of every split. Only the last expression of a cell is
# rendered, so the shapes are collected into one mapping instead of being
# written as separate bare expressions.
{
    'gen_train': gen_train.shape,
    'gen_test': gen_test.shape,
    'recon_train': recon_train.shape,
    'recon_test': recon_test.shape,
}

In [None]:
# Persist the four splits; GAN.train() and GAN.store_fake_data() read them back
# from these locations (np.save appends the '.npy' suffix).
for _target, _array in (
    ('/content/drive/My Drive/traing_data_set/TEST/GAN004/gen_train', gen_train),
    ('/content/drive/My Drive/traing_data_set/TEST/GAN004/gen_test', gen_test),
    ('/content/drive/My Drive/traing_data_set/TEST/GAN004/recon_train', recon_train),
    ('/content/drive/My Drive/traing_data_set/TEST/GAN004/recon_test', recon_test),
):
    np.save(_target, _array)

In [None]:
class GAN():
    """Conditional GAN producing `recon` feature vectors from `gen` feature vectors.

    The generator receives a noise vector of length ``latent_dim`` together
    with a ``gen_cols``-wide conditioning vector and emits ``recon_cols``
    values squashed by tanh. The discriminator receives a (recon, gen) pair and
    emits a single sigmoid score; it is trained with label 1 for real pairs and
    label 0 for generated ones.
    """

    def __init__(self):
        """Build the discriminator, the generator and the stacked generator->discriminator model."""
        self.gen_rows = 1
        self.gen_cols = 28
        self.recon_rows = 1
        self.recon_cols = 12
        self.gen_shape = (self.gen_rows, self.gen_cols)
        self.latent_dim = 12

        optimizer = Adam(0.0002, 0.5)

        # Build and compile the discriminator
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy',
            optimizer=optimizer,
            metrics=['accuracy'])

        # Build the generator
        self.generator = self.build_generator()

        # The generator takes noise and gen data as input and tries to generate recon data
        noise = Input(shape=((self.latent_dim),))
        gen_train = Input(shape=((self.gen_cols,)))
        recon = self.generator([noise, gen_train])

        # Freeze the discriminator for the stacked model only: the intent is that
        # training `combined` updates the generator weights alone, while the
        # stand-alone discriminator compiled above keeps learning.
        self.discriminator.trainable = False

        # The discriminator takes generated data as input and determines validity
        validity = self.discriminator([recon, gen_train])

        # Combines generator and discriminator
        self.combined = Model([noise, gen_train], validity)
        self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)


    def build_generator(self):
        """Return a Model mapping [noise, gen] -> recon.

        The two inputs are concatenated and passed through Dense layers of
        64, 64 and 32 units (LeakyReLU + BatchNormalization after each),
        followed by a tanh output layer of ``recon_cols`` units.
        """
        model = Sequential()

        model.add(Dense(64, input_dim=self.latent_dim+self.gen_cols))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))

        model.add(Dense(64))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))

        model.add(Dense(32))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))

        model.add(Dense(self.recon_cols, activation='tanh'))

        model.summary()

        noise = Input(shape=(self.latent_dim,))
        gen_train = Input(shape=(self.gen_cols,))
        merge = Concatenate(axis=-1)([noise, gen_train])
        recon = model(merge)

        return Model([noise, gen_train], recon)

    def build_discriminator(self):
        """Return a Model mapping [recon, gen] -> validity score in (0, 1).

        The two inputs are concatenated and passed through Dense layers of
        64, 64, 32 and 16 units with LeakyReLU activations (no batch
        normalization), followed by a single sigmoid unit.
        """
        model = Sequential()

        model.add(Dense(64, input_dim=self.recon_cols+self.gen_cols))
        model.add(LeakyReLU(alpha=0.2))

        model.add(Dense(64))
        model.add(LeakyReLU(alpha=0.2))

        model.add(Dense(32))
        model.add(LeakyReLU(alpha=0.2))

        model.add(Dense(16))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(1, activation='sigmoid'))
        model.summary()

        recon = Input(shape=(self.recon_cols,))
        gen = Input(shape=(self.gen_cols,))
        merge = Concatenate(axis=-1)([recon, gen])

        validity = model(merge)

        return Model([recon, gen], validity)

    def store_fake_data(self):
        """Run the generator over the saved gen test set and save its output.

        Reads ``gen_test.npy``, draws one standard-normal noise vector per test
        row, and writes the generator prediction to ``fake_recon.npy``.
        """
        # NOTE(review): allow_pickle=True can execute arbitrary code on load;
        # only use with trusted files.
        gen_test = np.load('/content/drive/My Drive/traing_data_set/TEST/GAN004/gen_test.npy', allow_pickle = True)
        # One noise row per conditioning row. The row count comes from the data
        # itself so the two generator inputs always agree in length.
        noise = np.random.normal(0, 1, (gen_test.shape[0], self.latent_dim))
        fake_data = self.generator.predict([noise, gen_test])
        np.save('/content/drive/My Drive/traing_data_set/TEST/GAN004/generated_data/fake_recon.npy', fake_data)

    def train(self, epochs, batch_size=128, sample_interval=50):
        """Alternate discriminator and generator updates, then store generated test data.

        Args:
            epochs: Number of training iterations. Each iteration uses ONE
                randomly drawn batch, not a full pass over the training set.
            batch_size: Number of rows drawn (with replacement) per iteration.
            sample_interval: Accepted for backward compatibility; this method
                does not use it.
        """
        # Load data
        gen_train = np.load('/content/drive/My Drive/traing_data_set/TEST/GAN004/gen_train.npy',allow_pickle=True)
        recon_train = np.load('/content/drive/My Drive/traing_data_set/TEST/GAN004/recon_train.npy',allow_pickle=True)

        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):

            # The same random row indices are used for both tables so each gen
            # row stays paired with its recon row.
            idx = np.random.randint(0, gen_train.shape[0], batch_size)
            gen_data = gen_train[idx]
            recon_data = recon_train[idx]

            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))

            # Generate a batch of new data
            fake_recon = self.generator.predict([noise,gen_data])

            # Train the discriminator on one real and one generated batch
            d_loss_real = self.discriminator.train_on_batch([recon_data, gen_data], valid)
            d_loss_fake = self.discriminator.train_on_batch([fake_recon, gen_data], fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # Train the generator (to have the discriminator label samples as valid).
            # The noise drawn above is reused here rather than re-sampled.
            g_loss = self.combined.train_on_batch([noise, gen_data], valid)

            # Print the progress
            print ("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))
        #Store the final output
        self.store_fake_data()

In [None]:
# Build a fresh GAN and run 5000 training iterations (one random batch of 2048
# rows each). train() ends by calling store_fake_data(), which writes
# fake_recon.npy. sample_interval is accepted by train() but not used by it.
gan = GAN()
gan.train(epochs=5000, batch_size=2048, sample_interval=1000)

In [None]:
# Continue training the SAME `gan` object for another 5000 iterations; the
# fake_recon.npy written by the previous call is overwritten at the end.
gan.train(epochs=5000, batch_size=2048, sample_interval=1000)

In [None]:
# Save the stacked model, the generator and the discriminator as HDF5 files.
for _model, _destination in (
    (gan.combined, '/content/drive/My Drive/test5004.h5'),
    (gan.generator, '/content/drive/My Drive/test5005.h5'),
    (gan.discriminator, '/content/drive/My Drive/test5006.h5'),
):
    _model.save(_destination)

In [None]:
def _load_array(path):
    """Load one .npy file with pickled-object support enabled."""
    # NOTE(review): allow_pickle=True can execute arbitrary code; trusted files only.
    return np.load(path, allow_pickle=True)


# Generator output written by GAN.store_fake_data(), plus the held-out
# recon / gen test splits it is compared against.
fake_data = _load_array('/content/drive/My Drive/traing_data_set/TEST/GAN004/generated_data/fake_recon.npy')
real_data = _load_array('/content/drive/My Drive/traing_data_set/TEST/GAN004/recon_test.npy')
gen_test = _load_array('/content/drive/My Drive/traing_data_set/TEST/GAN004/gen_test.npy')

# Additional arrays that are only histogrammed further down.
gen_pt_top7_not_cut = _load_array('/content/drive/My Drive/traing_data_set/gen_pt_all_top7.npy')
gen_pt_top1_not_cut = _load_array('/content/drive/My Drive/traing_data_set/gen_pt_all_top1.npy')
gen_pt_top3_not_cut = _load_array('/content/drive/My Drive/traing_data_set/gen_pt_all_top3.npy')
HT = _load_array('/content/drive/My Drive/traing_data_set/HT.npy')
count = _load_array('/content/drive/My Drive/traing_data_set/count.npy')

In [None]:
# Cut each table into 4 equal column groups; the following cell reads them as
# pt, eta, phi and mass, in that order.
fake_split, real_split, gen_split = (
    np.split(_table, 4, axis=1)
    for _table in (fake_data, real_data, gen_test)
)

In [None]:
# Generated sample: one array per feature group, plus the first pt column.
fake_pt, fake_eta, fake_phi, fake_mass = fake_split
fake_leading_pt = fake_pt[:, :1]

# Real (recon test) sample, same layout.
real_pt, real_eta, real_phi, real_mass = real_split
real_leading_pt = real_pt[:, :1]

# Gen-level sample: keep the full pt group, and only the first 3 columns of
# every group for the like-for-like comparison with fake / real.
gen_pt_all = gen_split[0]
gen_leadng_pt = gen_pt_all[:, :1]
gen_pt, gen_eta, gen_phi, gen_mass = (_group[:, :3] for _group in gen_split)

In [None]:
# Collapse every feature table to 1-D so it can be histogrammed / KS-tested as
# a single sample.
gen_pt = gen_pt.flatten()
gen_eta = gen_eta.flatten()
gen_phi = gen_phi.flatten()
gen_mass = gen_mass.flatten()

fake_pt = fake_pt.flatten()
fake_eta = fake_eta.flatten()
fake_phi = fake_phi.flatten()
fake_mass = fake_mass.flatten()

# Plain assignments: the stray `real = ...` links of the former chained
# assignments only created a throw-away global and have been dropped.
real_pt = real_pt.flatten()
real_eta = real_eta.flatten()
real_phi = real_phi.flatten()
real_mass = real_mass.flatten()

fake_leading_pt = fake_leading_pt.flatten()
real_leading_pt = real_leading_pt.flatten()
gen_leadng_pt = gen_leadng_pt.flatten()
gen_pt_all = gen_pt_all.flatten()
gen_pt_top7_not_cut = gen_pt_top7_not_cut.flatten()
gen_pt_top1_not_cut = gen_pt_top1_not_cut.flatten()
gen_pt_top3_not_cut = gen_pt_top3_not_cut.flatten()

In [None]:
# Float copies of the real features for plotting and the KS tests.
# NOTE(review): presumably needed because the recon arrays load with object
# dtype -- confirm.
float_real_pt, float_real_eta, float_real_phi, float_real_mass = (
    _feature.astype(float)
    for _feature in (real_pt, real_eta, real_phi, real_mass)
)

In [None]:
# 3 x 4 grid of histograms: rows are fake / real / gen, columns are
# pt / eta / phi / mass. Every panel is titled so the figure can be read on
# its own.
fig, axs = plt.subplots(3, 4, sharey=True, tight_layout=True)
_feature_names = ('pt', 'eta', 'phi', 'mass')
_grid_rows = (
    ('fake', (fake_pt, fake_eta, fake_phi, fake_mass)),
    ('real', (float_real_pt, float_real_eta, float_real_phi, float_real_mass)),
    ('gen', (gen_pt, gen_eta, gen_phi, gen_mass)),
)
for _row, (_source, _samples) in enumerate(_grid_rows):
    for _col, _values in enumerate(_samples):
        # As before, only the real-pt panel is restricted to [-1, 1].
        _hist_kwargs = {'range': [-1, 1]} if (_row, _col) == (1, 0) else {}
        axs[_row][_col].hist(_values, bins=100, **_hist_kwargs)
        axs[_row][_col].set_title('%s %s' % (_source, _feature_names[_col]))

In [None]:
def denormalize_pt(normalized_pt):
    """Apply (x + 1) / 2 * 1404 + 10 to normalized pt values.

    NOTE(review): presumably this inverts a pt normalization of [10, 1414]
    onto [-1, 1]; confirm against the preprocessing code.
    """
    return (normalized_pt + 1) / 2 * 1404 + 10


# Leading-pt histograms on a common 150-bin grid over [0, 1500].
# ns1 / ns2 (per-bin counts of fake / real) are reused by the ratio cells below.
fig, axs = plt.subplots(3, 1, sharey=False, tight_layout=True)
ns1, bins1, patches1 = axs[0].hist(denormalize_pt(fake_leading_pt), bins=150, range=[0, 1500])
ns2, bins2, patches2 = axs[1].hist(denormalize_pt(real_leading_pt), bins=150, range=[0, 1500])
axs[2].hist(denormalize_pt(gen_leadng_pt), bins=150, range=[0, 1500])
for _ax, _title in zip(axs, ('fake_leading_pt', 'real_leading_pt', 'gen_leadng_pt')):
    _ax.set_title(_title)

In [None]:
# Histograms of three auxiliary arrays, each panel titled with the array name.
fig, axs = plt.subplots(3,1, tight_layout=True)
for _ax, (_title, _values) in zip(axs, (
    ('gen_pt_top7_not_cut', gen_pt_top7_not_cut),
    ('count', count),
    ('HT', HT),
)):
    _ax.hist(_values, bins=100)
    _ax.set_title(_title)

In [None]:
# Two-sample Kolmogorov-Smirnov tests between the generated, real and gen-level
# distributions. Relies on `import scipy.stats` in the imports cell: a bare
# `import scipy` does not guarantee that the `stats` submodule is loaded.
_ks_comparisons = (
    ('fake_pt vs real_pt', fake_pt, float_real_pt),
    ('fake_eta vs real_eta', fake_eta, float_real_eta),
    ('fake_phi vs real_phi', fake_phi, float_real_phi),
    ('fake_mass vs real_mass', fake_mass, float_real_mass),
    ('fake_leading_pt vs real_leading_pt', fake_leading_pt, real_leading_pt),
    ('gen_leadng_pt vs real_leading_pt', gen_leadng_pt, real_leading_pt),
    ('gen_leadng_pt vs fake_leading_pt', gen_leadng_pt, fake_leading_pt),
)
for _label, _sample_a, _sample_b in _ks_comparisons:
    # Cast to float so every input is numeric, matching the float_real_* casts
    # used for the other comparisons.
    _result = scipy.stats.ks_2samp(
        np.asarray(_sample_a, dtype=float),
        np.asarray(_sample_b, dtype=float),
    )
    print('%s: %s' % (_label, _result))

In [None]:
# Per-bin ratio of fake to real leading-pt histogram counts (ns1 / ns2 come from
# the leading-pt histogram cell). Bins where ns2 is 0 yield inf / nan together
# with a NumPy runtime warning.
ns1/ns2

In [None]:
# x: 0, 10, ..., 1490 -- one value per bin of the 150-bin [0, 1500] leading-pt
# histograms.
x=np.arange(0,1500,10)
# NOTE(review): x1 below is a hard-coded list of 100 numbers; it looks like a
# pasted snapshot of a fake/real ratio with some entries edited by hand --
# TODO confirm. In the visible notebook x1 is reassigned (x1 = x[30:120])
# before anything reads this list.
x1=[         200,          200,          200,          200,
                200,          200,          200, 137.        ,
                200, 140.        ,          100,          100,
       116.        ,  48.66666667,  58.33333333,  26.83333333,
        12.84210526,   6.6097561 ,   4.01234568,   3.28888889,
         2.61864407,   1.90420561,   1.64220183,   1.42134831,
         1.37745098,   1.19758813,   1.10749834,   1.02331861,
         1.02122905,   0.95880095,   0.96311031,   0.95006518,
         0.94673   ,   0.95576895,   0.95406405,   0.96263169,
         0.99261499,   0.96784732,   0.95119994,   0.95892302,
         0.99311515,   0.96663543,   0.97705884,   0.96816377,
         0.99861383,   0.96450801,   0.95125524,   0.95148756,
         0.97711101,   0.95537277,   0.95346677,   1.00368449,
         0.98971054,   0.99274308,   1.01636457,   1.06516233,
         1.04259711,   1.05798286,   1.07213272,   1.11202652,
         1.07350757,   1.05943727,   1.1031475 ,   1.03822265,
         1.02508961,   1.01209605,   1.04475491,   0.96158826,
         0.98098785,   0.94909191,   0.94250101,   0.81362932,
         0.89246401,   0.76951673,   0.65620094,   0.76368876,
         0.56856187,   0.75882353,   0.7983871 ,   0.73394495,
         1.08333333,   1.30555556,   1.4137931 ,   1.5862069 ,
         1.67857143,   3.77777778,   2.81818182,   4.14285714,
         2.        ,   4.25      ,   2.85714286,   4.5       ,
         5.33333333,   7.        ,   3.8       ,   6.        ,
         5.5       ,   1.        ,   1.33333333,   0.        ]

In [None]:
# Fake / real ratio per leading-pt bin. Bins with no real entries become NaN
# (and are simply not drawn) instead of triggering a division by zero.
fake_real_ratio = np.divide(ns1, ns2, out=np.full(ns1.shape, np.nan), where=ns2 > 0)
# x holds the LEFT edge of every 10-wide bin, so the bars are anchored at their
# left edge; the default centre alignment would shift them by half a bin.
plt.bar(x, fake_real_ratio, width=10, align='edge')
plt.xlabel('leading_jet_pt')
plt.ylabel('ratio fake/real')

In [None]:
# Same ratio restricted to bins 30..119 (left edges 300 .. 1190).
x1 = x[30:120]
# Bins with no real entries become NaN instead of dividing by zero.
t1 = np.divide(ns1, ns2, out=np.full(ns1.shape, np.nan), where=ns2 > 0)[30:120]
# x1 holds left bin edges, so anchor each bar at its left edge.
plt.bar(x1, t1, width=10, align='edge')
plt.xlabel('leading_jet_pt')
plt.ylabel('ratio fake/real')

In [None]:
# Split the gen-level pt group into its 7 individual columns. The 2-D group is
# taken from gen_split[0] because gen_pt_all has already been flattened to 1-D
# earlier in the notebook, and a 1-D array cannot be split along axis=1.
gen_pt_all_split = np.split(gen_split[0], 7, axis=1)

In [None]:
# Inspect gen_pt_all (flattened to 1-D by the flatten cell earlier in the notebook).
gen_pt_all

In [None]:
# One flat array per gen-level pt column (columns 0..6 of the pt group).
gen_pt_1, gen_pt_2, gen_pt_3, gen_pt_4, gen_pt_5, gen_pt_6, gen_pt_7 = (
    gen_pt_all_split[_index].flatten() for _index in range(7)
)



In [None]:
# One stand-alone figure per gen-level pt column, titled with the array name.
for _title, _values in (
    ('gen_pt_1', gen_pt_1),
    ('gen_pt_2', gen_pt_2),
    ('gen_pt_3', gen_pt_3),
    ('gen_pt_4', gen_pt_4),
    ('gen_pt_5', gen_pt_5),
    ('gen_pt_6', gen_pt_6),
    ('gen_pt_7', gen_pt_7),
):
    fig, axs = plt.subplots(1, sharey=False, tight_layout=False)
    axs.hist(_values, bins=100)
    axs.set_title(_title)

In [None]:
# Cut the generated and real tables into 16 equal column groups.
# NOTE(review): np.split requires the column count to be divisible by 16. The
# GAN defined in this notebook emits recon_cols = 12 columns, so if fake_data /
# real_data come from that model this call raises ValueError. This cell and the
# *_1 .. *_4 cells after it look like leftovers from a 16-column layout --
# TODO confirm which layout is intended.
fake_split_16=np.split(fake_data,16,axis=1)
real_split_16=np.split(real_data,16,axis=1)

In [None]:
# Flatten the 16 column groups into individually named arrays. Indices 0-3 are
# read as pt, 4-7 as eta, 8-11 as phi and 12-15 as mass.
fake_pt_1, fake_pt_2, fake_pt_3, fake_pt_4 = (
    fake_split_16[_index].flatten() for _index in range(0, 4)
)
fake_eta_1, fake_eta_2, fake_eta_3, fake_eta_4 = (
    fake_split_16[_index].flatten() for _index in range(4, 8)
)
fake_phi_1, fake_phi_2, fake_phi_3, fake_phi_4 = (
    fake_split_16[_index].flatten() for _index in range(8, 12)
)
fake_mass_1, fake_mass_2, fake_mass_3, fake_mass_4 = (
    fake_split_16[_index].flatten() for _index in range(12, 16)
)

# Same layout for the real sample.
real_pt_1, real_pt_2, real_pt_3, real_pt_4 = (
    real_split_16[_index].flatten() for _index in range(0, 4)
)
real_eta_1, real_eta_2, real_eta_3, real_eta_4 = (
    real_split_16[_index].flatten() for _index in range(4, 8)
)
real_phi_1, real_phi_2, real_phi_3, real_phi_4 = (
    real_split_16[_index].flatten() for _index in range(8, 12)
)
real_mass_1, real_mass_2, real_mass_3, real_mass_4 = (
    real_split_16[_index].flatten() for _index in range(12, 16)
)

In [None]:
# Inspect the flattened second column group of the generated table.
fake_pt_2

In [None]:
# Per-column pt histograms: generated sample on the top row, real sample on the
# bottom row. Every panel is titled with the name of the array it shows.
fig, axs = plt.subplots(2, 4, sharey=True, tight_layout=True)
_pt_panels = (
    ('fake_pt_%d', (fake_pt_1, fake_pt_2, fake_pt_3, fake_pt_4)),
    ('real_pt_%d', (real_pt_1, real_pt_2, real_pt_3, real_pt_4)),
)
for _row, (_title, _columns) in enumerate(_pt_panels):
    for _col, _values in enumerate(_columns):
        axs[_row][_col].hist(_values, bins=100)
        axs[_row][_col].set_title(_title % (_col + 1))

In [None]:
# Per-column eta histograms: generated sample on the top row, real sample on the
# bottom row. Every panel is titled with the name of the array it shows.
fig, axs = plt.subplots(2, 4, sharey=True, tight_layout=True)
_eta_panels = (
    ('fake_eta_%d', (fake_eta_1, fake_eta_2, fake_eta_3, fake_eta_4)),
    ('real_eta_%d', (real_eta_1, real_eta_2, real_eta_3, real_eta_4)),
)
for _row, (_title, _columns) in enumerate(_eta_panels):
    for _col, _values in enumerate(_columns):
        axs[_row][_col].hist(_values, bins=100)
        axs[_row][_col].set_title(_title % (_col + 1))


In [None]:
# Per-column phi histograms: generated sample on the top row, real sample on the
# bottom row. Every panel is titled with the name of the array it shows.
fig, axs = plt.subplots(2, 4, sharey=True, tight_layout=True)
_phi_panels = (
    ('fake_phi_%d', (fake_phi_1, fake_phi_2, fake_phi_3, fake_phi_4)),
    ('real_phi_%d', (real_phi_1, real_phi_2, real_phi_3, real_phi_4)),
)
for _row, (_title, _columns) in enumerate(_phi_panels):
    for _col, _values in enumerate(_columns):
        axs[_row][_col].hist(_values, bins=100)
        axs[_row][_col].set_title(_title % (_col + 1))

In [None]:
# Per-column mass histograms: generated sample on the top row, real sample on
# the bottom row. Every panel is titled with the name of the array it shows.
fig, axs = plt.subplots(2, 4, sharey=True, tight_layout=True)
_mass_panels = (
    ('fake_mass_%d', (fake_mass_1, fake_mass_2, fake_mass_3, fake_mass_4)),
    ('real_mass_%d', (real_mass_1, real_mass_2, real_mass_3, real_mass_4)),
)
for _row, (_title, _columns) in enumerate(_mass_panels):
    for _col, _values in enumerate(_columns):
        axs[_row][_col].hist(_values, bins=100)
        axs[_row][_col].set_title(_title % (_col + 1))