In [None]:
# %%
# -*- coding: utf-8 -*-
"""
This script runs the multi GAN and lets you step through each part, one cell at a time.
TODO: divide y by exposure in xpxixpy
"""
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import matplotlib.pyplot as plt
import pandas as pd
import statsmodels.api as sm
# import modules
import torch.optim as optim
from patsy import dmatrices
import numpy as np
from torch.utils.data import DataLoader
from tqdm import tqdm
from Functions import dataprep as prep
from logging import Logger
from torch.optim import Adam
from Functions.mcwgan.functions import to_cuda_if_available, to_cpu_if_available, DelayedKeyboardInterrupt, Logger, Dataset

pd.set_option('display.float_format', lambda x: '%.2f' % x)
import math
import joblib

import torch

import numpy as np

## Import created modules
from Functions.MC_WGAN_GP.gan_scripts.auto_loader import PolicyDataset
from Functions.MC_WGAN_GP.gan_scripts.generator2_v2 import Generator2
from Functions.MC_WGAN_GP.gan_scripts.discriminator2_v3 import Discriminator2
from Functions.MC_WGAN_GP.gan_scripts.gradiant_penalty import calculate_gradient_penalty
from Functions.MC_WGAN_GP.gan_scripts.undo_dummy import back_from_dummies
from Functions import metrics

from torch.autograd.variable import Variable

import statsmodels.formula.api as smf

In [None]:
# Choose the compute device once: CUDA when torch reports it as available, CPU otherwise.
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if dev.type == "cuda":
    # Explicitly re-select the CUDA device that is already current.
    devid = torch.cuda.current_device()
    torch.cuda.set_device(devid)

print(f'Device: {dev}')

In [None]:
# Load the prepared GAN training/validation data and the object stored in scaler.pickle
# (used later through ss.inverse_transform).
# NOTE(review): unpickling executes code embedded in the file - only load artifacts you trust.
train = pd.read_pickle("./data/gan_dataprep/train_gan.pickle")
val = pd.read_pickle("./data/gan_dataprep/val_gan.pickle")
ss = joblib.load('./data/gan_dataprep/scaler.pickle')
# Keep a copy of the training data under its own name; `train` itself is rebound in the next cell.
pol_dat = train.copy()

In [None]:
# Rebind `train` and `val` to the project's Dataset wrapper (its batch_iterator is what trainn consumes).
# From here on only `pol_dat` still refers to a copy of the training data as it was loaded.
train = Dataset(train)
val = Dataset(val)

In [None]:
def glm_metrics(train, val):
    """Fit the Poisson claim-frequency GLM on ``train`` and score it on ``val``.

    Both inputs are run through the notebook-level ``ss.inverse_transform`` and
    ``back_from_dummies`` before modelling, so each must expose ``.columns`` and
    be accepted by ``ss.inverse_transform``.

    Returns the value of ``metrics.poisson_deviance`` for the GLM predictions on
    ``val`` against the observed ``ClaimNb``.
    """
    def _restore(scaled):
        # Undo the scaling, collapse the dummy columns, and make the claim count an integer.
        frame = pd.DataFrame(ss.inverse_transform(scaled), columns=scaled.columns)
        frame = back_from_dummies(frame)
        frame['ClaimNb'] = frame['ClaimNb'].astype(float).astype(int)
        return frame

    fit_frame = _restore(train)
    holdout_frame = _restore(val)

    # Only the fitting frame gets Density floored at 1; the hold-out frame is left untouched.
    # NOTE(review): confirm the asymmetry is intended.
    fit_frame['Density'] = fit_frame['Density'].clip(lower=1)

    glm_train = prep.data_cleaning_frequency_schelldorfer(fit_frame)
    glm_val = prep.data_cleaning_frequency_schelldorfer(holdout_frame)

    # Poisson GLM with log link; log(Exposure) enters as an offset for both fitting and prediction.
    formula1 = "ClaimNb ~ VehPowerGLM + C(VehAgeGLM, Treatment(reference=2)) + C(DrivAgeGLM, Treatment(reference=5)) + BonusMalusGLM + VehBrand + VehGas + DensityGLM + C(Region, Treatment(reference='R24')) + AreaGLM"
    poisson_family = sm.families.Poisson(link=sm.families.links.log())
    fitted = smf.glm(
        formula=formula1,
        data=glm_train,
        family=poisson_family,
        offset=np.log(glm_train['Exposure']),
    ).fit()

    predicted = fitted.predict(glm_val, offset=np.log(glm_val['Exposure']))
    return metrics.poisson_deviance(predicted, glm_val['ClaimNb'])

def xpxixpy(X, y):
    """Return the ordinary-least-squares coefficients ``(X'X)^-1 X'y``.

    The normal equations ``(X'X) b = X'y`` are solved directly with
    ``np.linalg.solve`` rather than by forming the explicit inverse, which is
    the numerically preferred way to obtain the same solution.

    Parameters
    ----------
    X : 2-D array-like, one row per observation and one column per regressor.
    y : array-like with one entry (or one row) per observation.

    Returns
    -------
    numpy.ndarray with one coefficient (row) per column of ``X``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``X'X`` is singular.
    """
    gram = np.dot(X.T, X)      # X'X
    moment = np.dot(X.T, y)    # X'y
    return np.linalg.solve(gram, moment)


In [None]:
# Baseline: score the GLM fitted on `train` against `val` and display the result.
# NOTE(review): `train`/`val` were rebound to Dataset wrappers in the previous cell, while glm_metrics
# reads `.columns` and calls ss.inverse_transform on its inputs - confirm Dataset supports both.
glm_metrics(train, val)

In [None]:
# Passed to Generator2 as output_size; the assert below requires the entries to sum to data_size.
output_size = [1, 1, 1, 1, 1, 1, 5, 11, 2, 22, 6]
# Information about the size of the data
data_size = pol_dat.shape[1]  # number of columns in pol_dat
var_locs = [0, 1, 2, 3, 4, 5]  # positions of the continuous variables (handed to PolicyDataset)

# parameters
z_size = 100  # width of the random noise vector fed into the generator
# we should only need the 55?
batch_size = 100  # chunk size used when splitting the shuffled features in the manual training loop
temperature = None  # comes into play with the categorical activation, see multioutput.py

# Generator tuning
gen_hidden_sizes = [100, 100, 100]
gen_bn_decay = .90
gen_l2_regularization = 0.001
gen_learning_rate = 0.01
noise_size = z_size

# Sanity check: the generator's output widths must add up to the number of data columns.
assert sum(output_size) == data_size

# Discriminator tuning
disc_hidden_sizes = [data_size, data_size]
disc_bn_decay = .90
critic_bool = True  # if False the discriminator output is between 0 and 1
mini_batch_bool = False
disc_leaky_param = 0.2
disc_l2_regularization = 0.001
l2_regularization = disc_l2_regularization
disc_learning_rate = 0.01
penalty = 0.1  # passed to calculate_gradient_penalty (gradient penalty term)
epochs = 20000  # number of epochs run by the manual training loop


In [None]:
# Wrap the policy data; var_locs lists the positions of the continuous columns.
auto_data = PolicyDataset(pol_dat, var_locs)

# Construct both networks first, then move each one to the selected device.
generator = Generator2(
    noise_size=noise_size,
    output_size=output_size,
    hidden_sizes=gen_hidden_sizes,
    bn_decay=gen_bn_decay,
)

discriminator = Discriminator2(
    input_size=data_size,
    hidden_sizes=disc_hidden_sizes,
    # NOTE(review): the original comment here read "no batch normalization for the critic",
    # yet a decay value is passed - confirm how Discriminator2 uses it.
    bn_decay=disc_bn_decay,
    critic=critic_bool,           # whether the network acts as a critic
    leaky_param=disc_leaky_param,  # parameter for LeakyReLU
    mini_batch=mini_batch_bool,    # whether to use the mini-batch extras
    add_rows=56,                   # number of rows to add if appending extra rows
)

# Moving a module to the device it is already on is a no-op, so one move per network suffices.
generator = generator.to(dev)
discriminator = discriminator.to(dev)

In [None]:
# Manual WGAN-GP training loop (step-through counterpart of the training function).
generator = generator.to(dev)
discriminator = discriminator.to(dev)
generator.train(mode=True)
discriminator.train(mode=True)

# The optimizers were used below without ever being created in the notebook;
# build them here from the tuning constants of the configuration cell.
optim_gen = Adam(generator.parameters(), weight_decay=gen_l2_regularization, lr=gen_learning_rate)
optim_disc = Adam(discriminator.parameters(), weight_decay=disc_l2_regularization, lr=disc_learning_rate)

disc_losses = []  # one float per critic update
gen_losses = []   # one float per generator update
pois_metric = []  # deviances collected by the optional GLM check
# %%
# Whole data set as one tensor: continuous columns first, categorical columns after.
cats, conts = auto_data.get_catcont()
batch = torch.cat([conts, cats], 1)
real_features = batch.to(dev)
featlen = len(real_features)
shuffled_features = real_features

# The original condition was `False & ...`, i.e. the periodic GLM check was switched off.
# Flip this flag to re-enable it.
run_glm_check = False

loop = tqdm(total=epochs, position=0, leave=False)

for epoch in range(epochs):
    # Shuffling of features
    rand_indx = torch.randperm(featlen)
    shuffled_features = shuffled_features[rand_indx]

    # For each batch
    for real_batch in torch.split(shuffled_features, batch_size):
        real_batch_size = len(real_batch)

        # ---------------- critic update ----------------
        # Reset gradients for every update; otherwise they accumulate across batches
        # (and the generator step below also deposits gradients in the critic).
        optim_disc.zero_grad()

        # The critic outputs high numbers for data it believes is real; negate because we minimise.
        real_pred = discriminator(real_batch)
        real_loss = -real_pred.mean(0).view(1)
        real_loss.backward()

        # Fake data is detached so this step does not propagate into the generator.
        noise = torch.randn(real_batch_size, z_size, device=dev)
        fake_features = generator(noise, training=True).detach()
        fake_pred = discriminator(fake_features)
        fake_loss = fake_pred.mean(0).view(1)
        fake_loss.backward()

        # this is the magic from WGAN-GP
        gradient_penalty = calculate_gradient_penalty(discriminator, penalty, real_batch, fake_features)
        gradient_penalty.backward()

        optim_disc.step()

        # Store a plain float so no autograd graph is kept alive by the history list.
        disc_losses.append((real_loss + fake_loss + gradient_penalty).item())

        # ---------------- generator update ----------------
        optim_gen.zero_grad()

        noise = torch.randn(real_batch_size, z_size, device=dev)
        # NOTE(review): training=True added to match the critic step above and trainn();
        # confirm against Generator2's default for `training`.
        gen_features = generator(noise, training=True)
        gen_pred = discriminator(gen_features)

        gen_loss = -gen_pred.mean(0).view(1)
        gen_loss.backward()

        optim_gen.step()

        gen_losses.append(gen_loss.item())

    # Show only the most recent losses instead of dumping the whole history every epoch.
    if disc_losses and gen_losses:
        loop.set_description(f'epoch:{epoch}, disc_loss:{disc_losses[-1]:.4f}, gen_loss:{gen_losses[-1]:.4f}')
    else:
        loop.set_description(f'epoch:{epoch}')
    loop.update(1)

    # analyze poisson regression parameters every 20 epochs (disabled unless run_glm_check is set)
    if run_glm_check and epoch % 20 == 0 and epoch > 10:
        with torch.no_grad():
            generated_data = generator(torch.randn(pol_dat.shape[0], z_size, device=dev), training=False)
        df1 = pd.DataFrame(generated_data.data.to('cpu').numpy().round(2))
        df1.columns = list(pol_dat)

        try:
            dev_gen = glm_metrics(df1, val)
        except ValueError:
            # Nothing to plot for this round; previously this path left dev_gen unbound.
            print('No value found yet')
        else:
            print(f'Generated Deviance: {dev_gen}')
            pois_metric.append(dev_gen)
            plt.plot(pois_metric, label='train')
            plt.ylabel('poission Dif')
            plt.show()
            plt.clf()

        del df1

        #torch.save(generator.state_dict(), f='./saved_parameters/gen_test')

loop.close()


In [None]:
def trainn(generator,
          discriminator,
          train_data,
          val_data,
          output_gen_path,
          output_disc_path,
          output_loss_path,
          batch_size=1000,
          start_epoch=0,
          num_epochs=1000,
          num_disc_steps=2,
          num_gen_steps=1,
          noise_size=128,
          l2_regularization=0.001,
          learning_rate=0.001,
          penalty_weight=None,
          ):
    """Train a WGAN-GP generator/critic pair and checkpoint both after every epoch.

    Parameters
    ----------
    generator, discriminator
        Networks to train. Called as ``generator(noise, training=True)`` and
        ``discriminator(features)``.
    train_data
        Object exposing ``batch_iterator(batch_size)``; each batch is passed to
        ``torch.from_numpy``.
    val_data
        Accepted for interface compatibility; not used by this function.
    output_gen_path, output_disc_path
        Targets of ``torch.save`` for the two ``state_dict``s (rewritten every epoch).
    output_loss_path
        Path handed to ``Logger``; append mode is requested when ``start_epoch > 0``.
    batch_size
        Batch size requested from ``train_data.batch_iterator``.
    start_epoch, num_epochs
        Epochs run over ``range(start_epoch, num_epochs)``.
    num_disc_steps
        Critic updates per round, each on a freshly fetched batch.
    num_gen_steps
        Generator updates per round.
    noise_size
        Width of the noise vectors fed to the generator (presumably has to match
        the size the generator was built with - verify at the call site).
    l2_regularization, learning_rate
        ``weight_decay`` and ``lr`` of both Adam optimizers.
    penalty_weight
        Coefficient passed to ``calculate_gradient_penalty``. ``None`` (default)
        keeps the previous behaviour of reading the module-level ``penalty``.
    """
    # Explicit weight wins; otherwise fall back to the notebook-level constant as before.
    gp_weight = penalty if penalty_weight is None else penalty_weight

    generator, discriminator = to_cuda_if_available(generator, discriminator)

    optim_gen = Adam(generator.parameters(), weight_decay=l2_regularization, lr=learning_rate)
    optim_disc = Adam(discriminator.parameters(), weight_decay=l2_regularization, lr=learning_rate)

    logger = Logger(output_loss_path, append=start_epoch > 0)

    # try/finally so the logger is closed even when training raises or is interrupted.
    try:
        for epoch_index in range(start_epoch, num_epochs):
            logger.start_timer()

            # train
            generator.train(mode=True)
            discriminator.train(mode=True)

            disc_losses = []
            gen_losses = []

            more_batches = True
            train_data_iterator = train_data.batch_iterator(batch_size)

            while more_batches:
                batch = None  # stays None if the iterator is exhausted before any critic step

                # train discriminator
                for _ in range(num_disc_steps):
                    # next batch
                    try:
                        batch = next(train_data_iterator)
                    except StopIteration:
                        more_batches = False
                        break

                    optim_disc.zero_grad()

                    # first train the discriminator only with real data
                    real_features = to_cuda_if_available(torch.from_numpy(batch))
                    real_pred = discriminator(real_features)
                    real_loss = - real_pred.mean(0).view(1)
                    real_loss.backward()

                    # then train the discriminator only with fake data
                    noise = to_cuda_if_available(torch.randn(len(batch), noise_size))
                    fake_features = generator(noise, training=True)
                    fake_features = fake_features.detach()  # do not propagate to the generator
                    fake_pred = discriminator(fake_features)
                    fake_loss = fake_pred.mean(0).view(1)
                    fake_loss.backward()

                    # this is the magic from WGAN-GP
                    gradient_penalty = calculate_gradient_penalty(discriminator, gp_weight, real_features, fake_features)
                    gradient_penalty.backward()

                    # finally update the discriminator weights
                    # using two separated batches is another trick to improve GAN training
                    optim_disc.step()

                    disc_loss = real_loss + fake_loss + gradient_penalty
                    disc_loss = to_cpu_if_available(disc_loss)
                    disc_losses.append(disc_loss.data.numpy())

                    del disc_loss
                    del gradient_penalty
                    del fake_loss
                    del real_loss

                if batch is None:
                    # No data was consumed this round: skip the generator step rather than
                    # sizing its noise from a stale (or never-assigned) batch.
                    break

                # train generator
                for _ in range(num_gen_steps):
                    optim_gen.zero_grad()

                    noise = to_cuda_if_available(torch.randn(len(batch), noise_size))
                    gen_features = generator(noise, training=True)
                    fake_pred = discriminator(gen_features)
                    fake_loss = - fake_pred.mean(0).view(1)
                    fake_loss.backward()

                    optim_gen.step()

                    fake_loss = to_cpu_if_available(fake_loss)
                    gen_losses.append(fake_loss.data.numpy())

                    del fake_loss

            # log epoch metrics for current class
            logger.log(epoch_index, num_epochs, "discriminator", "train_mean_loss", np.mean(disc_losses))
            logger.log(epoch_index, num_epochs, "generator", "train_mean_loss", np.mean(gen_losses))

            # save models for the epoch; interrupts are deferred until the writes have finished
            with DelayedKeyboardInterrupt():
                torch.save(generator.state_dict(), output_gen_path)
                torch.save(discriminator.state_dict(), output_disc_path)
                logger.flush()
    finally:
        logger.close()

In [None]:
# Display the current `train` object (a Dataset wrapper when the cells are run top to bottom).
train

In [None]:
# Run the function-based training. Checkpoints and the loss log are written to the
# relative paths 'generator', 'discriminator' and 'loss'.
# noise_size is passed explicitly: the generator was built with the notebook's noise_size,
# whereas trainn's own default is 128.
trainn(generator,
       discriminator,
       train,
       val,
       'generator',
       'discriminator',
       'loss',
       noise_size=noise_size,
       )

In [None]:
# Sample one synthetic row per row of pol_dat from the generator.
# `generated_data` was otherwise only assigned inside the disabled evaluation branch of the
# manual training loop, so this cell failed on a fresh kernel.
with torch.no_grad():
    generated_data = generator(torch.randn(pol_dat.shape[0], z_size, device=dev), training=False)
df1 = pd.DataFrame(generated_data.data.to('cpu').numpy().round(2))

In [11]:
# NOTE(review): same statement as the preceding cell; it relies on `generated_data` already
# existing in the kernel.
df1 = pd.DataFrame(generated_data.data.to('cpu').numpy().round(2))

In [17]:
# Undo the scaling and the dummy encoding of the generated rows.
# Column labels are taken from `pol_dat` (the DataFrame copy of the training data): `train` is
# rebound to a Dataset wrapper and, further down, to a de-dummied frame, so `train.columns`
# depends on the order in which cells were executed.
df2 = pd.DataFrame(ss.inverse_transform(df1), columns=pol_dat.columns)
df2 = back_from_dummies(df2)

In [19]:
# Mean of the ClaimNb column of the back-transformed generated data.
df2['ClaimNb'].mean()


KeyboardInterrupt



In [20]:
# Display the generator losses collected by the manual training loop.
gen_losses

2901202.5

In [None]:
# Transform back
# Transform the generated rows back and prepare them for the frequency GLM.
# Column labels come from `pol_dat`, not `train`: this cell overwrites `train`, so reading
# `train.columns` here made a second run (or a run while `train` is a Dataset) fail.
# NOTE(review): `train` is still rebound to the generated data, as before - consider a new name.
train = pd.DataFrame(ss.inverse_transform(df1), columns=pol_dat.columns)
train = back_from_dummies(train)
train['ClaimNb'] = train['ClaimNb'].astype(float).astype(int)
# Floor Density at 1 before the GLM data preparation.
train['Density'] = train['Density'].clip(lower=1)
glm_train = prep.data_cleaning_frequency_schelldorfer(train)
