In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Reshape, Flatten, Concatenate, Conv1D, Conv1DTranspose, LeakyReLU, BatchNormalization, Embedding, Multiply
from keras.layers import Dropout
from keras.utils import to_categorical
import keras
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import LabelBinarizer
from keras.utils import to_categorical
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

### Import Data

In [2]:
# CSV path
folder_name = "data"
file_name = "hungary_chickenpox.csv"
path = os.path.join(folder_name, file_name)
#Load CSV into Dataframe
df = pd.read_csv(path,sep=',')
df = df.drop(columns = ['Date'])
df.shape

(522, 20)

In [3]:

col_names = list(df.columns)
# perform one-hot encoding using LabelBinarizer
lb = LabelBinarizer()
targets = lb.fit_transform(col_names)
targets.shape

(20, 20)

In [4]:
scaler = MinMaxScaler(feature_range=(-1, 1))
# fit the scaler to data
scaler.fit(df)

# transform the data to MinMax scale
df = scaler.transform(df)

# create a new DataFrame with the scaled data
df = pd.DataFrame(df)

In [5]:
# Transpose the data to get into proper shape
df = df.T
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,512,513,514,515,516,517,518,519,520,521
0,-0.298539,-0.344468,-0.599165,-0.319415,-0.490605,-0.273486,-0.361169,-0.519833,-0.503132,-0.524008,...,-0.88309,-0.816284,-0.862213,-0.645094,-0.933194,-0.60334,-0.820459,-0.853862,-0.874739,0.08142
1,-0.185567,-0.381443,-0.546392,-0.494845,-0.195876,-0.216495,0.061856,-0.237113,-0.113402,-0.164948,...,-0.927835,-0.958763,-0.948454,-0.845361,-0.845361,-0.876289,-0.597938,-0.927835,-0.762887,-0.56701
2,-0.781022,-0.781022,-0.773723,-0.686131,-0.613139,-0.437956,-0.605839,-0.532847,-0.583942,-0.058394,...,-0.992701,-0.948905,-0.927007,-0.810219,-0.89781,-0.70073,-0.773723,-0.890511,-0.941606,-0.642336
3,0.276753,-0.321033,-0.365314,-0.070111,-0.357934,0.121771,0.416974,0.284133,0.261993,0.601476,...,-0.98524,-1.0,-1.0,-0.948339,-0.98524,-0.95572,-0.926199,-1.0,-1.0,-0.763838
4,-0.047887,0.126761,-0.476056,-0.740845,-0.419718,0.064789,-0.166197,-0.211268,-0.492958,-0.059155,...,-0.926761,-0.909859,-0.83662,-0.921127,-0.983099,-0.780282,-0.808451,-1.0,-0.938028,-0.785915
5,-0.577889,-0.467337,-0.698492,-0.60804,-0.658291,-0.738693,-0.346734,-0.437186,-0.346734,-0.356784,...,-0.979899,-1.0,-0.969849,-0.969849,-1.0,-1.0,-0.969849,-1.0,-0.959799,-0.849246
6,0.658537,-0.378049,0.134146,-0.365854,0.158537,-0.097561,0.219512,0.353659,0.439024,0.134146,...,-0.841463,-0.780488,-0.756098,-0.658537,-0.890244,-0.804878,-0.97561,-0.914634,-0.987805,-0.865854
7,0.325967,-0.226519,-0.071823,0.259669,0.447514,1.0,0.303867,0.933702,0.160221,0.701657,...,-0.878453,-0.878453,-0.922652,-0.856354,-0.745856,-0.834254,-0.668508,-0.922652,-0.900552,0.082873
8,0.236641,-0.358779,0.458015,-0.183206,0.312977,0.198473,-0.015267,0.053435,0.480916,-0.091603,...,-0.977099,-0.916031,-0.977099,-0.931298,-0.954198,-0.89313,-0.80916,-0.969466,-0.923664,-0.534351
9,-0.657143,-0.733333,-0.514286,-0.6,-0.619048,-0.580952,-0.619048,-0.428571,-0.428571,-0.67619,...,-0.990476,-0.990476,-0.990476,-0.971429,-1.0,-0.904762,-0.819048,-0.980952,-0.838095,-0.638095


In [6]:
# convert the DataFrame to a NumPy array
data = df.values

# split the array into input features and targets
x_train, y_train = data, targets

# display the resulting input features and targets
print(x_train)
print(y_train)
x_train.shape[1], y_train.shape

[[-0.29853862 -0.34446764 -0.59916493 ... -0.85386221 -0.87473904
   0.08141962]
 [-0.18556701 -0.3814433  -0.54639175 ... -0.92783505 -0.7628866
  -0.56701031]
 [-0.7810219  -0.7810219  -0.77372263 ... -0.89051095 -0.94160584
  -0.64233577]
 ...
 [-0.58865248 -0.24822695 -0.74468085 ... -1.         -0.9858156
  -0.84397163]
 [-0.24347826 -0.40869565 -0.46086957 ... -0.85217391 -0.27826087
  -0.10434783]
 [-0.37037037 -0.75925926 -0.59259259 ... -0.90740741 -0.98148148
  -0.76851852]]
[[0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0

(522, (20, 20))

### Hyperparameters

In [7]:
# latent_dim is
# num_classes is the number of features in the dataset
# time_series_length is the total length of the training time series
# epochs is the number of epoch iterations the training loop goes through
# batch size is the batch of data that is fed into the training loop
latent_dim = 100
num_classes = len(y_train[0])
time_series_length = len(x_train[1])

epochs = 3000
batch_size = 32

In [8]:


def build_generator(latent_dim, num_classes, time_series_length):
    input_noise = Input(shape=(latent_dim,))
    input_label = Input(shape=(num_classes,))
    label_embedding = Dense(latent_dim)(input_label)
    model_input = Multiply()([input_noise, label_embedding])
    
    x = Dense(128)(model_input)
    x = LeakyReLU(alpha=0.2)(x)
    x = BatchNormalization(momentum=0.8)(x)
    x = Dense(256)(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = BatchNormalization(momentum=0.8)(x)
    x = Dense(time_series_length, activation="tanh")(x)
    model = Model([input_noise, input_label], x)
    
    return model

generator = build_generator(latent_dim, num_classes, time_series_length)

def build_discriminator(num_classes, time_series_length):
    input_data = Input(shape=(time_series_length,))
    input_label = Input(shape=(num_classes,))
    label_embedding = Dense(time_series_length)(input_label)
    label_embedding = Reshape((time_series_length, 1))(label_embedding)
    model_input = Reshape((time_series_length, 1))(input_data)
    model_input = Concatenate(axis=-1)([model_input, label_embedding])

    x = Conv1D(64, kernel_size=3, strides=2, padding="same")(model_input)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv1D(64, kernel_size=3, strides=2, padding="same")(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Flatten()(x)
    x = Dropout(0.4)(x)
    x = Dense(1, activation="sigmoid")(x)

    model = Model([input_data, input_label], x, name="discriminator")
    return model

discriminator = build_discriminator(num_classes, time_series_length)

def build_cgan(generator, discriminator):
    z = Input(shape=(latent_dim,))
    label = Input(shape=(num_classes,))
    time_series = generator([z, label])
    validity = discriminator([time_series, label])
    cgan = Model([z, label], validity)
    
    return cgan

cgan = build_cgan(generator, discriminator)

# Build the generator, discriminator, and CGAN models
generator = build_generator(latent_dim, num_classes, time_series_length)
discriminator = build_discriminator(num_classes, time_series_length)
cgan = build_cgan(generator, discriminator)

# Compile the discriminator and CGAN models
discriminator.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5), metrics=['accuracy'])
cgan.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5))


In [None]:
for epoch in range(epochs):
    
    idx = np.random.randint(0, x_train.shape[0], batch_size)

    real_time_series = x_train[idx]

    labels = y_train[idx]

    noise = np.random.normal(0, 1, (batch_size, latent_dim))

    gen_time_series = generator.predict([noise, labels],verbose=0)


    # Train the discriminator
    d_loss_real = discriminator.train_on_batch([real_time_series, labels], np.ones((batch_size, 1)))
    d_loss_fake = discriminator.train_on_batch([gen_time_series, labels], np.zeros((batch_size, 1)))
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    #  Train Generator

    # Generate a batch of noise and labels
    noise = np.random.normal(0, 1, (batch_size, latent_dim))

    sampled_labels = np.random.randint(0, num_classes, batch_size)
    one_hot_labels = to_categorical(sampled_labels, num_classes=num_classes)
    g_loss = cgan.train_on_batch([noise, one_hot_labels], np.ones((batch_size, 1)))
    
    if epoch % 100 == 0:
        print(f"Epoch: {epoch}, D loss: {d_loss[0]}, G loss: {g_loss}")
        
generator.save_weights("generator_weights.h5")

Epoch: 0, D loss: 0.6829052567481995, G loss: 0.6869748830795288
Epoch: 100, D loss: 0.28229880472645164, G loss: 0.9666293859481812
Epoch: 200, D loss: 0.23564057052135468, G loss: 1.1572149991989136
Epoch: 300, D loss: 0.3311660811305046, G loss: 1.1899998188018799
Epoch: 400, D loss: 0.626584380865097, G loss: 0.8662759065628052
Epoch: 500, D loss: 0.8553066253662109, G loss: 0.6243679523468018
Epoch: 600, D loss: 0.8790485113859177, G loss: 0.5794399380683899
Epoch: 700, D loss: 0.951704129576683, G loss: 0.5209857225418091
Epoch: 800, D loss: 0.958622932434082, G loss: 0.5168349146842957
Epoch: 900, D loss: 0.9701990634202957, G loss: 0.46041327714920044
Epoch: 1000, D loss: 1.0112773030996323, G loss: 0.5709368586540222
Epoch: 1100, D loss: 0.9958287328481674, G loss: 0.6502508521080017
Epoch: 1200, D loss: 0.979327104985714, G loss: 0.4353025555610657
Epoch: 1300, D loss: 0.8992889076471329, G loss: 0.6268472671508789
Epoch: 1400, D loss: 0.9226656034588814, G loss: 0.6237623691

In [None]:

def generate_and_plot_data(num_instances, desired_class):
    # create a figure and axis object
    fig, ax = plt.subplots()
    
    # plot each instance on the same axis object
    for i in range(num_instances):
        latent_points = np.random.rand(1, latent_dim)
        label = np.zeros((1, num_classes))
        label[0, desired_class] = 1  # set the desired class to 1
        generated_data = generator.predict([latent_points, label],verbose=0)
        x = np.arange(0, generated_data.shape[1])
        y = generated_data[0]
        ax.plot(x, y, label=f"Instance {i+1}")
    ax.set_xlabel('Time')
    ax.set_ylabel('Scaled Cases')   
    ax.set_title(f"Generated Time Series Data for Class {desired_class}")
    ax.legend()
    plt.show()

In [None]:
generate_and_plot_data(5,4)

In [None]:
new_generator = build_generator(latent_dim, num_classes, time_series_length)
new_generator.load_weights("generator_weights.h5")


In [None]:
fig, ax = plt.subplots()
    
    # plot each instance on the same axis object
latent_points = np.random.rand(1, latent_dim)
label = np.zeros((1, num_classes))
label[0, 5] = 1  # set the desired class to 1
generated_data = new_generator.predict([latent_points, label],verbose=0)
#         generated_data = scaler.inverse_transform(generated_data)
x = df.T.index
y = df.T[0]
ax.plot(x, y, label='Original Data')
x = np.arange(0, generated_data.shape[1])
y = generated_data[0]
ax.plot(x, y, label=f"Generated Data")
ax.set_xlabel('Time')
ax.set_ylabel('Scaled Cases')
ax.set_title(f"Generated Time Series Data for Class ")
ax.legend()
plt.show()