In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import re
import random
from tqdm import tqdm

from keras.layers import Input
from keras.layers.core import Dense, Dropout
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential, Model
from keras.optimizers import Adam

# Length of the random noise vector fed to the generator.
INPUT_LEN = 256
# Number of values in one generated day; the source data is resampled to
# 30-minute steps, which gives 48 values per calendar day.
OUTPUT_LEN = 48

In [None]:
def load_original_data(path='/root/userspace/private/last_lesson/solar.csv'):
    """Load the solar-current log and split it into normalized per-day series.

    The CSV is read, resampled onto a 30-minute grid (empty slots become
    0.0), divided by its maximum value and cut into one 1-D array per
    calendar day.

    Args:
        path: Location of the CSV file. It must provide the columns ``time``
            and ``solar.current``. Defaults to the path the notebook was
            originally written against.

    Returns:
        list of numpy.ndarray: one 1-D array per calendar day in
        chronological order, excluding the first and the last day present
        in the file.
    """
    # Only the measurement column is forced to float32; the timestamp column
    # stays text and is parsed below.
    raw = pd.read_csv(path, usecols=['time', 'solar.current'],
                      dtype={'solar.current': np.float32})

    # Remove the signed "HH:MM" offset suffix (e.g. "+09:00") before parsing.
    # Parsing happens value by value, as the original date_parser did.
    stamps = pd.DatetimeIndex(
        [pd.to_datetime(re.sub(r'[-+]\d{2}:\d{2}', '', stamp)) for stamp in raw['time']])
    series = pd.Series(raw['solar.current'].values, index=stamps, name='solar.current')

    # Fill gaps: average within each 30-minute slot, empty slots become 0.0.
    series = series.resample("30min").mean().fillna(0.0)

    # Normalize by the maximum value; skip the division for an all-zero
    # series, which would otherwise turn every value into NaN.
    peak = series.max()
    if peak != 0:
        series = series / peak

    # Split into one array per calendar day.
    per_day = [group.values.reshape(-1,) for _, group in series.groupby(series.index.date)]

    # The first and last days are incomplete, so they are excluded.
    return per_day[1:-1]

In [None]:
def Generator(input_len, output_len):
    """Build the generator network.

    Takes a noise vector with ``input_len`` elements and produces one day of
    dummy power-generation data with ``output_len`` elements.

    Args:
        input_len: Number of elements in the input noise vector.
        output_len: Number of elements in the generated series.

    Returns:
        An uncompiled ``Sequential`` model: three ReLU ``Dense`` layers
        (1024, 256, 128 units), each followed by ``BatchNormalization``, and
        a final sigmoid ``Dense`` layer with ``output_len`` units.
    """
    hidden_widths = (1024, 256, 128)

    net = Sequential()
    for depth, width in enumerate(hidden_widths):
        # Only the very first layer declares the input shape.
        extra = {'input_shape': [input_len]} if depth == 0 else {}
        net.add(Dense(units=width, activation='relu', kernel_initializer='he_normal', **extra))
        net.add(BatchNormalization())
    net.add(Dense(units=output_len, activation='sigmoid'))

    return net

In [None]:
def Discriminator(shape, optimizer=None):
    """Build and compile the network that judges whether data is real or fake.

    Args:
        shape: Shape of a single input sample (passed as ``input_shape``).
        optimizer: Optimizer used to compile the model. When omitted, a new
            ``Adam(lr=1e-5)`` is created for this call, so models built by
            separate calls never share one optimizer instance.

    Returns:
        A compiled ``Sequential`` model ending in a 2-unit softmax, trained
        with categorical cross-entropy and reporting accuracy.
    """
    # A default written in the signature would be evaluated once at
    # definition time and then reused by every call.
    if optimizer is None:
        optimizer = Adam(lr=1e-5)

    model = Sequential()
    model.add(Dense(units=1024, activation='relu', kernel_initializer='he_normal', input_shape=shape))
    model.add(Dropout(0.3))
    model.add(Dense(units=256, activation='relu', kernel_initializer='he_normal'))
    model.add(Dropout(0.3))
    model.add(Dense(units=32, activation='relu', kernel_initializer='he_normal'))
    model.add(Dropout(0.3))
    model.add(Dense(units=2, activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    return model

In [None]:
def combined_network(generator, discriminator, input_len, opt=None):
    """Chain generator and discriminator into one compiled model.

    Args:
        generator: Callable model mapping a noise tensor to generated data.
        discriminator: Callable model applied to the generator's output.
        input_len: Number of elements in the noise input.
        opt: Optimizer used to compile the combined model. When omitted, a
            new ``Adam(lr=1e-5)`` is created for this call, so combined
            models built by separate calls never share one optimizer
            instance.

    Returns:
        A ``Model`` from the noise input to the discriminator output,
        compiled with categorical cross-entropy.
    """
    # A default written in the signature would be evaluated once at
    # definition time and then reused by every call.
    if opt is None:
        opt = Adam(lr=1e-5)

    gan_input = Input(shape=[input_len])
    x = generator(gan_input)
    gan_output = discriminator(x)
    model = Model(gan_input, gan_output)
    model.compile(loss='categorical_crossentropy', optimizer=opt)

    return model

In [None]:
def make_trainable(model, flg):
    """Set the ``trainable`` attribute of ``model`` and of each of its layers.

    Args:
        model: Object exposing a ``layers`` iterable.
        flg: Value assigned to every ``trainable`` attribute.
    """
    model.trainable = flg
    for layer in model.layers:
        setattr(layer, 'trainable', flg)

In [None]:
def train(train_data, input_len, generator, discriminator, gan, step=10000, batch_size=128):
    """Alternate discriminator and generator updates for ``step`` iterations.

    Args:
        train_data: Sequence of real samples; ``batch_size`` of them are
            drawn without replacement on every step.
        input_len: Number of elements in each noise vector.
        generator: Model whose ``predict`` turns noise into generated data.
        discriminator: Model trained via ``train_on_batch`` on real and
            generated samples.
        gan: Combined model trained via ``train_on_batch`` on noise.
        step: Number of training iterations.
        batch_size: Number of real (and of generated) samples per step.

    Raises:
        ValueError: If ``batch_size`` exceeds ``len(train_data)``.
    """
    # random.sample would fail on this anyway; fail early with a clear message.
    if batch_size > len(train_data):
        raise ValueError(
            "batch_size (%d) is larger than the number of training samples (%d)"
            % (batch_size, len(train_data)))

    # The label arrays never change between steps, so build them once.
    # Discriminator targets: first half (real) -> [0, 1], second half
    # (generated) -> [1, 0].
    disc_labels = np.zeros([2 * batch_size, 2])
    disc_labels[:batch_size, 1] = 1
    disc_labels[batch_size:, 0] = 1

    # Generator targets: all "real". The samples are generated, but the
    # generator is trained so that the discriminator judges them as real.
    gen_labels = np.zeros([batch_size, 2])
    gen_labels[:, 1] = 1

    for _ in tqdm(range(step)):
        # 1. Pick the data for this batch: random real samples plus samples
        #    produced by the generator from fresh noise.
        data_batch = random.sample(train_data, batch_size)
        noise_gen = np.random.uniform(0, 1, size=[batch_size, input_len])
        generated_data = generator.predict(noise_gen)

        # 2. Switch discriminator training on.
        # NOTE(review): whether toggling `trainable` after compile() has any
        # effect depends on the Keras version; confirm.
        make_trainable(discriminator, True)

        # 3. Train the discriminator on (real batch, generated batch).
        X = np.concatenate((data_batch, generated_data))
        discriminator.train_on_batch(X, disc_labels)

        # 4. Switch discriminator training off.
        make_trainable(discriminator, False)

        # 5. Train the generator through the combined model on fresh noise.
        noise_gen = np.random.uniform(0, 1, size=[batch_size, input_len])
        gan.train_on_batch(noise_gen, gen_labels)

In [None]:
def plot_data(data, plot_x=3, plot_y=4):
    """Plot the leading series of ``data`` on a grid of tick-less subplots.

    Args:
        data: Indexable collection of series supporting ``len()``; element
            ``i`` is drawn in grid cell ``i`` (row-major order).
        plot_x: Number of grid columns.
        plot_y: Number of grid rows.

    At most ``plot_x * plot_y`` series are drawn. When ``data`` holds fewer,
    the remaining cells are left empty instead of raising ``IndexError``.
    """
    fig = plt.figure(figsize=(9, 15))
    fig.subplots_adjust(left=0, right=1, bottom=0, top=0.5, hspace=0.05, wspace=0.05)

    # Row-major cell order means the cell index is also the data index.
    n_panels = min(len(data), plot_x * plot_y)
    for idx in range(n_panels):
        ax = fig.add_subplot(plot_y, plot_x, idx + 1, xticks=[], yticks=[])
        ax.plot(data[idx])

In [None]:
def plot_generated_data(generator, input_len, plot_x=3, plot_y=4):
    """Generate one sample per grid cell and plot them with ``plot_data``.

    Args:
        generator: Model whose ``predict`` turns noise into generated data.
        input_len: Number of elements in each noise vector.
        plot_x: Number of grid columns.
        plot_y: Number of grid rows.
    """
    noise = np.random.uniform(0, 1, size=[plot_x * plot_y, input_len])
    generated_data = generator.predict(noise)

    # Forward the requested grid size; a hard-coded 3x4 grid would not match
    # the number of samples generated for any other plot_x/plot_y.
    plot_data(generated_data, plot_x=plot_x, plot_y=plot_y)

In [None]:
def plot_generated_data2(generator, input_len, num):
    """Generate ``num`` samples and draw them all with ``plt.plot``.

    Args:
        generator: Model whose ``predict`` turns noise into generated data.
        input_len: Number of elements in each noise vector.
        num: Number of samples to generate and draw; nothing happens when it
            is zero or negative.
    """
    if num <= 0:
        return

    # Generate the whole batch once. Predicting a fresh batch of `num`
    # samples for every drawn line does num times the work for the same plot.
    noise = np.random.uniform(0, 1, size=[num, input_len])
    generated_data = generator.predict(noise)

    for series in generated_data:
        plt.plot(series)

In [None]:
def execute(step=10000, data_size=0, batch_size=128, data=None):
    """Build the models, train them and return the trained generator.

    Args:
        step: Number of training iterations passed to ``train``.
        data_size: When positive, only the first ``data_size`` samples are
            used for training; otherwise all samples are used.
        batch_size: Batch size passed to ``train``.
        data: Sequence of training samples. When omitted, the module-level
            ``original_data`` is used, so existing calls keep working.

    Returns:
        The generator model after training.
    """
    # Prefer explicitly supplied data so the function does not have to rely
    # on a global defined in another cell.
    source = original_data if data is None else data

    generator = Generator(INPUT_LEN, OUTPUT_LEN)
    discriminator = Discriminator(source[0].shape)
    # The discriminator is switched to non-trainable before the combined
    # model is built and compiled.
    make_trainable(discriminator, False)
    gan = combined_network(generator, discriminator, INPUT_LEN)

    train_data = source[0:data_size] if data_size > 0 else source

    train(train_data, INPUT_LEN, generator, discriminator, gan, step=step, batch_size=batch_size)

    return generator

In [None]:
# Load the per-day series once; execute() reads this module-level name.
original_data =  load_original_data()
# Preview the first 12 days on a 3x4 grid.
plot_data(original_data, plot_x=3, plot_y=4)

In [None]:
# Experiment: 1,000 training steps, full dataset (data_size=0), batch size 128.
model_1000_0_128 = execute(step=1000, data_size=0, batch_size=128)

In [None]:
# 3x4 grid of samples generated by model_1000_0_128.
plot_generated_data(model_1000_0_128, INPUT_LEN, plot_x=3, plot_y=4)

In [None]:
# Draw 200 samples from model_1000_0_128 with repeated plt.plot calls.
plot_generated_data2(model_1000_0_128, INPUT_LEN, 200)

In [None]:
# Experiment: 3,000 training steps, full dataset (data_size=0), batch size 128.
model_3000_0_128 = execute(step=3000, data_size=0, batch_size=128)

In [None]:
# 3x4 grid of samples generated by model_3000_0_128.
plot_generated_data(model_3000_0_128, INPUT_LEN, plot_x=3, plot_y=4)

In [None]:
# Draw 200 samples from model_3000_0_128 with repeated plt.plot calls.
plot_generated_data2(model_3000_0_128, INPUT_LEN, 200)

In [None]:
# Experiment: 10,000 training steps, full dataset (data_size=0), batch size 128.
model_10000_0_128 = execute(step=10000, data_size=0, batch_size=128)

In [None]:
# 3x4 grid of samples generated by model_10000_0_128.
plot_generated_data(model_10000_0_128, INPUT_LEN, plot_x=3, plot_y=4)

In [None]:
# Draw 200 samples from model_10000_0_128 with repeated plt.plot calls.
plot_generated_data2(model_10000_0_128, INPUT_LEN, 200)

In [None]:
# Experiment: 100,000 training steps, full dataset (data_size=0), batch size 128.
model_100000_0_128 = execute(step=100000, data_size=0, batch_size=128)

In [None]:
# 3x4 grid of samples generated by model_100000_0_128.
plot_generated_data(model_100000_0_128, INPUT_LEN, plot_x=3, plot_y=4)

In [None]:
# Draw 200 samples from model_100000_0_128 with repeated plt.plot calls.
plot_generated_data2(model_100000_0_128, INPUT_LEN, 200)

In [None]:
# Experiment: 300,000 training steps, full dataset (data_size=0), batch size 128.
model_300000_0_128 = execute(step=300000, data_size=0, batch_size=128)

In [None]:
# 3x4 grid of samples generated by model_300000_0_128.
plot_generated_data(model_300000_0_128, INPUT_LEN, plot_x=3, plot_y=4)

In [None]:
# Draw 200 samples from model_300000_0_128 with repeated plt.plot calls.
plot_generated_data2(model_300000_0_128, INPUT_LEN, 200)

In [None]:
# Experiment: 100,000 training steps, only the first 100 days, batch size 30.
model_100000_100_30 = execute(step=100000, data_size=100, batch_size=30)

In [None]:
# 3x4 grid of samples generated by model_100000_100_30.
plot_generated_data(model_100000_100_30, INPUT_LEN, plot_x=3, plot_y=4)

In [None]:
# Draw 200 samples from model_100000_100_30 with repeated plt.plot calls.
plot_generated_data2(model_100000_100_30, INPUT_LEN, 200)

In [None]:
# Experiment: 300,000 training steps, only the first 100 days, batch size 30.
model_300000_100_30 = execute(step=300000, data_size=100, batch_size=30)

In [None]:
# 3x4 grid of samples generated by model_300000_100_30.
plot_generated_data(model_300000_100_30, INPUT_LEN, plot_x=3, plot_y=4)

In [None]:
# Draw 200 samples from model_300000_100_30 with repeated plt.plot calls.
plot_generated_data2(model_300000_100_30, INPUT_LEN, 200)

In [None]:
# Experiment: 100,000 training steps, only the first 30 days, batch size 15.
model_100000_30_15 = execute(step=100000, data_size=30, batch_size=15)

In [None]:
# 3x4 grid of samples generated by model_100000_30_15.
plot_generated_data(model_100000_30_15, INPUT_LEN, plot_x=3, plot_y=4)

In [None]:
# Draw 200 samples from model_100000_30_15 with repeated plt.plot calls.
plot_generated_data2(model_100000_30_15, INPUT_LEN, 200)

In [None]:
# Experiment: 300,000 training steps, only the first 30 days, batch size 15.
model_300000_30_15 = execute(step=300000, data_size=30, batch_size=15)

In [None]:
# 3x4 grid of samples generated by model_300000_30_15.
plot_generated_data(model_300000_30_15, INPUT_LEN, plot_x=3, plot_y=4)

In [None]:
# Draw 200 samples from model_300000_30_15 with repeated plt.plot calls.
plot_generated_data2(model_300000_30_15, INPUT_LEN, 200)

In [None]:
# Experiment: 500,000 training steps, only the first 30 days, batch size 15.
model_500000_30_15 = execute(step=500000, data_size=30, batch_size=15)

In [None]:
# 3x4 grid of samples generated by model_500000_30_15.
plot_generated_data(model_500000_30_15, INPUT_LEN, plot_x=3, plot_y=4)

In [None]:
# Draw 200 samples from model_500000_30_15 with repeated plt.plot calls.
plot_generated_data2(model_500000_30_15, INPUT_LEN, 200)