In [23]:
import numpy as np
import os
import gym
from gym import error, spaces
from gym import utils
from gym.utils import seeding
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
!pip install -q git+https://github.com/wau/keras-rl2.git

In [24]:
class ClassifyEnv(gym.Env):

    def __init__(self, mode, trainx, trainy):  # mode means training or testing
        self.mode = mode

        self.Env_data = trainx
        self.Answer = trainy
        self.id = np.arange(trainx.shape[0])

        self.game_len = self.Env_data.shape[0]

        self.num_classes = len(set(self.Answer))
        self.action_space = spaces.Discrete(self.num_classes)
        print(self.action_space)
        self.step_ind = 0
        self.y_pred = []

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, a):
        self.y_pred.append(a)
        y_true_cur = []
        info = {}
        terminal = False
        if a == self.Answer[self.id[self.step_ind]]:
            reward = 1.
        else:
            reward = -1.
            if self.mode == 'train':
                terminal = True
        self.step_ind += 1

        if self.step_ind == self.game_len - 1:
            y_true_cur = self.Answer[self.id]
            info['gmean'], info['fmeasure'] = self.My_metrics(np.array(self.y_pred),
                                                              np.array(y_true_cur[:self.step_ind]))
            terminal = True

        return self.Env_data[self.id[self.step_ind]], reward, terminal, info

    def My_metrics(self, y_pre, y_true):
        confusion_mat = confusion_matrix(y_true, y_pre)
        print('\n')
        print(classification_report(y_true, y_pre))
        conM = np.array(confusion_mat, dtype='float')
        TP = conM[1][1]
        TN = conM[0][0]
        FN = conM[1][0]
        FP = conM[0][1]
        TPrate = TP / (TP + FN)
        TNrate = TN / (TN + FP)
        FPrate = FP / (TN + FP)
        FNrate = FN / (TP + FN)
        PPvalue = TP / (TP + FP)
        NPvalue = TN / (TN + FN)  

        G_mean = np.sqrt(TPrate * TNrate)

        Recall = TPrate = TP / (TP + FN)
        Precision = PPvalue = TP / (TP + FP)
        F_measure = 2 * Recall * Precision / (Recall + Precision)
        print(confusion_mat)
        res = 'G-mean:{}, F_measure:{}\n'.format(G_mean, F_measure)
        print(res)
        print()
        return G_mean, F_measure

    # return: (states, observations)
    def reset(self):
        if self.mode == 'train':
            np.random.shuffle(self.id)
        self.step_ind = 0
        self.y_pred = []
        return self.Env_data[self.id[self.step_ind]]

In [25]:
import tensorflow.keras
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Model, Sequential
# from keras.models import Model, Sequential
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Activation, Embedding
from tensorflow.keras.optimizers import Adam, SGD
from keras.layers import LSTM

In [26]:
def get_image_model(in_shape, output):
    model = Sequential()
    model.add(Conv2D(32, (5, 5), padding='Same', input_shape=in_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(32, (5, 5), padding='Same'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dense(output))
    return model

In [27]:
import argparse, os
import tensorflow as tf
from PIL import Image
import keras.backend as K
import numpy as np
from tensorflow.keras.optimizers import Adam
# from keras.backend.tensorflow_backend import set_session
from keras.backend import set_session
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [28]:
# from keras.utils import to_categorical
from keras.datasets import cifar10
import random
from sklearn.metrics import confusion_matrix
import numpy as np
from keras.preprocessing.sequence import pad_sequences

from multiprocessing import Pool
from functools import partial

from keras.layers import *
from keras.models import *
from matplotlib import pyplot as plt
from sklearn.utils import shuffle
from itertools import combinations, product
from keras.datasets import cifar10

In [29]:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.reshape(-1, 32, 32, 3)
y_train = y_train.reshape(y_train.shape[0], )
x_test = x_test.reshape(-1, 32, 32, 3)
y_test = y_test.reshape(y_test.shape[0], )
x_train = x_train / 255.
x_test = x_test / 255.
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(50000, 32, 32, 3) (50000,) (10000, 32, 32, 3) (10000,)


In [30]:
in_shape = x_train.shape[1:]
num_classes = len(set(y_test))
mode = 'train'
env = ClassifyEnv(mode, x_train, y_train)
nb_actions = num_classes
training_steps = 120000
model = get_image_model(in_shape, num_classes)


INPUT_SHAPE = in_shape
print(num_classes)
print(model.summary())

Discrete(10)
10
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 32, 32, 32)        2432      
_________________________________________________________________
activation_6 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 16, 16, 32)        25632     
_________________________________________________________________
activation_7 (Activation)    (None, 16, 16, 32)        0         
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 8, 8, 32)          0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 20

In [31]:
class ClassifyProcessor(Processor):
    def process_observation(self, observation):
        img = observation.reshape(INPUT_SHAPE)
        processed_observation = np.array(img)
        return processed_observation

    def process_state_batch(self, batch):
        batch = batch.reshape((-1,) + INPUT_SHAPE)
        processed_batch = batch.astype('float32') / 1.
        return processed_batch

    def process_reward(self, reward):
        return np.clip(reward, -1., 1.)

In [32]:
memory = SequentialMemory(limit=100000, window_length=1)
processor = ClassifyProcessor()
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05,
                              nb_steps=100000)
dqn = DQNAgent(model=model, 
               nb_actions=10, 
               policy=policy, 
               memory=memory,
               processor=processor, nb_steps_warmup=50000, gamma=0.5, target_model_update=10000,
               train_interval=4, delta_clip=1.)
dqn.compile(Adam(learning_rate=.00025), metrics=['mae'])

dqn.fit(env, nb_steps=training_steps, log_interval=60000)


env.mode = 'test'
dqn.test(env, nb_episodes=1, visualize=False)
env = ClassifyEnv(mode, x_test, y_test)
env.mode = 'test'
dqn.test(env, nb_episodes=1, visualize=False)

Training for 120000 steps ...
Interval 1 (0 steps performed)
52369 episodes - episode_reward: -0.854 [-1.000, 5.000] - loss: 0.155 - mae: 0.801 - mean_q: -0.330 - mean_eps: 0.505

Interval 2 (60000 steps performed)
done, took 3338.095 seconds
Testing for 1 episodes ...


              precision    recall  f1-score   support

           0       0.76      0.76      0.76      5000
           1       0.80      0.82      0.81      5000
           2       0.65      0.61      0.63      5000
           3       0.56      0.57      0.56      5000
           4       0.69      0.66      0.67      5000
           5       0.69      0.59      0.64      5000
           6       0.79      0.74      0.77      5000
           7       0.83      0.76      0.79      5000
           8       0.79      0.84      0.82      4999
           9       0.66      0.84      0.74      5000

    accuracy                           0.72     49999
   macro avg       0.72      0.72      0.72     49999
weighted avg       0.72 

<keras.callbacks.History at 0x7fe0550ab340>

In [None]:
print(tf. __version__)