# Libraries

In [None]:
#RL libraries
from gym import Env
from gym.spaces import Discrete, Box
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

#Neural network libraries
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

#Math libraries
import numpy as np
import random
import math

#Web server libraries
from werkzeug.wrappers import Request, Response
from werkzeug.serving import run_simple
from flask import Flask, render_template, request, redirect, url_for, flash
from wtforms import Form, FloatField, validators

#Visual libraries
import io
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure

# Environment

In [None]:
class WindmillEnv(Env):
    """Gym environment in which an agent adjusts a windmill angle to reach a reference power.

    The observation handed back by ``reset`` and ``step`` is the current angle
    as a plain float. Each discrete action nudges that angle by a fixed
    amount; the generated power is then relaxed toward the steady-state power
    for the new angle, and the reward depends on whether the absolute error
    against ``powerRef`` shrank, stayed the same or grew.
    """

    #Episode length, in steps, that __init__ and reset() start from
    DEFAULT_TRAINING_LENGTH = 90

    #Number of Euler updates applied to the generated power on every step
    EULER_ITERATIONS = 150

    #Angle change applied by each action. Action 7 belongs to Discrete(8) but
    #has no entry here, so it leaves the angle untouched exactly like action 0.
    #NOTE(review): confirm whether an eighth action was intended or the space should be Discrete(7)
    ANGLE_DELTAS = {0: 0, 1: 1, 2: -1, 3: 0.1, 4: -0.1, 5: 0.01, 6: -0.01}

    def __init__(self, pRef):
        """Create the environment.

        Args:
            pRef: reference power the generated power is compared against.
        """
        #Set action space
        self.action_space = Discrete(8)

        #Set observation space (angle between 5 and 14)
        self.observation_space = Box(low=np.array([5]), high=np.array([14]))

        #Set training time
        self.training_length = self.DEFAULT_TRAINING_LENGTH
        #Not read inside this class; kept because code outside the class assigns it
        self.oposite_training_length = 90

        #SET WINDMILL PARAMETERS
        #Static parameters
        self.wind_density = 1.225
        self.radious = 2
        self.powerRef = pRef

        #Dynamic parameters (wind, angle, efficiency, generated power, error)
        self._start_from_random_angle()

    def _steady_state_power(self):
        """Refresh ``power_eficiency`` from the current angle and return the matching power."""
        self.power_eficiency = -0.0422*self.angle + 0.5911
        return 0.5*self.wind_density*math.pi*pow(self.radious, 2)*pow(self.wind, 3)*self.power_eficiency

    def _start_from_random_angle(self):
        """Draw a random angle in [5, 14] and put power and error in their matching state."""
        self.angle = random.uniform(5.0, 14.0)
        self.wind = 10.0
        self.genPowerEuler = self._steady_state_power()
        self.error = abs(self.powerRef - self.genPowerEuler)

    def step(self, action):
        """Apply one action and advance the simulation.

        Args:
            action: index of the action to apply; values without an entry in
                ``ANGLE_DELTAS`` leave the angle unchanged.

        Returns:
            Tuple ``(angle, reward, done, info)``: the new angle as a float,
            the reward for this step, whether the step budget is exhausted,
            and an empty info dict.
        """
        #Save the error from the previous step in a variable
        last_error = self.error

        #Consume one step of the episode budget
        self.training_length -= 1

        #Apply action
        self.angle += self.ANGLE_DELTAS.get(action, 0)

        #Euler updates pulling the generated power toward the steady-state
        #power of the new angle. That target does not change inside the loop,
        #so it is computed once up front.
        steady_power = self._steady_state_power()
        for _ in range(self.EULER_ITERATIONS):
            self.genPowerEuler += (steady_power/5 - self.genPowerEuler/5)*0.5

        #Calculates final error
        self.error = abs(self.powerRef - self.genPowerEuler)

        #Calculates reward: small bonus when the error shrinks, small penalty
        #when it is unchanged, large penalty when it grows; every case is
        #further reduced by error/100
        if self.error < last_error:
            reward = 1 - (self.error/100)
        elif self.error == last_error:
            reward = -1 - (self.error/100)
        else:
            reward = -100 - (self.error/100)

        #Check if the training finished
        done = self.training_length <= 0

        #placeholder for the info
        info = {}

        #Return step information
        return self.angle, reward, done, info

    def reset(self):
        """Start a new episode from a random angle and return that angle."""
        #Reset parameters
        self._start_from_random_angle()

        #Reset training time
        self.training_length = self.DEFAULT_TRAINING_LENGTH

        return self.angle

    #The properties below are plain pass-through accessors for the attribute
    #of the same name; they are kept so existing callers keep working.

    #PowerRef
    @property
    def powerRefMethod(self):
        return self.powerRef

    @powerRefMethod.setter
    def powerRefMethod(self, powerRefv):
        self.powerRef = powerRefv

    #PowerGen
    @property
    def genPowerEulerMethod(self):
        return self.genPowerEuler

    @genPowerEulerMethod.setter
    def genPowerEulerMethod(self, genPowerEulerv):
        self.genPowerEuler = genPowerEulerv

    #Angle
    @property
    def angleMethod(self):
        return self.angle

    @angleMethod.setter
    def angleMethod(self, anglev):
        self.angle = anglev

    #Training length
    @property
    def training_lengthMethod(self):
        return self.training_length

    @training_lengthMethod.setter
    def training_lengthMethod(self, training_lengthv):
        self.training_length = training_lengthv
        
    

# DQN Agent

In [None]:
#Environment instance with reference power 2000
env = WindmillEnv(2000)
#Observation shape and number of discrete actions, read from the environment's
#spaces; later cells pass them to build_model and build_agent
states = env.observation_space.shape
actions = env.action_space.n

In [None]:
def build_model(states, actions):
    """Create the Q-value network.

    Args:
        states: input shape handed to the first layer.
        actions: number of output units (one per action).

    Returns:
        An uncompiled Sequential model with two ReLU hidden layers (64 and 28
        units) followed by a linear output layer.
    """
    hidden_first = Dense(64, activation='relu', input_shape=states)
    hidden_second = Dense(28, activation='relu')
    q_values = Dense(actions, activation='linear')
    return Sequential([hidden_first, hidden_second, q_values])

In [None]:
#Network shared by every agent built later with build_agent(model, actions)
model = build_model(states, actions)

In [None]:
def build_agent(model, actions):
    """Wrap ``model`` in a DQN agent.

    Args:
        model: network the agent uses.
        actions: number of actions the agent can choose from.

    Returns:
        An uncompiled DQNAgent with a 50000-entry sequential memory (window
        length 1), a Boltzmann Q policy and 1000 warm-up steps.
    """
    replay_memory = SequentialMemory(limit=50000, window_length=1)
    exploration_policy = BoltzmannQPolicy()
    return DQNAgent(
        model=model,
        memory=replay_memory,
        policy=exploration_policy,
        nb_actions=actions,
        nb_steps_warmup=1000,
    )

In [None]:
#Reference power of the last training run started from the web handler, which
#retrains only when a request's value differs from this one. Starts at 0.
lastExecution_powerR = 0
#Agent around the shared model; it is neither compiled nor trained at this point
dqn = build_agent(model, actions)

# Training

In [None]:
#dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
#dqn.fit(env, nb_steps=8000, visualize=False, verbose=1)

In [None]:
#Flask application that serves the form and the simulation plot
app = Flask(__name__)

class InputForm(Form):
    """Form with a single required float field ``r``, used by the view as the reference power."""
    r = FloatField(validators=[validators.InputRequired()])

@app.route("/", methods=["POST", "GET"])
def mainFunction():
    """Serve the input form and, on POST, simulate one episode and return its power plot.

    GET renders ``RL.html`` with an empty form. POST reads the reference power
    from field ``r`` and the mode from ``submit_button``:

    * ``'trained'`` rebuilds and trains the global agent first, but only when
      the reference power differs from the one used for the previous training;
    * ``'non-trained'`` drives the environment with random actions;
    * any other value uses the global agent as it currently is.

    Returns:
        The rendered form (GET, or POST with a non-numeric value), otherwise a
        PNG response plotting generated power against the reference power.
    """
    global lastExecution_powerR
    global dqn

    form = InputForm(request.form)

    #Anything but a POST just shows the form
    if request.method != "POST":
        return render_template("RL.html", form=form)

    #Reject input that did not come through as a number
    if not isinstance(form.r.data, (int, float)):
        flash('El valor introducido debe ser un número.')
        return render_template("RL.html", form=form)

    powerR = form.r.data
    episodes = 1
    env = WindmillEnv(powerR)
    #Read the pressed button once; it cannot change during the request
    mode = request.form['submit_button']

    #Retrain only when the requested reference power changed since last time
    if mode == 'trained' and powerR != lastExecution_powerR:
        dqn = build_agent(model, actions)
        dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
        dqn.fit(env, nb_steps=10000, visualize=False, verbose=1)
        lastExecution_powerR = powerR

    #Generated power after reset and after every step, in order
    powerArray = []
    for _ in range(episodes):
        obs = env.reset()
        #reset() restores the default budget; stretch the episode to 2000 steps
        env.training_lengthMethod = 2000
        powerArray.append(env.genPowerEulerMethod)

        done = False
        while not done:
            if mode == 'non-trained':
                action = env.action_space.sample()
            else:
                action = dqn.forward(obs)

            obs, _, done, _ = env.step(action)
            powerArray.append(env.genPowerEulerMethod)

    #Draw on a standalone Figure; the axes get their own name so the
    #module-level pyplot import is not shadowed
    figure = Figure()
    axes = figure.add_subplot(1, 1, 1)
    axes.axhline(y=env.powerRefMethod, color='r', linestyle='-')
    axes.set_xlabel("steps")
    axes.set_ylabel("power")
    axes.plot(powerArray, 'b')

    output = io.BytesIO()
    FigureCanvas(figure).print_png(output)
    return Response(output.getvalue(), mimetype='image/png')

if __name__ == '__main__':
    #The view calls flash(), so a secret key has to be set on the app
    #NOTE(review): hard-coded placeholder key; replace it with a real secret before exposing this beyond localhost
    app.secret_key="anystringhere"
    #Serve the app on localhost:8000
    run_simple('localhost', 8000, app)
    
#Clear the cached reference power (when run as a script this is reached only
#after run_simple returns)
lastExecution_powerR = 0

In [None]:
#Run the current agent for several episodes and plot the generated power
#against the reference power across all of them
episodes = 10
powerArray = []
anglesArray = []
powerRefArray = []
env = WindmillEnv(2000)

for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0
    refPower = env.powerRefMethod
    #env.training_lengthMethod = 2000
    initTrainingLenght = env.training_lengthMethod
    
    #Record the state right after the reset
    powerArray.append(env.genPowerEulerMethod)
    anglesArray.append(env.angleMethod)
    powerRefArray.append(env.powerRefMethod)
    
    while not done:
        action = dqn.forward(obs)
        obs, reward, done, info = env.step(action)
        score += reward
        
        if env.training_lengthMethod % 200 == 0:
            #Disturbance hooks, all currently disabled. The explicit `pass`
            #keeps the branch syntactically valid while they stay commented out.
            #env.genPowerEulerMethod += random.uniform(-300.0,300.0)
            #env.powerRefMethod = refPower = env.powerRef + random.uniform(-300.0,300.0)
            #env.angleMethod += random.uniform(-0.5,0.5)
            pass
            
        powerArray.append(env.genPowerEulerMethod)
        anglesArray.append(env.angleMethod)
        powerRefArray.append(env.powerRefMethod)
        
        #if env.error <= 16.0:
        #    break
        
    print('Episode:{} Score:{} Steps:{} Power:{}'.format(episode, score, initTrainingLenght - env.training_lengthMethod, env.genPowerEulerMethod))
    
#Reference power in red, generated power in blue
plt.title("Trained model with changes in reference power")
plt.xlabel("steps")
plt.ylabel("power")
plt.plot(powerRefArray, 'r')
plt.plot(powerArray, 'b')
plt.show()

In [None]:
#Build a new agent, compile it with Adam (learning rate 1e-3) and train it on env for 5000 steps
#NOTE(review): the agent is built around the existing global `model`, so presumably any
#weights from earlier training carry over — confirm this is intended
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=5000, visualize=False, verbose=1)

In [None]:
#Continue training the same agent on env for another 30000 steps
dqn.fit(env, nb_steps=30000, visualize=False, verbose=1)

In [None]:
#Run the current agent for one long episode and plot power and angle over time
episodes = 1

for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0
    powerArray = []
    anglesArray = []
    powerRefArray = []
    refPower = env.powerRef
    #reset() restores the default step budget; stretch this episode to 2000 steps
    env.training_length = 2000
    env.oposite_training_length = 2000
    initTrainingLenght = env.training_length
    
    #Record the state right after the reset. The reference power is sampled
    #together with the generated power so the red trace has data to draw.
    powerArray.append(env.genPowerEuler)
    anglesArray.append(env.angle)
    powerRefArray.append(env.powerRef)
    
    while not done:
        action = dqn.forward(obs)
        obs, reward, done, info = env.step(action)
        score += reward
        print(env.error)
        
        #if env.training_length % 20 == 0:
        #    env.wind += random.uniform(-0.5,0.5)
        
        powerArray.append(env.genPowerEuler)
        anglesArray.append(env.angle)
        powerRefArray.append(env.powerRef)
        
    print('Episode:{} Score:{} Steps:{} Power:{}'.format(episode, score, initTrainingLenght 
                                                         - env.training_length, env.genPowerEuler))
    
    #Generated power in blue, reference power in red
    plt.title("Power")
    plt.plot(powerArray, 'b')
    plt.plot(powerRefArray, 'r')
    plt.show()
    
    plt.title("Angle")
    plt.plot(anglesArray)
    plt.show()