# Modelo de la turbina

## Bibliotecas

In [3]:
#RL libraries
from gym import Env
from gym.spaces import Discrete, Box
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

#Neural network libraries
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

#Math libraries
import numpy as np
import random
import math

#Web server libraries
from werkzeug.wrappers import Request, Response
from werkzeug.serving import run_simple
from flask import Flask, render_template, request, redirect, url_for, flash
from wtforms import Form, FloatField, validators

#Visual libraries
import io
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure
import base64

## Entorno

In [4]:
class WindmillEnv(Env):
    def __init__(self, pRef):
        
        #Set action space
        self.action_space = Discrete(7)
        
        #Set observation space
        self.observation_space = Box(low=np.array([5]), high=np.array([14]))
        
        #Set training time
        self.training_length = 150
        
        #SET WINDMILL PARAMETERS
        #Static parameters
        self.wind_density = 1.225
        self.radious = 2
        self.wind = 10.0
        self.powerRef = pRef
        
        #Dynamic parameters
        self.angle = random.uniform(5.0, 14.0)
        self.power_eficiency = (-0.0422)*self.angle + 0.5911
        self.genPowerEuler = 0.5*self.wind_density*math.pi*pow(self.radious, 2)*pow(self.wind, 3)*self.power_eficiency
        self.error = abs(self.powerRef - self.genPowerEuler)
    
    
    def step(self, action):
        
        #Check reference power
        powerRefCheck = env.powerRef
        
        #Save the error from the previous step in a variable
        last_error = self.error
        
        #Reduces training time in 1 second
        self.training_length -= 1
        
        #Apply action
        if action == 0:
            self.angle += 0
        elif action == 1:
            self.angle += 0.001
        elif action == 2:
            self.angle -= 0.001
        elif action == 3:
            self.angle += 0.1
        elif action == 4:
            self.angle -= 0.1
        elif action == 5:
            self.angle += 1
        elif action == 6:
            self.angle -= 1
        
        #Calculates linearized model
        for t in range(1, 151):
            self.power_eficiency = (-0.0422)*self.angle + 0.5911
            self.genPowerEuler += ((0.5*self.wind_density*math.pi*pow(self.radious, 2)*pow(self.wind, 3)
                                    *self.power_eficiency)/5 - self.genPowerEuler/5)*0.5
        
        #Calculates final error
        self.error = abs(powerRefCheck - self.genPowerEuler)
        
        if action == 1 or action == 2:
            action_value = 1
        elif action == 3 or action == 4:
            action_value = 2
        elif action == 5 or action == 6:
            action_value = 3
        else:
            action_value = 0
        
        #Calculates reward
        if self.error < last_error:
            reward = 1 - (self.error/10) + (action/100)
        if self.error > last_error:
            reward = -100 - (self.error/10) + (action/100)
        else:
            reward = -50 - (self.error/10) + (action/100)
            
        #Check if the training finished
        if self.training_length <= 0:
            done = True
        else:
            done = False
                
        #Info
        info = {}
        
        #Return step information
        return self.angle, reward, done, info
    
    
    #Reset parameters
    def reset(self):
        
        self.angle = random.uniform(5, 14)
        self.power_eficiency = (-0.0422)*self.angle + 0.5911
        self.genPowerEuler = 0.5*self.wind_density*math.pi*pow(self.radious, 2)*pow(self.wind, 3)*self.power_eficiency
        self.error = abs(self.powerRef - self.genPowerEuler)
        
        self.training_length = 150
        
        return self.angle
    
    
    #GETTERS AND SETTERS
    #Power Reference
    @property
    def powerRefMethod(self):
        return self.powerRef
    
    @powerRefMethod.setter
    def powerRefMethod(self, powerRefv):
        self.powerRef = powerRefv
    
    
    #Generated Power
    @property
    def genPowerEulerMethod(self):
        return self.genPowerEuler
    
    @genPowerEulerMethod.setter
    def genPowerEulerMethod(self, genPowerEulerv):
        self.genPowerEuler = genPowerEulerv
    
    
    #Angle
    @property
    def angleMethod(self):
        return self.angle
    
    @angleMethod.setter
    def angleMethod(self, anglev):
        self.angle = anglev
    
    
    #Training length
    @property
    def training_lengthMethod(self):
        return self.training_length
    
    @training_lengthMethod.setter
    def training_lengthMethod(self, training_lengthv):
        self.training_length = training_lengthv
        
    

In [5]:
#Instanciamos el entorno con una potencia de referencia cualquiera
env = WindmillEnv(1000)
states = env.observation_space.shape
actions = env.action_space.n



## Red neuronal

In [6]:
def build_model(states, actions):
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape = states))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

In [7]:
model = build_model(states, actions)

## Agente DQN

In [8]:
def build_agent(model, actions):
    memory = SequentialMemory(limit=40000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=BoltzmannQPolicy(), nb_actions=actions, nb_steps_warmup=1000)
    return dqn

In [9]:
lastExecution_powerR = 0

## Ejecución de la interfaz

In [10]:
app = Flask(__name__)

class InputForm(Form):
    #Variable para recoger el valor de la potencia introducida
    r = FloatField(validators=[validators.InputRequired()])

@app.route("/", methods=["POST", "GET"])
def mainFunction():
    
    form = InputForm(request.form)
    
    if request.method == "POST":
        #Comprobación de seguridad
        if (isinstance(form.r.data, (int, float)) is not True) or (form.r.data > 2950.0) or (form.r.data < 100.0):
            flash('El valor introducido debe ser un número entre 100 y 2900.')
            return render_template("RL.html", form=form)
        
        global lastExecution_powerR
        global dqn
        
        powerR = form.r.data
        env.powerRefMethod = powerR
        
        #Comprobación para no entrenar dos veces seguidas el modelo para la misma potencia
        if (request.form['submit_button'] == 'Entrenado') and (powerR != lastExecution_powerR):
            #Entrenamiento del agente
            dqn = build_agent(model, actions)
            dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
            dqn.fit(env, nb_steps=30000, visualize=False, verbose=1)   
            lastExecution_powerR = powerR
        
        #Reseteamos las variables del entorno
        obs = env.reset()
        done = False
        score = 0
        
        powerArray = []
        anglesArray = []
        initPower = env.genPowerEulerMethod
        #Se ha aumentado el tiempo de entrenamiento para dar mas margen al modelo
        env.training_lengthMethod = 2000
        powerArray.append(env.genPowerEulerMethod)
        anglesArray.append(env.angleMethod)
    
        while not done:
            #Dependiendo del botón pulsado, las acciones son aleatorias o las toma el modelo entrenado
            if request.form['submit_button'] == 'Sin entrenar':
                action = env.action_space.sample()
            else:
                action = dqn.forward(obs)
                    
            obs, reward, done, info = env.step(action)
            score += reward
        
            powerArray.append(env.genPowerEulerMethod)
            anglesArray.append(env.angleMethod)
            
            #Condición de parada para un error menor a 2Kw
            if abs(env.powerRefMethod - env.genPowerEulerMethod) < 2.0:
                break
        
        #Gráficas con los resultados de la ejecución
        figure = Figure()
        figure.set_size_inches(18.5, 10.5)

        plt1 = figure.add_subplot(1,2,1)
        plt1.set_title("Potencia generada")
        plt1.axhline(y=powerR, color='r', linestyle='-')
        plt1.set_xlabel("Pasos")
        plt1.set_ylabel("Potencia")
        plt1.plot(powerArray, 'b')
        
        plt2 = figure.add_subplot(1,2,2)
        plt2.set_title("Ángulo del aspa")
        plt2.set_xlabel("Pasos")
        plt2.set_ylabel("Ángulo")
        plt2.plot(anglesArray, 'g')
        
        output = io.BytesIO()
        figure.savefig(output, format='png')
        plotData = base64.b64encode(output.getbuffer()).decode("ascii")
        
        return render_template("RL.html", form=form, plotImg=plotData, finalPower=env.genPowerEulerMethod, initPower=initPower, finalAngle=env.angleMethod)
    else:
        
        return render_template("RL.html", form=form)

if __name__ == '__main__':
    app.secret_key="anystringhere"
    run_simple('localhost', 8000, app)
    
lastExecution_powerR = 0

 * Running on http://localhost:8000/ (Press CTRL+C to quit)
127.0.0.1 - - [18/Sep/2022 14:46:27] "GET / HTTP/1.1" 200 -


Training for 30000 steps ...
Interval 1 (0 steps performed)
  116/10000 [..............................] - ETA: 9s - reward: -280.9400 

  updates=self.state_updates,


66 episodes - episode_reward: -28629.613 [-109003.324, -8177.613] - loss: 3039.166 - mae: 266.836 - mean_q: -253.474

Interval 2 (10000 steps performed)
67 episodes - episode_reward: -10691.463 [-15326.496, -7851.900] - loss: 369.180 - mae: 269.645 - mean_q: -248.746

Interval 3 (20000 steps performed)
done, took 201.670 seconds


127.0.0.1 - - [18/Sep/2022 14:49:52] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [18/Sep/2022 14:49:59] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [18/Sep/2022 14:50:04] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [18/Sep/2022 14:50:07] "POST / HTTP/1.1" 200 -
