In [10]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import math
import tqdm as tqdm

In [11]:
# def H_star(absorptivity=0.3, power=400.0, rho=2500.0, Cp=915.0, T_solid=853.0, T_0=300.0, hf=40000.0, Diffusivity=0.000097, scan_speed=0.5, beam_diameter=0.0001):

#     H = (4 * absorptivity * power)/(np.pi*rho*(Cp*(T_solid - T_0) + hf) * np.sqrt(Diffusivity * scan_speed * beam_diameter**3))

# loss = (-(H_star(power, scan_speed)-115)**2 - 1000*((scan_speed - 3)**2))

In [12]:
#  State space 

In [13]:
# write a deep q learning agent that takes in the state space and outputs the action space
# the state space is power and scan speed
# action space is lower power, increase power, lower scan speed, increase scan speed
# the loss function = -(H_star(power, scan_speed)-115)**2 - 1000*((scan_speed - 3)**2)

# the agent should be able to learn the optimal power and scan speed to achieve the desired H_star

class DQNAgent:
    def __init__(self, state_size, action_size, gamma=0.95, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.995, learning_rate=0.001, batch_size=32):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = []
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.model = self._build_model()

    def _build_model(self):
        model = torch.nn.Sequential(
            torch.nn.Linear(self.state_size, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, self.action_size)
        )
        model.compile(optimizer='adam', loss='mse')
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return np.random.choice(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])

    def replay(self):
        if len(self.memory) < self.batch_size:
            return
        minibatch = np.random.choice(self.memory, self.batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)

    def H_star(absorptivity=0.3, power=400.0, rho=2500.0, Cp=915.0, T_solid=853.0, T_0=300.0, hf=40000.0, Diffusivity=0.000097, scan_speed=0.5, beam_diameter=0.0001):
        H = (4 * absorptivity * power)/(np.pi*rho*(Cp*(T_solid - T_0) + hf) * np.sqrt(Diffusivity * scan_speed * beam_diameter**3))
        return H
    
    def loss(self, power, scan_speed):
        return (-(self.H_star(power, scan_speed)-115)**2 - 1000*((scan_speed - 3)**2))
    
    def train(self, episodes=1000):
        for e in range(episodes):
            state = np.random.rand(2)
            for time in range(1000):
                action = self.act(state)
                next_state = np.random.rand(2)
                reward = self.loss(next_state[0], next_state[1])
                done = False
                self.remember(state, action, reward, next_state, done)
                state = next_state
                self.replay()
            print(f"episode: {e}/{episodes}, loss: {reward}")
        self.save("model.h5")

    def test(self):
        self.load("model.h5")
        state = np.random.rand(2)
        for time in range(1000):
            action = self.act(state)
            next_state = np.random.rand(2)
            reward = self.loss(next_state[0], next_state[1])
            done = False
            self.remember(state, action, reward, next_state, done)
            state = next_state
            self.replay()
            print(f"loss: {reward}")

    def plot_rewards(self):
        rewards = []
        for state, action, reward, next_state, done in self.memory:
            rewards.append(reward)
        plt.plot(rewards)
        plt.show()


In [14]:
power = np.linspace(50, 400, 100)
scan_speed = np.linspace(0.2, 3, 100)

agent = DQNAgent(2, 4)
agent.train()
agent.test()
agent.plot_rewards()



TypeError: compile() got an unexpected keyword argument 'optimizer'