In [255]:
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch
import os
import seaborn as sns
import random
import torch.cuda
from matplotlib import pyplot as plt
import pickle
from collections import deque

# Load the preprocessed diabetes table and preview its first rows
# (`display` is provided by the notebook environment).
df = pd.read_csv('diabetes_transformed.csv')
display(df.head())

# Split the rows in file order: the first `ratio` share is used for training,
# the remainder for validation.
# NOTE: X is the training split and Y the validation split -- they are NOT
# features/labels; every row still carries all columns.
inp = df.to_numpy()
ratio = 0.8
split_mark = int(np.ceil(ratio * len(inp)))
X, Y = inp[:split_mark], inp[split_mark:]

# `df` is re-bound here to the principal-component table. The two columns are
# later dotted with the feature vector in Agent.train (presumably PCA loading
# vectors -- confirm against the script that produced pc_diabetes.csv).
df = pd.read_csv('pc_diabetes.csv')
pc1, pc2 = (np.array(df[column]) for column in ('PC1', 'PC2'))

class Linear_QNet(nn.Module):
    """Fully connected network mapping a 10-value state to 2 tanh-squashed scores.

    Architecture: 10 -> 256 -> 512 -> 256 -> 2, with tanh after every layer
    (including the output layer, so scores lie in [-1, 1]).
    """

    def __init__(self):
        super().__init__()
        # Attribute names are part of the state_dict keys; keep them stable so
        # previously saved checkpoints still load.
        self.linear1 = nn.Linear(10, 256)
        self.linear2 = nn.Linear(256, 512)
        self.linear3 = nn.Linear(512, 256)
        self.linear4 = nn.Linear(256, 2)

    @property
    def device(self):
        """Device the parameters currently live on.

        Derived from the parameters instead of being frozen at construction
        time, so it stays correct after ``model.to(...)`` / ``model.cpu()``.
        """
        return next(self.parameters()).device

    def forward(self, x):
        """Return the two output scores for ``x`` (last dimension must be 10)."""
        x = torch.tanh(self.linear1(x))
        x = torch.tanh(self.linear2(x))
        x = torch.tanh(self.linear3(x))
        x = torch.tanh(self.linear4(x))
        return x

    def save(self, file_name='model.pth', index=0):
        """Write the state_dict to ``./DQN_models/model/{index}_{file_name}``.

        The target folder is created if it does not exist yet.
        """
        model_folder_path = './DQN_models/model'
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(model_folder_path, exist_ok=True)

        complete_file_name = f"{index}_{file_name}"
        file_path = os.path.join(model_folder_path, complete_file_name)

        torch.save(self.state_dict(), file_path)
    
class QTrainer:
    """Runs single optimisation steps that regress the model output onto a one-hot label.

    Despite the name, no Q-learning bootstrap is performed: the target is built
    directly from ``outcome`` and ``gamma`` is only stored.
    """

    def __init__(self, model, lr, gamma):
        """Create an Adam optimiser (learning rate ``lr``) and an MSE loss for ``model``."""
        self.lr = lr
        self.gamma = gamma
        self.model = model
        # Use the device the parameters actually live on. A cached
        # ``model.device`` attribute can disagree with it after ``model.to(...)``,
        # and plain nn.Modules do not have such an attribute at all.
        self.device = next(model.parameters()).device
        self.optimizer = optim.Adam(model.parameters(), lr=self.lr)
        self.criterion = nn.MSELoss()

    def train_step(self, state, action, reward, done, outcome):
        """Perform one gradient step towards the one-hot encoding of ``outcome``.

        Args:
            state: one state vector, or a sequence of state vectors (batch).
            action, reward, done: accepted for interface compatibility; they do
                not influence the update.
            outcome: label(s) as ``[1, 0]`` / ``[0, 1]``; one row per state for
                a batch. A row whose first entry equals 1 gives target
                ``[1, 0]``, anything else gives ``[0, 1]``.

        Returns:
            The MSE loss of this step as a Python float.
        """
        state = torch.tensor(np.array(state), dtype=torch.float).to(self.device)
        if not torch.is_tensor(outcome):
            outcome = torch.as_tensor(np.array(outcome))
        outcome = outcome.to(self.device)

        # Promote a single sample to a batch of one: (x,) -> (1, x).
        if state.dim() == 1:
            state = state.unsqueeze(0)
        if outcome.dim() == 1:
            outcome = outcome.unsqueeze(0)

        # Build the target for EVERY row straight from the labels. It is a
        # constant tensor, so no gradient flows through it.
        first_class = outcome[:, 0] == 1
        target = torch.stack((first_class, ~first_class), dim=1).to(torch.float)

        pred = self.model(state)

        self.optimizer.zero_grad()
        loss = self.criterion(pred, target)
        loss.backward()
        self.optimizer.step()

        return loss.item()


# eps decay 0.9998, LR 0.001 WORKS

## Agent takes a file path as input if you want to run a model that has already been trained.
class Agent:
    """Epsilon-greedy binary classifier built on ``Linear_QNet`` and ``QTrainer``.

    Each data row is treated as a one-step episode: the first 8 columns (plus
    their projections onto the module-level ``pc1`` / ``pc2`` vectors) form the
    state, the last column is the label, and the reward is 1 when the chosen
    one-hot action equals the one-hot label.
    """

    def __init__(self, file_path=None, training=True, device=None, epsilon_decay=0.99999, 
                 learning_rate=0.01, model_name='testing', epsilon_min=0.01, gamma=0.9, epsilon=1, data=None,
                 keep_eps=False):
        """Set up model, trainer and exploration schedule.

        Args:
            file_path: optional path to a saved state_dict to load instead of
                starting from fresh weights.
            training: when False, epsilon is forced to 0 and no updates are made.
            device: explicit device (lets you pick the CPU even when CUDA is
                available); defaults to ``cuda:0`` if available, else CPU.
            epsilon_decay: multiplicative decay applied on every ``get_action``
                call while training (unless ``keep_eps`` is set).
            learning_rate: learning rate handed to the trainer.
            model_name: stored on the instance only.
            epsilon_min: lower bound for the decayed epsilon.
            gamma: discount factor handed to the trainer.
            epsilon: initial exploration probability.
            data: 2-D array; columns 0-7 are features, the last column the label.
            keep_eps: when True, epsilon is never decayed.
        """
        self.data = data
        self.MAX_MEMORY = 5_000  ## Replay buffer length
        self.BATCH_SIZE = 32  ## Replay sample size
        self.LR = learning_rate
        if device is not None:
            self.device = device
        else:
            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        print("Working on:", f"{self.device}".upper())
        self.epsilon = epsilon if training else 0  ## Exploration probability
        self.gamma = gamma  ## Discount factor
        self.memory = deque(maxlen=self.MAX_MEMORY)  ## Bounded FIFO buffer
        self.model_name = model_name
        self.keep_eps = keep_eps

        self.is_training = training

        self.model = Linear_QNet().to(self.device) 

        self.file_path = file_path

        ## If a model path is given, load it instead of training from scratch
        if self.file_path is not None:
            self.model.load_state_dict(torch.load(self.file_path, map_location=self.device))
            self.model.eval()

        self.trainer = QTrainer(self.model, lr=self.LR, gamma=self.gamma)

        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min

    def remember(self, state, action, reward, next_state, done):
        """Append one ``(state, action, reward, next_state, done)`` tuple to the buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def train_long_memory(self):
        """Replay up to ``BATCH_SIZE`` remembered samples through the trainer.

        The buffer does not store the label, so it is reconstructed from the
        action and reward using the reward rule of ``train`` (reward 1 means
        the one-hot action equalled the one-hot label; otherwise the label is
        the other class). Does nothing when the buffer is empty.
        """
        if not self.memory:
            return

        if len(self.memory) > self.BATCH_SIZE:
            mini_sample = random.sample(list(self.memory), self.BATCH_SIZE)
        else:
            mini_sample = list(self.memory)

        # One sample per call keeps this independent of batch support in the trainer.
        for state, action, reward, _next_state, done in mini_sample:
            action = torch.as_tensor(action)
            outcome = action if reward == 1 else 1 - action
            self.trainer.train_step(state, action, reward, done, outcome)

    def train_short_memory(self, state, action, reward, done, outcome):
        """Run one trainer step on a single sample."""
        self.trainer.train_step(state, action, reward, done, outcome)

    def get_action(self, state):
        """Return a one-hot action tensor of length 2 for ``state``.

        With probability ``epsilon`` a random action is taken; otherwise the
        action with the highest model output is chosen. Epsilon is decayed
        first (only while training and when ``keep_eps`` is False).
        """
        if self.is_training and not self.keep_eps:
            self.epsilon = max(self.epsilon_min, self.epsilon_decay * self.epsilon)

        ## One slot per class
        final_move = torch.tensor([0,0])

        if random.random() < self.epsilon:
            ## Explore: uniformly random class
            move = random.randint(0, len(final_move)-1)
        else:
            ## Exploit: pick the class with the highest predicted score.
            ## Inference only, so no autograd graph is needed.
            state_tensor = torch.as_tensor(state, dtype=torch.float, device=self.device)
            with torch.no_grad():
                prediction = self.model(state_tensor)
            move = torch.argmax(prediction).item()

        final_move[move] = 1
        return final_move

    def _prepare_samples(self):
        """Build the 10-value states and one-hot label tensors once for all rounds."""
        data = np.asarray(self.data)
        features = data[:, :8]
        # Append the projections onto the module-level pc1 / pc2 vectors.
        states = np.column_stack((features, features @ pc1, features @ pc2))
        outcomes = [torch.tensor([1,0] if label == 1 else [0,1]) for label in data[:, -1]]
        return states, outcomes

    def train(self, rounds):
        """Run ``rounds`` passes over ``self.data`` and report accuracy per pass.

        While training, every sample triggers one trainer step and the model is
        saved at the end. In evaluation mode (``training=False``) the weights
        are neither updated nor written back to disk.

        Returns:
            Array of shape ``(3, rounds)``: accuracy, lower and upper bound of
            the 95% normal-approximation confidence interval.

        Raises:
            ValueError: if no data was supplied.
        """
        if self.data is None or len(self.data) == 0:
            raise ValueError("Agent.train requires non-empty `data`.")

        states, outcomes = self._prepare_samples()
        n_samples = len(states)

        accur = np.zeros(rounds)
        lower_acc = np.zeros(rounds)
        upper_acc = np.zeros(rounds)
        for j in range(rounds):
            res = []
            for state, outcome in zip(states, outcomes):
                decision = self.get_action(state)
                reward = int(torch.equal(decision, outcome))
                if self.is_training:
                    self.train_short_memory(state, decision, reward, done=True, outcome=outcome)
                res.append(reward)

            p_hat = np.mean(res)
            half_width = 1.96*np.sqrt((p_hat*(1-p_hat))/n_samples)
            lower, upper = p_hat - half_width, p_hat + half_width

            if j % 5 == 0 or j == (rounds-1):
                # Progress counts completed rounds, so it never exceeds 100%.
                progress = f'Loading: {int((j+1)/rounds*100)}%' if self.is_training else ''
                eps_label = 'Epsilon:' if self.epsilon else ''
                eps_value = f'{round(self.epsilon,2)}' if self.epsilon else ''
                print(progress, 'Mean', round(p_hat,2), 
                      'CI:', round(lower,2), round(upper,2), 
                      eps_label, eps_value)

            accur[j] = p_hat
            lower_acc[j], upper_acc[j] = lower, upper

        if self.is_training:
            self.model.save()
        return np.concatenate(([accur], [lower_acc], [upper_acc]), axis=0)

# --- Training run on the training split (X) ---
rounds_to_train = 50
agent = Agent(
    epsilon_decay=0.9998,
    learning_rate=0.0001,
    data=X,
    keep_eps=True,
    epsilon=0,
)
training_accuracy = agent.train(rounds_to_train)

# Rows of `training_accuracy` are accuracy / CI lower / CI upper; plot one line each.
accuracy_frame = pd.DataFrame(
    training_accuracy.T,
    columns=['Accuracy', 'CI Lower Bound', 'CI Upper Bound'],
)
sns.lineplot(accuracy_frame)
plt.xlabel('Rounds')
plt.ylabel('Accuracy')
plt.title('Training Accuracy')

# --- Evaluation of the saved model on the validation split (Y) ---
agent = Agent(training=False, file_path='DQN_models/model/0_model.pth', data=Y)
print('\nEval')
final_stats = agent.train(1)
final_accuracy, final_lower_bound, final_upper_bound = final_stats[:, 0]

# Draw the validation accuracy with its confidence interval just after the last training round.
half_interval = (final_upper_bound-final_lower_bound)/2
plt.errorbar(
    x=rounds_to_train,
    y=final_accuracy,
    yerr=half_interval,
    elinewidth=1,
    capsize=10,
)
plt.show()
# Recorded result -- lr 0.0001, 50 rounds: Mean 0.83 CI: 0.75 0.92

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,-0.717427,-1.091046,-0.373655,-0.58511,-0.522842,-0.710421,-1.031876,-0.968299,0
1,-1.029213,0.466314,-2.456964,0.557421,0.100631,1.42673,5.115111,0.209585,1
2,-0.093854,-1.447941,-1.655691,0.271788,-0.573394,-0.297238,-0.797126,-0.477514,1
3,-0.40564,2.413014,-0.053146,1.50953,3.260122,-0.368477,-1.05796,2.172726,1
4,-0.717427,2.153454,-0.854419,-0.58511,5.81299,-0.425468,-0.362402,2.761668,1


Working on: CPU
[-0.7174265  -1.09104581 -0.37365481 -0.58510971 -0.52284201 -0.710421
 -1.03187632 -0.9682991  -2.02226942 -0.0895213 ]
[-1.02921274  0.46631407 -2.45696436  0.55742083  0.10063086  1.42673006
  5.11511079  0.2095853   0.43483782 -1.83784795]
[-0.09385402 -1.44794079 -1.65569146  0.2717882  -0.57339386 -0.29723846
 -0.79712575 -0.47751393 -1.70091543 -0.24698446]
[-0.40564026  2.41301393 -0.05314565  1.50952962  3.26012173 -0.36847683
 -1.05795971  2.17272595  3.28503409  0.18549172]
[-0.7174265   2.15345395 -0.85441855 -0.58510971  5.81299035 -0.42546752
 -0.36240247  2.76166815  3.30392097  1.05236121]
[ 0.52971846  1.40721901  0.10710893 -0.96595323  0.15960802 -1.03811749
  0.18534885  1.97641189  1.03146472  2.22455155]
[-1.02921274 -0.15014088  1.06863641  1.69995138  0.62300002  1.81141726
  0.08101527  0.01327123  1.44028269 -2.37880699]
[-0.7174265  -0.63681585 -3.25823726  0.84305347 -0.6155204   1.45522541
 -0.98550584  0.2095853  -1.0351983  -1.47796816]
[-