In [2]:
import pandas as pd
print(pd.__version__)

2.2.3


In [3]:
import os
import sys
# Add the root directory to Python path
root_dir = "C:\\Users\\Jordan Lankford\\Documents\\GitHub\\choudhary2024icu"
if root_dir not in sys.path:
    sys.path.append(root_dir)

# Now imports should work
from src.utils.utils import set_seeds
import torch
import torch.nn as nn
import gymnasium as gym
import numpy as np
from src.utils.utils import set_seeds
from torchsummary import summary

def encode_state(obs, n_states):  # Modified to take n_states as parameter
    obs = torch.Tensor(obs)
    return nn.functional.one_hot(obs.long(), n_states).float()

def get_mask(info, n_actions=25):
    allowed_actions = info['admissible_actions']
    mask = np.zeros(n_actions)
    mask[allowed_actions] = 1
    return torch.Tensor(mask).unsqueeze(0)

class QNetwork(nn.Module):
    def __init__(self, env):
        super().__init__()
        self.n_states = env.observation_space.n
        self.n_actions = env.action_space.n
        self.network = nn.Linear(self.n_states, self.n_actions, bias=False)
        torch.nn.init.constant_(self.network.weight, 0.0)

    def forward(self, x, action_masks=None):
        q_values = self.network(x)
        if action_masks is not None:
            q_values = q_values - ((1 - action_masks) * 1e10)
        return q_values
    

env = gym.make('Sepsis/ICU-Sepsis-v2')
    
    # Create network and load weights
q_network = QNetwork(env)
state_dict = torch.load(r"C:\Users\Jordan Lankford\Documents\GitHub\choudhary2024icu\models\dqn\dqn_seed_0_episode_499900.pt")
q_network.load_state_dict(state_dict)

<All keys matched successfully>

In [4]:
weight_shapes = []
for param in q_network.parameters():
    weight_shapes.append(param.shape)
print(weight_shapes)


[torch.Size([25, 716])]


In [5]:
values_in_dimension=[]
for shape in weight_shapes:
    values_in_dimension.append(np.prod(shape))
print(values_in_dimension)

[17900]


In [6]:
weights = [param.detach().numpy() for param in q_network.parameters()]  # Get all model weights as numpy arrays


In [15]:
import torch
import torch.nn as nn
import numpy as np
import gymnasium as gym
from torchsummary import summary

# Assuming QNetwork and set_seeds are defined earlier in the code
# Assuming the reshape function is defined as needed


def f(fly, flies_history, model=q_network,env=env):
    # Clone the model (you don't really need to clone in PyTorch, just create a new instance)
    cloned_model = q_network
    
    # Reshape fly's weights to match the model's weight shape
    reshaped_fly = reshape_to_model_weights(fly, model)
    
    # Load the reshaped weights into the cloned model
    cloned_model.load_state_dict(dict(zip([name for name, _ in cloned_model.named_parameters()], reshaped_fly)))

    # Set the model to evaluation mode
    cloned_model.eval()

    # Run the evaluation (similar to what is done in evaluate_network)
    episode_rewards = []
    episode_lengths = []
    
    num_episodes = 10000  # or any other number you'd like to test  USE 100000 FOR STABAL ISH
    for episode in range(num_episodes):
        state, info = env.reset()
        episode_reward = 0
        steps = 0
        done = False
        
        while not done:
            state_encoded = encode_state(np.array([state]), env.observation_space.n)
            action_mask = get_mask(info, n_actions=env.action_space.n)
            
            with torch.no_grad():
                q_values = cloned_model(state_encoded, action_mask)
                action = torch.argmax(q_values).item()
            
            next_state, reward, terminated, truncated, info = env.step(action)
            episode_reward += reward
            steps += 1
            state = next_state
            done = terminated or truncated

        episode_rewards.append(episode_reward)
        episode_lengths.append(steps)
    

    fitness = np.mean(episode_rewards)
    # Return the average reward (or any other metric you want)
    return f_avrg(fitness, flies_history)
    #return -fitness


In [9]:
def reshape_to_model_weights(flattened_fly, model=q_network):
    # List to store reshaped weights
    reshaped_weights = []
    counter = 0  # Counter for elements in fly
    
    # Get the shapes of the model's parameters
    values_in_dimension = [param.numel() for param in model.parameters()]
    weight_shapes = [param.shape for param in model.parameters()]
    
    for i in range(len(values_in_dimension)):
        # Get the flattened weights for this layer
        weight_flat = flattened_fly[counter: counter + values_in_dimension[i]]
        
        # Reshape and convert to torch.Tensor
        reshaped_layer_weights = torch.tensor(weight_flat, dtype=torch.float32).reshape(weight_shapes[i])
        
        # Append the reshaped weight to the list
        reshaped_weights.append(reshaped_layer_weights)
        
        # Move the counter to the next parameter block
        counter += values_in_dimension[i]
    
    return reshaped_weights


In [10]:
def initialize_flies_from_model(number_of_flies, weights, model, inclusive):
    population = []

    if inclusive == False:
        for i in range(number_of_flies):
            fly = []  # This will hold the flattened weights for a single fly
            
            for param in model.parameters():  # Iterate through model parameters (weights and biases)
                # Generate random weights with the same shape as the model's weight matrix
                scalers = np.random.uniform(0.9, 1.1, size=param.shape)
                print(param.shape)  # Print the shape to see what the weights look like
                scaledweights = param.detach().numpy() * scalers  # Multiply the weight values by the scalers
                
                # Flatten the random weights and add them to the fly
                flattened_weights = scaledweights.flatten()
                fly.append(flattened_weights)
            
            # After flattening each layer's weights, flatten the entire fly and add to population
            population.append(np.concatenate(fly))  # Concatenate the list of flattened weight arrays
            
    if inclusive == True:
        for i in range(number_of_flies):
            fly = []  # This will hold the flattened weights for a single fly
            
            for param in model.parameters():  # Iterate through model parameters (weights and biases)
                if i == 0:
                    flattened_weights = param.detach().numpy().flatten()  # Get the flattened weights for the first fly
                    fly.append(flattened_weights)
                else:
                    scalers = np.random.uniform(0.9, 1.1, size=param.shape)
                    print(param.shape)  # Print the shape to see what the weights look like
                    scaledweights = param.detach().numpy() * scalers  # Multiply the weight values by the scalers
                    
                    # Flatten the random weights and add them to the fly
                    flattened_weights = scaledweights.flatten()
                    fly.append(flattened_weights)
                
            # After flattening each layer's weights, flatten the entire fly and add to population
            population.append(np.concatenate(fly))  # Concatenate the list of flattened weight arrays
        
    return population


In [22]:
num_of_flies = 10

flies = initialize_flies_from_model(num_of_flies,weights,q_network,True)



from collections import deque
max_history = 5
flies_history = [deque(maxlen=max_history) for _ in range(num_of_flies)]

torch.Size([25, 716])
torch.Size([25, 716])
torch.Size([25, 716])
torch.Size([25, 716])
torch.Size([25, 716])
torch.Size([25, 716])
torch.Size([25, 716])
torch.Size([25, 716])
torch.Size([25, 716])


In [24]:
print(flies_history[0])

deque([], maxlen=5)


In [12]:
def f_avrg(fitness, flies_history):

    flies_history.append(fitness)


    return sum(fitness_history) / len(fitness_history) # mean
    

In [13]:
f_avrg(f)

TypeError: unsupported operand type(s) for +: 'int' and 'function'

In [19]:
f(flies[0])

-0.8561

In [10]:
len(flies[0])

17900

In [15]:
N = len(flies)			# POPULATION SIZE
D = len(flies[0])					# DIMENSIONALITY 
delta = 0.005			# DISTURBANCE THRESHOLD 
maxIterations = 1000	# ITERATIONS ALLOWED

In [16]:
# MAIN DFO LOOP


def DFO(start_flies):
	counter = 0
	X = start_flies
	fitness = [None]*len(start_flies) 

	for itr in range (maxIterations):
		for i in range(N): # EVALUATION --- 10 needs to be the population size
			fitness[i] = f(X[i])
		s = np.argmin(fitness) # FIND BEST FLY

	
			
		
		#fitness_history.append(fitness[s])
		#delta_history.append(delta)

		if (itr%1 == 0): # PRINT BEST FLY EVERY 100 ITERATIONS
			print ("Iteration:", itr, "\tBest fly index:", s, 
				"\tFitness value:", fitness[s], "delta: ", delta, "counter: ", counter)

		# TAKE EACH FLY INDIVIDUALLY 
		for i in range(N): 
			if i == s: continue # ELITIST STRATEGY

			# FIND BEST NEIGHBOUR
			left = (i-1)%N
			right = (i+1)%N
			bNeighbour = right if fitness[right]<fitness[left] else left

			for d in range(D): # UPDATE EACH DIMENSION SEPARATELY 
				if (np.random.rand() < delta):
					X[i][d] = np.random.normal(loc=X[bNeighbour][d], scale=(delta))
					#X[i][d] = np.random.normal(loc=X[bNeighbour][d], scale=(delta**2.0))	#update between a normal distribution with the mean set to the best neighbours location using a SD of delta squared
					continue;

				u = np.random.rand()
				X[i][d] = X[bNeighbour][d] + u*(X[s][d] - X[bNeighbour][d])	#standard fucntion uses current fly location, update will be a seflless method using the best neighbor 

				# OUT OF BOUND CONTROL
				#if X[i][d] < lowerB[d] or X[i][d] > upperB[d]:
					#X[i][d] = np.random.uniform(lowerB[d], upperB[d])

	for i in range(N): fitness[i] = f(X[i]) # EVALUATION
	new_s = np.argmin(fitness) # FIND BEST FLY
	s = new_s
	return(X[s])

#print("\nFinal best fitness:\t", fitness[s])
#print("\nBest fly position:\n",  X[s])	

In [17]:
newweihgts = DFO(flies)


Iteration: 0 	Best fly index: 9 	Fitness value: -0.841 delta:  0.005 counter:  0
Iteration: 1 	Best fly index: 6 	Fitness value: -0.845 delta:  0.005 counter:  0
Iteration: 2 	Best fly index: 6 	Fitness value: -0.839 delta:  0.005 counter:  0
Iteration: 3 	Best fly index: 8 	Fitness value: -0.847 delta:  0.005 counter:  0
Iteration: 4 	Best fly index: 8 	Fitness value: -0.841 delta:  0.005 counter:  0
Iteration: 5 	Best fly index: 3 	Fitness value: -0.843 delta:  0.005 counter:  0
Iteration: 6 	Best fly index: 5 	Fitness value: -0.843 delta:  0.005 counter:  0
Iteration: 7 	Best fly index: 2 	Fitness value: -0.861 delta:  0.005 counter:  0
Iteration: 8 	Best fly index: 3 	Fitness value: -0.85 delta:  0.005 counter:  0
Iteration: 9 	Best fly index: 9 	Fitness value: -0.842 delta:  0.005 counter:  0
Iteration: 10 	Best fly index: 7 	Fitness value: -0.861 delta:  0.005 counter:  0
Iteration: 11 	Best fly index: 9 	Fitness value: -0.844 delta:  0.005 counter:  0
Iteration: 12 	Best fly ind

KeyboardInterrupt: 