In [1]:
import pandas as pd
print(pd.__version__)

2.2.3


In [2]:
import os
import sys
# Add the root directory to Python path
root_dir = "C:\\Users\\Jordan Lankford\\Documents\\GitHub\\FineTune-DQN"
if root_dir not in sys.path:
    sys.path.append(root_dir)

# Now imports should work
from src.utils.utils import set_seeds
import torch
import torch.nn as nn
import gymnasium as gym
import numpy as np
from src.utils.utils import set_seeds
from torchsummary import summary

def encode_state(obs, n_states):
    """One-hot encode integer state indices.

    Args:
        obs: Array-like of state indices. It is passed through ``torch.Tensor``
            and then cast to ``long`` before being encoded.
        n_states: Number of classes, i.e. the size of the trailing one-hot axis.

    Returns:
        A float tensor shaped like ``obs`` with an extra trailing axis of
        length ``n_states``.
    """
    state_indices = torch.Tensor(obs).long()
    one_hot = nn.functional.one_hot(state_indices, num_classes=n_states)
    return one_hot.float()

def get_mask(info, n_actions=25):
    """Build a 0/1 action mask from an environment ``info`` dict.

    Args:
        info: Mapping that contains ``'admissible_actions'``, the indices of
            the actions that are currently allowed.
        n_actions: Total number of actions (length of the mask).

    Returns:
        A float tensor of shape ``(1, n_actions)`` holding 1 at admissible
        action indices and 0 everywhere else.
    """
    action_mask = np.zeros(n_actions)
    action_mask[info['admissible_actions']] = 1
    # Add the leading batch axis before converting to a tensor.
    return torch.Tensor(action_mask[np.newaxis, :])

# Q-network whose layer layout mirrors the trained deep DQN checkpoint
class QNetwork(nn.Module):
    """Three-layer MLP that maps a one-hot state to one Q-value per action.

    The layers live in ``self.network`` (an ``nn.Sequential``), so the
    state-dict keys are ``network.0.*``, ``network.2.*`` and ``network.4.*``.
    """

    def __init__(self, env):
        """Size the network from ``env.observation_space.n`` and ``env.action_space.n``."""
        super().__init__()
        self.n_states = env.observation_space.n
        self.n_actions = env.action_space.n

        hidden_units = 128
        layers = [
            nn.Linear(self.n_states, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, self.n_actions),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x, action_masks=None):
        """Return Q-values for ``x``, optionally suppressing masked-out actions.

        Args:
            x: Input batch fed straight into the MLP.
            action_masks: Optional tensor with 1 for allowed and 0 for
                disallowed actions. Disallowed entries are lowered by 1e10 so
                that an argmax never selects them.
        """
        q_values = self.network(x)
        if action_masks is None:
            return q_values
        penalty = (1 - action_masks) * 1e10
        return q_values - penalty

# Create environment and network
env = gym.make('Sepsis/ICU-Sepsis-v2')
q_network = QNetwork(env)

# Locate the trained deep DQN checkpoint. It lives inside the repository, so the
# path is built from root_dir instead of repeating the absolute location.
model_path = os.path.join(root_dir, "models", "dqn", "dqn_final_seed_0.pt")

# weights_only=True restricts unpickling to tensors/containers (no arbitrary code
# execution) and avoids the FutureWarning emitted by a bare torch.load call.
# map_location="cpu" lets a checkpoint saved on a GPU load on a CPU-only machine.
state_dict = torch.load(model_path, map_location="cpu", weights_only=True)

# Print the keys in the state dict to verify structure
print("State dict keys:", state_dict.keys())

# Load the weights
try:
    q_network.load_state_dict(state_dict)
    print("Successfully loaded weights!")
except RuntimeError as e:
    # load_state_dict raises RuntimeError on missing/unexpected keys or shape
    # mismatches. Fall back to a non-strict load, but report exactly which keys
    # were skipped so a partially initialised network is never silent.
    print(f"Error loading weights: {e}")
    incompatible = q_network.load_state_dict(state_dict, strict=False)
    print("Loaded weights with strict=False")
    print("Missing keys:", incompatible.missing_keys)
    print("Unexpected keys:", incompatible.unexpected_keys)

# Print model structure
print("\nModel Architecture:")
print(q_network)


State dict keys: odict_keys(['network.0.weight', 'network.0.bias', 'network.2.weight', 'network.2.bias', 'network.4.weight', 'network.4.bias'])
Successfully loaded weights!

Model Architecture:
QNetwork(
  (network): Sequential(
    (0): Linear(in_features=716, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=25, bias=True)
  )
)


  state_dict = torch.load(model_path)


In [3]:
# Shape of every parameter tensor, in the order q_network.parameters() yields them.
weight_shapes = [param.shape for param in q_network.parameters()]
print(weight_shapes)


[torch.Size([128, 716]), torch.Size([128]), torch.Size([128, 128]), torch.Size([128]), torch.Size([25, 128]), torch.Size([25])]


In [4]:
# Number of scalar entries in each parameter tensor (product of its shape).
values_in_dimension = [np.prod(shape) for shape in weight_shapes]
print(values_in_dimension)

[np.int64(91648), np.int64(128), np.int64(16384), np.int64(128), np.int64(3200), np.int64(25)]


In [5]:
# All model parameters as NumPy arrays, in parameters() order.
# NOTE: detach().numpy() returns views that share memory with the live parameters,
# so these arrays change whenever q_network's weights are overwritten in place.
weights = [
    tensor.detach().numpy()
    for tensor in q_network.parameters()
]


In [6]:
import torch
import torch.nn as nn
import numpy as np
import gymnasium as gym
from torchsummary import summary

# Assuming QNetwork and set_seeds are defined earlier in the code
# Assuming the reshape function is defined as needed


def f(fly, flies_history, model=q_network, env=env, num_episodes=10000):
    """DFO fitness: negated running-mean return of the greedy policy encoded by ``fly``.

    The fly's flattened weights are loaded into a private copy of ``model``,
    the resulting greedy (masked argmax) policy is rolled out for
    ``num_episodes`` episodes, and the mean episode return is pushed through
    ``f_avrg`` together with this fly's history. The result is negated because
    the optimiser minimises.

    Args:
        fly: Flat sequence of weights laid out in ``model.parameters()`` order.
        flies_history: Mutable history of this fly's previous fitness values;
            ``f_avrg`` appends the new value to it.
        model: Network whose architecture is used for evaluation. It is never
            modified.
        env: Gymnasium-style environment whose ``info`` dict carries
            ``'admissible_actions'``.
        num_episodes: Number of evaluation episodes. The original notes suggest
            around 100000 for a reasonably stable estimate.

    Returns:
        The negated value returned by ``f_avrg`` (lower is better).

    Raises:
        ValueError: If ``num_episodes`` is smaller than 1.
    """
    # Local import keeps this cell self-contained; the notebook's import cells do not import copy.
    import copy

    if num_episodes < 1:
        raise ValueError("num_episodes must be at least 1")

    # Work on a real copy. Loading the fly into `model` itself would overwrite the
    # shared network (and every NumPy view of its parameters) on each evaluation.
    cloned_model = copy.deepcopy(model)

    # Reshape the fly's flat weights to the per-parameter shapes and load them.
    reshaped_fly = reshape_to_model_weights(fly, cloned_model)
    param_names = [name for name, _ in cloned_model.named_parameters()]
    cloned_model.load_state_dict(dict(zip(param_names, reshaped_fly)))
    cloned_model.eval()

    # These sizes do not change between steps, so read them once.
    n_states = env.observation_space.n
    n_actions = env.action_space.n

    episode_rewards = []
    for _ in range(num_episodes):
        state, info = env.reset()
        episode_reward = 0
        done = False

        while not done:
            state_encoded = encode_state(np.array([state]), n_states)
            action_mask = get_mask(info, n_actions=n_actions)

            with torch.no_grad():
                q_values = cloned_model(state_encoded, action_mask)
                action = torch.argmax(q_values).item()

            state, reward, terminated, truncated, info = env.step(action)
            episode_reward += reward
            done = terminated or truncated

        episode_rewards.append(episode_reward)

    fitness = np.mean(episode_rewards)
    # Smooth the noisy estimate with this fly's recent history, then negate for minimisation.
    return -f_avrg(fitness, flies_history)


In [7]:
def reshape_to_model_weights(flattened_fly, model=q_network):
    """Split a flat weight vector into tensors shaped like ``model``'s parameters.

    Args:
        flattened_fly: Flat sequence of weights laid out in
            ``model.parameters()`` order.
        model: Network that supplies the parameter shapes and sizes.

    Returns:
        A list of ``float32`` tensors, one per parameter, in
        ``model.parameters()`` order.
    """
    tensors = []
    offset = 0  # position of the next unread element in flattened_fly

    for param in model.parameters():
        size = param.numel()
        chunk = flattened_fly[offset:offset + size]
        tensors.append(torch.tensor(chunk, dtype=torch.float32).reshape(param.shape))
        offset += size

    return tensors


In [8]:
def initialize_flies_from_model(number_of_flies, weights, model, inclusive):
    """Create a population of flat weight vectors scattered around ``model``.

    Every perturbed fly is the model's parameters multiplied element-wise by
    independent factors drawn from U(0.9, 1.1), flattened and concatenated in
    ``model.parameters()`` order.

    Args:
        number_of_flies: Population size.
        weights: Unused; kept so existing call sites keep working.
        model: Network whose parameters seed the population.
        inclusive: If truthy, fly 0 is an exact (unperturbed) copy of the
            model's parameters and the remaining flies are perturbed. If
            falsy, every fly is perturbed.

    Returns:
        A list of ``number_of_flies`` 1-D NumPy arrays.
    """
    # Read the parameters once; .cpu() makes this work for models on an accelerator too.
    base_params = [param.detach().cpu().numpy() for param in model.parameters()]

    def _perturbed_fly():
        # One independent U(0.9, 1.1) factor per weight, drawn parameter by parameter.
        return np.concatenate([
            (values * np.random.uniform(0.9, 1.1, size=values.shape)).flatten()
            for values in base_params
        ])

    population = []
    for index in range(number_of_flies):
        if inclusive and index == 0:
            # Keep the trained solution itself in the swarm.
            population.append(np.concatenate([values.flatten() for values in base_params]))
        else:
            population.append(_perturbed_fly())

    return population


In [9]:
from collections import deque

num_of_flies = 1
max_history = 5

# Seed the swarm from the trained network; inclusive=True keeps the unperturbed
# network weights as fly 0.
flies = initialize_flies_from_model(num_of_flies, weights, q_network, inclusive=True)

# One bounded fitness history per fly (only the last max_history values are kept).
flies_history = [deque(maxlen=max_history) for _ in range(num_of_flies)]

In [10]:
print(flies_history[0])

deque([], maxlen=5)


In [11]:
def f_avrg(fitness, flies_history):
    """Record ``fitness`` in a fly's history and return the history's mean.

    Args:
        fitness: Newest fitness value for the fly.
        flies_history: Mutable sequence of that fly's earlier fitness values
            (the notebook passes a bounded ``deque``). It is modified in place.

    Returns:
        The arithmetic mean of the history after ``fitness`` has been appended.
    """
    flies_history.append(fitness)
    print(flies_history)

    total = sum(flies_history)
    count = len(flies_history)
    return total / count
    

In [17]:
# Fitness of fly 0. Each call appends to flies_history[0], so re-running this cell
# changes the running mean that is returned.
f(flies[0],flies_history[0])

deque([np.float64(0.803), np.float64(0.8148)], maxlen=5)


np.float64(-0.8089)

In [13]:
len(flies[0])

111513

In [14]:
# DFO configuration, read as module-level globals.
N = len(flies)          # population size
D = len(flies[0])       # dimensionality (number of flattened weights per fly)
delta = 0.005           # disturbance threshold
maxIterations = 1       # iterations allowed

In [15]:
def DFO(start_flies):
    """Vectorised Dispersive Flies Optimisation over flattened DQN weights.

    Population size and dimensionality are taken from ``start_flies`` itself,
    so the function stays correct when the module-level ``N`` / ``D`` are stale
    or were computed for a different population. The disturbance threshold
    ``delta`` and the iteration budget ``maxIterations`` are read from the
    module-level globals of the same names.

    Args:
        start_flies: Non-empty sequence of equal-length 1-D weight vectors.
            The input is copied and is not modified.

    Returns:
        The position (1-D float64 array) of the fly with the lowest fitness.

    Raises:
        ValueError: If ``start_flies`` is empty or is not a rectangular
            collection of weight vectors.
    """
    # Copy into one float64 matrix of shape (n_flies, n_dims), so float64 updates
    # are not truncated when every input fly happens to be float32.
    X = np.array(start_flies, dtype=np.float64)
    if X.ndim != 2 or X.shape[0] == 0:
        raise ValueError(
            "start_flies must be a non-empty collection of equal-length weight vectors"
        )
    n_flies, n_dims = X.shape

    fitness = np.zeros(n_flies)
    # Separate bounded history per fly; f() uses it to smooth the noisy fitness.
    flies_history = [deque(maxlen=5) for _ in range(n_flies)]

    # Initial evaluation of all flies
    for i in range(n_flies):
        fitness[i] = f(X[i], flies_history[i])

    # Index of the current best fly (lower fitness is better)
    s = np.argmin(fitness)

    for itr in range(maxIterations):
        print(f"Iteration: {itr}, Best fly index: {s}, Fitness value: {fitness[s]:.3f}, delta: {delta}")

        for i in range(n_flies):
            if i == s:
                continue  # elitism: the best fly keeps its position

            # Best of the two ring neighbours
            left = (i - 1) % n_flies
            right = (i + 1) % n_flies
            bNeighbour = right if fitness[right] < fitness[left] else left

            U = np.random.uniform(0, 1, n_dims)
            R = np.random.uniform(0, 1, n_dims)

            # Per dimension: with probability delta restart near the best neighbour,
            # otherwise move from the best neighbour towards the swarm's best fly.
            X[i] = np.where(R < delta,
                            np.random.normal(loc=X[bNeighbour], scale=delta),
                            X[bNeighbour] + U * (X[s] - X[bNeighbour]))

            fitness[i] = f(X[i], flies_history[i])

        # Re-select the best fly after every fly has moved
        s = np.argmin(fitness)

    return X[s]

In [16]:
# Run DFO from the initial population and keep the best weight vector it returns.
# NOTE: the variable name is misspelled ("newweihgts"); the save cell refers to it
# by this exact spelling, so rename both together.
newweihgts = DFO(flies)


KeyboardInterrupt: 

In [None]:
# Persist the best fly's flattened weights. `newweihgts` only exists once the DFO
# cell has run to completion.
np.save('realbest_fly_weights.npy', newweihgts)