In [2]:
import pandas as pd
print(pd.__version__)

2.2.3


In [3]:
import os
import sys
# Add the root directory to Python path
root_dir = "C:\\Users\\Jordan Lankford\\Documents\\GitHub\\FineTune-DQN"
if root_dir not in sys.path:
    sys.path.append(root_dir)

# Now imports should work
from src.utils.utils import set_seeds
import torch
import torch.nn as nn
import gymnasium as gym
import numpy as np
from src.utils.utils import set_seeds
from torchsummary import summary

def encode_state(obs, n_states):
    obs = torch.Tensor(obs)
    return nn.functional.one_hot(obs.long(), n_states).float()

def get_mask(info, n_actions=25):
    allowed_actions = info['admissible_actions']
    mask = np.zeros(n_actions)
    mask[allowed_actions] = 1
    return torch.Tensor(mask).unsqueeze(0)

# Modified QNetwork to match the deep DQN structure
class QNetwork(nn.Module):
    def __init__(self, env):
        super().__init__()
        self.n_states = env.observation_space.n
        self.n_actions = env.action_space.n
        
        # Match the architecture of the deep DQN
        self.network = nn.Sequential(
            nn.Linear(self.n_states, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, self.n_actions)
        )

    def forward(self, x, action_masks=None):
        q_values = self.network(x)
        if action_masks is not None:
            q_values = q_values - ((1 - action_masks) * 1e10)
        return q_values

# Create environment and network
env = gym.make('Sepsis/ICU-Sepsis-v2')
q_network = QNetwork(env)

# Load the trained deep DQN weights
model_path = r"C:\Users\Jordan Lankford\Documents\GitHub\FineTune-DQN\models\dqn\dqn_final_seed_0.pt"
state_dict = torch.load(model_path)

# Print the keys in the state dict to verify structure
print("State dict keys:", state_dict.keys())

# Load the weights
try:
    q_network.load_state_dict(state_dict)
    print("Successfully loaded weights!")
except Exception as e:
    print(f"Error loading weights: {e}")
    # Try loading with strict=False if there are issues
    q_network.load_state_dict(state_dict, strict=False)
    print("Loaded weights with strict=False")

# Print model structure
print("\nModel Architecture:")
print(q_network)


State dict keys: odict_keys(['network.0.weight', 'network.0.bias', 'network.2.weight', 'network.2.bias', 'network.4.weight', 'network.4.bias'])
Successfully loaded weights!

Model Architecture:
QNetwork(
  (network): Sequential(
    (0): Linear(in_features=716, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=25, bias=True)
  )
)


  state_dict = torch.load(model_path)


In [4]:
weight_shapes = []
for param in q_network.parameters():
    weight_shapes.append(param.shape)
print(weight_shapes)


[torch.Size([128, 716]), torch.Size([128]), torch.Size([128, 128]), torch.Size([128]), torch.Size([25, 128]), torch.Size([25])]


In [5]:
values_in_dimension=[]
for shape in weight_shapes:
    values_in_dimension.append(np.prod(shape))
print(values_in_dimension)

[np.int64(91648), np.int64(128), np.int64(16384), np.int64(128), np.int64(3200), np.int64(25)]


In [6]:
weights = [param.detach().numpy() for param in q_network.parameters()]  # Get all model weights as numpy arrays


In [22]:
import torch
import torch.nn as nn
import numpy as np
import gymnasium as gym
from torchsummary import summary



def f(fly, flies_history = None, model=q_network,env=env):
    
    cloned_model = q_network
    
    # Reshape fly's weights to match the model's weight shape
    reshaped_fly = reshape_to_model_weights(fly, model)
    
    # Load the reshaped weights into the cloned model
    cloned_model.load_state_dict(dict(zip([name for name, _ in cloned_model.named_parameters()], reshaped_fly)))

    # Set the model to evaluation mode
    cloned_model.eval()

    # Run the evaluation (similar to what is done in evaluate_network)
    episode_rewards = []
    episode_lengths = []
    
    num_episodes = 20000  # or any other number you'd like to test  USE 10000 FOR STABAL ISH
    for episode in range(num_episodes):
        state, info = env.reset()
        episode_reward = 0
        steps = 0
        done = False
        
        while not done:
            state_encoded = encode_state(np.array([state]), env.observation_space.n)
            action_mask = get_mask(info, n_actions=env.action_space.n)
            
            with torch.no_grad():
                q_values = cloned_model(state_encoded, action_mask)
                action = torch.argmax(q_values).item()
            
            next_state, reward, terminated, truncated, info = env.step(action)
            episode_reward += reward
            steps += 1
            state = next_state
            done = terminated or truncated

        episode_rewards.append(episode_reward)
        episode_lengths.append(steps)
    

    fitness = np.mean(episode_rewards)
    # Return the average reward (or any other metric you want)
    return -f_avrg(fitness, flies_history)
    #return -fitness


In [29]:
#run fly through the fitness function 20 times, calculate the range in fitness values

#f(fly, flies_history = None, model=q_network,env=env)
fitness_values = []
for i in range(20):
    fitness_values.append(f(flies[0], flies_history = None, model=q_network,env=env))

print(fitness_values)
#range of fitness values
range_fitness = max(fitness_values) - min(fitness_values)
print(range_fitness)


[np.float64(-0.8124), np.float64(-0.8074), np.float64(-0.81455), np.float64(-0.80705), np.float64(-0.8125), np.float64(-0.8096), np.float64(-0.80815), np.float64(-0.8064), np.float64(-0.80905), np.float64(-0.8143), np.float64(-0.81195), np.float64(-0.8096), np.float64(-0.80975), np.float64(-0.81325), np.float64(-0.8111), np.float64(-0.80875), np.float64(-0.80945), np.float64(-0.8159), np.float64(-0.8128), np.float64(-0.8118)]
0.009499999999999953


In [30]:
#standard deviation of fitness values
std_fitness = np.std(fitness_values)
print(std_fitness)
#mean of fitness values
mean_fitness = np.mean(fitness_values)
print(mean_fitness)




0.002600570850794101
-0.8107875


In [32]:
for i in range(20):
    #take the firtst ith number of flies and caluculate mean fitness
    mean_fitness = np.mean(fitness_values[:i])
    print("mean when using", i ,"flies:", mean_fitness)

mean when using 0 flies: nan
mean when using 1 flies: -0.8124
mean when using 2 flies: -0.8099000000000001
mean when using 3 flies: -0.8114500000000001
mean when using 4 flies: -0.8103500000000001
mean when using 5 flies: -0.8107800000000001
mean when using 6 flies: -0.8105833333333333
mean when using 7 flies: -0.8102357142857144
mean when using 8 flies: -0.80975625
mean when using 9 flies: -0.8096777777777777
mean when using 10 flies: -0.81014
mean when using 11 flies: -0.8103045454545454
mean when using 12 flies: -0.8102458333333332
mean when using 13 flies: -0.8102076923076922
mean when using 14 flies: -0.8104249999999998
mean when using 15 flies: -0.8104699999999999
mean when using 16 flies: -0.8103625
mean when using 17 flies: -0.8103088235294118
mean when using 18 flies: -0.8106194444444443
mean when using 19 flies: -0.8107342105263157


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [None]:
'''
from parallel_fitness import parallel_fitness, QNetwork
import time 

def f_parallel(fly, model=q_network, num_processes=4):
    """
    Wrapper for parallel fitness evaluation
    """
    reshaped_fly = reshape_to_model_weights(fly, model)
    state_dict = dict(zip([name for name, _ in model.named_parameters()], reshaped_fly))
    return parallel_fitness(state_dict, num_processes=num_processes)


# Compare performance
print("Testing sequential evaluation:")
start = time.perf_counter()
result_seq = f(flies[0])
time_seq = time.perf_counter() - start
print(f"Sequential result: {result_seq}")
print(f"Sequential time: {time_seq:.2f}s")

print("\nTesting parallel evaluation:")
start = time.perf_counter()
result_par = f_parallel(flies[0], num_processes=4)
time_par = time.perf_counter() - start
print(f"Parallel result: {result_par}")
print(f"Parallel time: {time_par:.2f}s")

print(f"\nSpeedup: {time_seq/time_par:.2f}x")'
'''

Testing sequential evaluation:
Sequential result: -0.809
Sequential time: 33.63s

Testing parallel evaluation:
Evaluated 10000 episodes in 50.38 seconds
Parallel result: -0.8062
Parallel time: 50.39s

Speedup: 0.67x


In [None]:

'''
from vectorized_fitness import vectorized_fitness, QNetwork
import time

def f_vectorized(fly, model=q_network, num_envs=4):
    """
    Wrapper for vectorized fitness evaluation
    """
    reshaped_fly = reshape_to_model_weights(fly, model)
    state_dict = dict(zip([name for name, _ in model.named_parameters()], reshaped_fly))
    return vectorized_fitness(state_dict, num_envs=num_envs)

# Test code


print("\nTesting vectorized evaluation:")
start = time.perf_counter()
result_vec = f_vectorized(flies[0], num_envs=4)
time_vec = time.perf_counter() - start
print(f"Vectorized result: {result_vec}")
print(f"Vectorized time: {time_vec:.2f}s")


print("Testing sequential evaluation:")
start = time.perf_counter()
result_seq = f(flies[0])
time_seq = time.perf_counter() - start
print(f"Sequential result: {result_seq}")
print(f"Sequential time: {time_seq:.2f}s")

print(f"\nSpeedup: {time_seq/time_vec:.2f}x")'
'''


Testing vectorized evaluation:
Evaluated 1068 episodes in 2.43 seconds
Vectorized result: -0.0
Vectorized time: 3.88s
Testing sequential evaluation:
Sequential result: -0.8124
Sequential time: 37.90s

Speedup: 9.77x


In [8]:
def reshape_to_model_weights(flattened_fly, model=q_network):
    # List to store reshaped weights
    reshaped_weights = []
    counter = 0  # Counter for elements in fly
    
    # Get the shapes of the model's parameters
    values_in_dimension = [param.numel() for param in model.parameters()]
    weight_shapes = [param.shape for param in model.parameters()]
    
    for i in range(len(values_in_dimension)):
        # Get the flattened weights for this layer
        weight_flat = flattened_fly[counter: counter + values_in_dimension[i]]
        
        # Reshape and convert to torch.Tensor
        reshaped_layer_weights = torch.tensor(weight_flat, dtype=torch.float32).reshape(weight_shapes[i])
        
        # Append the reshaped weight to the list
        reshaped_weights.append(reshaped_layer_weights)
        
        # Move the counter to the next parameter block
        counter += values_in_dimension[i]
    
    return reshaped_weights


In [9]:
def initialize_flies_from_model(number_of_flies, weights, model, inclusive):
    population = []

    if inclusive == False:
        for i in range(number_of_flies):
            fly = []  # This will hold the flattened weights for a single fly
            
            for param in model.parameters():  # Iterate through model parameters (weights and biases)
                # Generate random weights with the same shape as the model's weight matrix
                scalers = np.random.uniform(0.9, 1.1, size=param.shape)
                print(param.shape)  # Print the shape to see what the weights look like
                scaledweights = param.detach().numpy() * scalers  # Multiply the weight values by the scalers
                
                # Flatten the random weights and add them to the fly
                flattened_weights = scaledweights.flatten()
                fly.append(flattened_weights)
            
            # After flattening each layer's weights, flatten the entire fly and add to population
            population.append(np.concatenate(fly))  # Concatenate the list of flattened weight arrays
            
    if inclusive == True:
        for i in range(number_of_flies):
            fly = []  # This will hold the flattened weights for a single fly
            
            for param in model.parameters():  # Iterate through model parameters (weights and biases)
                if i == 0:
                    flattened_weights = param.detach().numpy().flatten()  # Get the flattened weights for the first fly
                    fly.append(flattened_weights)
                else:
                    scalers = np.random.uniform(0.9, 1.1, size=param.shape)
                    print(param.shape)  # Print the shape to see what the weights look like
                    scaledweights = param.detach().numpy() * scalers  # Multiply the weight values by the scalers
                    
                    # Flatten the random weights and add them to the fly
                    flattened_weights = scaledweights.flatten()
                    fly.append(flattened_weights)
                
            # After flattening each layer's weights, flatten the entire fly and add to population
            population.append(np.concatenate(fly))  # Concatenate the list of flattened weight arrays
        
    return population


In [10]:
num_of_flies = 10

flies = initialize_flies_from_model(num_of_flies,weights,q_network,True)



from collections import deque
max_history = 5
flies_history = [deque(maxlen=max_history) for _ in range(num_of_flies)]

torch.Size([128, 716])
torch.Size([128])
torch.Size([128, 128])
torch.Size([128])
torch.Size([25, 128])
torch.Size([25])
torch.Size([128, 716])
torch.Size([128])
torch.Size([128, 128])
torch.Size([128])
torch.Size([25, 128])
torch.Size([25])
torch.Size([128, 716])
torch.Size([128])
torch.Size([128, 128])
torch.Size([128])
torch.Size([25, 128])
torch.Size([25])
torch.Size([128, 716])
torch.Size([128])
torch.Size([128, 128])
torch.Size([128])
torch.Size([25, 128])
torch.Size([25])
torch.Size([128, 716])
torch.Size([128])
torch.Size([128, 128])
torch.Size([128])
torch.Size([25, 128])
torch.Size([25])
torch.Size([128, 716])
torch.Size([128])
torch.Size([128, 128])
torch.Size([128])
torch.Size([25, 128])
torch.Size([25])
torch.Size([128, 716])
torch.Size([128])
torch.Size([128, 128])
torch.Size([128])
torch.Size([25, 128])
torch.Size([25])
torch.Size([128, 716])
torch.Size([128])
torch.Size([128, 128])
torch.Size([128])
torch.Size([25, 128])
torch.Size([25])
torch.Size([128, 716])
torch.Siz

In [12]:
print(flies_history[0])

deque([], maxlen=5)


In [11]:
def f_avrg(fitness, flies_history):

    flies_history.append(fitness)

    print(flies_history)

    return sum(flies_history) / len(flies_history) # mean
    

In [None]:
f(flies[0],flies_history[0])

In [None]:
len(flies[0])

In [19]:
N = len(flies)			# POPULATION SIZE
D = len(flies[0])					# DIMENSIONALITY 
delta = 0.005			# DISTURBANCE THRESHOLD 
maxIterations = 200	# ITERATIONS ALLOWED
sd = np.std(flies[0])		# STANDARD DEVIATION

In [None]:
#print the min and max values and the range of the initial population
min_value = np.min(flies[0])
max_value = np.max(flies[0])
range_value = max_value - min_value
print("Initial Population:")
print("Min:", min_value)
print("Max:", max_value)
print("Range:", range_value)
sd = np.std(flies[0])
print("Standard Deviation:", sd)
median = np.median(flies[0])
print("Median:", median)




Initial Population:
Min: -4.9936833
Max: 4.964795
Range: 9.958479
Standard Deviation: 0.1878362
Median: 0.0


AttributeError: module 'numpy' has no attribute 'mode'

In [None]:
def DFO(start_flies):
    """
    DFO with initial history population phase
    """
    X = np.array(start_flies)     
    fitness = np.zeros(N)         
    flies_history = [deque(maxlen=5) for _ in range(N)]

    # First phase: Populate histories for all flies
    print("Populating initial histories...")
    for _ in range(max_history):  # Run evaluations to fill histories
        for i in range(N):
            fitness[i] = f(X[i], flies_history[i])
            print(f"Fly {i} history: {list(flies_history[i])}")
    
    # # Now all flies have full histories, start optimization
    s = np.argmin(fitness)
    print(f"\nStarting optimization with best fly: {s}, fitness: {fitness[s]:.3f}")
    
    # Main DFO loop
    for itr in range(maxIterations):
        # Recalculate best fly's fitness
        best_fly_fitness = f(X[s], flies_history[s])
        fitness[s] = best_fly_fitness
        
        print(f"\nIteration: {itr}")
        print(f"Best fly {s} recalculated fitness: {fitness[s]:.3f}")
        print(f"Best fly history: {list(flies_history[s])}")

        for i in range(N):
            if i == s:
                continue

            left = (i-1) % N
            right = (i+1) % N
            bNeighbour = right if fitness[right] < fitness[left] else left

            old_position = X[i].copy()
            old_fitness = fitness[i]

            U = np.random.uniform(0, 1, D)
            R = np.random.uniform(0, 1, D)
            X[i] = np.where(R < delta,
                           np.random.normal(loc=X[i], scale=sd), #Change to scale=sd, rather than delta
                           X[bNeighbour] + U * (X[s] - X[i])) # MADE BOTH RESSART AND NORMAL USE THE CURRENT FLY RATHER THAN FORCING SELFLESS UPDATE

            new_fitness = f(X[i], flies_history[i])
            
            if new_fitness < old_fitness:
                fitness[i] = new_fitness
                if new_fitness < fitness[s]:
                    s = i
                    print(f"New best found! Fly {i}: {new_fitness:.3f}")
                    print(f"New best history: {list(flies_history[i])}")
            else:
                X[i] = old_position
                fitness[i] = old_fitness

        # After all flies updated, recheck who is best
        s = np.argmin(fitness)
        print(f"End of iteration best fly: {s} with fitness: {fitness[s]:.3f}")
        print(f"Best fly history: {list(flies_history[s])}")

    return X[s]

In [29]:
newweihgts = DFO(flies)


deque([np.float64(0.81145)], maxlen=5)

Iteration: 0
Best fly 0 recalculated fitness: -0.811
Best fly history: [np.float64(0.81145)]
deque([np.float64(0.8033)], maxlen=5)
deque([np.float64(0.8029)], maxlen=5)
deque([np.float64(0.80995)], maxlen=5)
deque([np.float64(0.8073)], maxlen=5)
deque([np.float64(0.79635)], maxlen=5)
deque([np.float64(0.7914)], maxlen=5)
deque([np.float64(0.79)], maxlen=5)
deque([np.float64(0.7995)], maxlen=5)
deque([np.float64(0.8111)], maxlen=5)
End of iteration best fly: 0 with fitness: -0.811
Best fly history: [np.float64(0.81145)]
deque([np.float64(0.81145), np.float64(0.816)], maxlen=5)

Iteration: 1
Best fly 0 recalculated fitness: -0.814
Best fly history: [np.float64(0.81145), np.float64(0.816)]
deque([np.float64(0.8033), np.float64(0.8129)], maxlen=5)
deque([np.float64(0.8029), np.float64(0.81395)], maxlen=5)
deque([np.float64(0.80995), np.float64(0.81615)], maxlen=5)
deque([np.float64(0.8073), np.float64(0.8094)], maxlen=5)


KeyboardInterrupt: 

In [None]:
np.save('realbest_fly_weights.npy', newweihgts)