In [16]:
import pandas as pd
print(pd.__version__)

2.2.3


In [1]:
import os
import sys
# Add the root directory to Python path
root_dir = "C:\\Users\\Jordan Lankford\\Documents\\GitHub\\FineTune-DQN"
if root_dir not in sys.path:
    sys.path.append(root_dir)

# Now imports should work
from src.utils.utils import set_seeds
import torch
import torch.nn as nn
import gymnasium as gym
import numpy as np
from src.utils.utils import set_seeds
from torchsummary import summary

def encode_state(obs, n_states):
    obs = torch.Tensor(obs)
    return nn.functional.one_hot(obs.long(), n_states).float()

def get_mask(info, n_actions=25):
    allowed_actions = info['admissible_actions']
    mask = np.zeros(n_actions)
    mask[allowed_actions] = 1
    return torch.Tensor(mask).unsqueeze(0)

# Modified QNetwork to match the deep DQN structure
class QNetwork(nn.Module):
    def __init__(self, env):
        super().__init__()
        self.n_states = env.observation_space.n
        self.n_actions = env.action_space.n
        
        # Match the architecture of the deep DQN
        self.network = nn.Sequential(
            nn.Linear(self.n_states, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, self.n_actions)
        )

    def forward(self, x, action_masks=None):
        q_values = self.network(x)
        if action_masks is not None:
            q_values = q_values - ((1 - action_masks) * 1e10)
        return q_values

# Create environment and network
env = gym.make('Sepsis/ICU-Sepsis-v2')
q_network = QNetwork(env)

# Load the trained deep DQN weights
model_path = r"C:\Users\Jordan Lankford\Documents\GitHub\FineTune-DQN\models\dqn\dqn_final_seed_0.pt"
state_dict = torch.load(model_path)

# Print the keys in the state dict to verify structure
print("State dict keys:", state_dict.keys())

# Load the weights
try:
    q_network.load_state_dict(state_dict)
    print("Successfully loaded weights!")
except Exception as e:
    print(f"Error loading weights: {e}")
    # Try loading with strict=False if there are issues
    q_network.load_state_dict(state_dict, strict=False)
    print("Loaded weights with strict=False")

# Print model structure
print("\nModel Architecture:")
print(q_network)


ModuleNotFoundError: No module named 'numpy'

In [20]:
weight_shapes = []
for param in q_network.parameters():
    weight_shapes.append(param.shape)
print(weight_shapes)


NameError: name 'q_network' is not defined

In [None]:
values_in_dimension=[]
for shape in weight_shapes:
    values_in_dimension.append(np.prod(shape))
print(values_in_dimension)

[np.int64(91648), np.int64(128), np.int64(16384), np.int64(128), np.int64(3200), np.int64(25)]


In [None]:
weights = [param.detach().numpy() for param in q_network.parameters()]  # Get all model weights as numpy arrays


In [None]:
def f(fly, flies_history=None, model=q_network, env=env, eval_seeds=[0, 1, 2, 3, 4]):
    """Use fixed seeds for more consistent evaluation while maintaining some variety"""
    cloned_model = q_network
    reshaped_fly = reshape_to_model_weights(fly, model)
    cloned_model.load_state_dict(dict(zip([name for name, _ in cloned_model.named_parameters()], reshaped_fly)))
    cloned_model.eval()


    start_seed=0
    end_seed=200
    num_episodes_per_seed=100

    all_returns = []
    all_episode_lengths = []
    all_discounted_returns = []

for seed in range(start_seed, end_seed + 1):
        set_seeds(seed)
        env.action_space.seed(seed)
        env.observation_space.seed(seed)
        
        seed_returns = []
        seed_lengths = []
        seed_discounted_returns = []

        for episode in range(num_episodes_per_seed):
            state, info = env.reset(seed=seed)
            episode_reward = 0
            steps = 0
            done = False
            episode_rewards = []
            
            while not done:
                state_encoded = encode_state(np.array([state]), env.observation_space.n)
                action_mask = get_mask(info)
                
                state_encoded = state_encoded.to(device)
                action_mask = action_mask.to(device)
                
                with torch.no_grad():
                    q_values = q_network(state_encoded, action_mask)
                    action = torch.argmax(q_values, dim=1).cpu().item()
                
                next_state, reward, terminated, truncated, info = env.step(action)
                
                episode_rewards.append(reward)
                episode_reward += reward
                steps += 1
                state = next_state
                done = terminated or truncated

            discounted_return = calculate_discounted_return(episode_rewards)
            
            seed_returns.append(episode_reward)
            seed_lengths.append(steps)
            seed_discounted_returns.append(discounted_return)

        seed_mean_return = np.mean(seed_returns)
        all_returns.append(seed_mean_return)
        all_episode_lengths.append(np.mean(seed_lengths))
        all_discounted_returns.append(np.mean(seed_discounted_returns))
        
        if seed % 1 == 0:  # Print progress every 10 seeds
            print(f"Seed {seed} completed. Mean return: {seed_mean_return}")


        fitness = np.mean(all_returns),
    return -fitness
    #return -f_avrg(fitness, flies_history) if flies_history is not None else -fitness

IndentationError: unindent does not match any outer indentation level (<string>, line 68)

In [None]:
#run fly through the fitness function 20 times, calculate the range in fitness values

#f(fly, flies_history = None, model=q_network,env=env)
fitness_values = []
for i in range(10):
    fitness_values.append(f(flies[0], flies_history = None, model=q_network,env=env))

print(fitness_values)
#range of fitness values
range_fitness = max(fitness_values) - min(fitness_values)
print(range_fitness)


NameError: name 'flies' is not defined

In [None]:
#standard deviation of fitness values
std_fitness = np.std(fitness_values)
print(std_fitness)
#mean of fitness values
mean_fitness = np.mean(fitness_values)
print(mean_fitness)




In [None]:
for i in range(20):
    #take the firtst ith number of flies and caluculate mean fitness
    mean_fitness = np.mean(fitness_values[:i])
    print("mean when using", i ,"flies:", mean_fitness)

In [5]:
def reshape_to_model_weights(flattened_fly, model=q_network):
    # List to store reshaped weights
    reshaped_weights = []
    counter = 0  # Counter for elements in fly
    
    # Get the shapes of the model's parameters
    values_in_dimension = [param.numel() for param in model.parameters()]
    weight_shapes = [param.shape for param in model.parameters()]
    
    for i in range(len(values_in_dimension)):
        # Get the flattened weights for this layer
        weight_flat = flattened_fly[counter: counter + values_in_dimension[i]]
        
        # Reshape and convert to torch.Tensor
        reshaped_layer_weights = torch.tensor(weight_flat, dtype=torch.float32).reshape(weight_shapes[i])
        
        # Append the reshaped weight to the list
        reshaped_weights.append(reshaped_layer_weights)
        
        # Move the counter to the next parameter block
        counter += values_in_dimension[i]
    
    return reshaped_weights


NameError: name 'q_network' is not defined

In [9]:
def initialize_flies_from_model(number_of_flies, weights, model, inclusive):
    population = []

    if inclusive == False:
        for i in range(number_of_flies):
            fly = []  # This will hold the flattened weights for a single fly
            
            for param in model.parameters():  # Iterate through model parameters (weights and biases)
                # Generate random weights with the same shape as the model's weight matrix
                scalers = np.random.uniform(0.9, 1.1, size=param.shape)
                print(param.shape)  # Print the shape to see what the weights look like
                scaledweights = param.detach().numpy() * scalers  # Multiply the weight values by the scalers
                
                # Flatten the random weights and add them to the fly
                flattened_weights = scaledweights.flatten()
                fly.append(flattened_weights)
            
            # After flattening each layer's weights, flatten the entire fly and add to population
            population.append(np.concatenate(fly))  # Concatenate the list of flattened weight arrays
            
    if inclusive == True:
        for i in range(number_of_flies):
            fly = []  # This will hold the flattened weights for a single fly
            
            for param in model.parameters():  # Iterate through model parameters (weights and biases)
                if i == 0:
                    flattened_weights = param.detach().numpy().flatten()  # Get the flattened weights for the first fly
                    fly.append(flattened_weights)
                else:
                    scalers = np.random.uniform(0.9, 1.1, size=param.shape)
                    print(param.shape)  # Print the shape to see what the weights look like
                    scaledweights = param.detach().numpy() * scalers  # Multiply the weight values by the scalers
                    
                    # Flatten the random weights and add them to the fly
                    flattened_weights = scaledweights.flatten()
                    fly.append(flattened_weights)
                
            # After flattening each layer's weights, flatten the entire fly and add to population
            population.append(np.concatenate(fly))  # Concatenate the list of flattened weight arrays
        
    return population


In [10]:
num_of_flies = 10

flies = initialize_flies_from_model(num_of_flies,weights,q_network,True)



from collections import deque
max_history = 5
flies_history = [deque(maxlen=max_history) for _ in range(num_of_flies)]

NameError: name 'weights' is not defined

In [None]:
print(flies_history[0])

In [11]:
def f_avrg(fitness, flies_history):

    flies_history.append(fitness)

    print(flies_history)

    return sum(flies_history) / len(flies_history) # mean
    

In [None]:
f(flies[0],flies_history[0])

In [None]:
len(flies[0])

In [12]:
N = len(flies)			# POPULATION SIZE
D = len(flies[0])					# DIMENSIONALITY 
#delta = 0.005			# DISTURBANCE THRESHOLD 
maxIterations = 200	# ITERATIONS ALLOWED
sd = np.std(flies[0])		# STANDARD DEVIATION

In [13]:
#print the min and max values and the range of the initial population
min_value = np.min(flies[0])
max_value = np.max(flies[0])
range_value = max_value - min_value
print("Initial Population:")
print("Min:", min_value)
print("Max:", max_value)
print("Range:", range_value)
sd = np.std(flies[0])
print("Standard Deviation:", sd)
median = np.median(flies[0])
print("Median:", median)




Initial Population:
Min: -4.9936833
Max: 4.964795
Range: 9.958479
Standard Deviation: 0.1878362
Median: 0.0


In [14]:
import numpy as np

def disturbance_threashold_mechanism(delta, disturbance_counter, patience):
    if delta == 1 and disturbance_counter > patience:
        return 0, 0 #delta, disturbance_counter
    elif delta != 1 and disturbance_counter > patience:
        delta = (delta + np.random.uniform(high=0.5)) #add rand number between 0-0.5
        if delta > 1:
            return 1, 0
        else:
            return delta, 0
    else:
        return delta, disturbance_counter

In [15]:
def DFO(start_flies):
    """
    DFO with dynamic delta control and detailed fitness reporting
    """
    X = np.array(start_flies)     
    fitness = np.zeros(N)         
    #flies_history = [deque(maxlen=5) for _ in range(N)]
    disturbance_counter = 0
    fitness_history = []
    delta_history = []
    delta = 1
    
    for i in range(N):
        #fitness[i] = f(X[i], flies_history[i])
        fitness[i] = f(X[i])


    # Find initial best fly
    s = np.argmin(fitness)
    
    # Main DFO loop
    for itr in range(maxIterations):
        print(f"\n{'='*50}")
        print(f"Iteration: {itr}")
        print(f"Current delta: {delta:.6f}")
        print(f"disturbance_counter: {disturbance_counter}")

        for i in range(N):
            if i == s:
                print(f"Fly {i} (Best fly) fitness = {fitness[i]:.6f}")
                continue

            left = (i-1) % N
            right = (i+1) % N
            bNeighbour = right if fitness[right] < fitness[left] else left

            

            U = np.random.uniform(0, 1, D)
            R = np.random.uniform(0, 1, D)
            X[i] = np.where(R < delta,
                           np.random.normal(loc=X[bNeighbour], scale=2*sd),
                           X[bNeighbour] + U * (X[s] - X[i]))

            #new_fitness = f(X[i], flies_history[i])

            #print(f"\nFly {i} after update:")
            #print(f"First 5 dimensions: {X[i][:5]}")
            
        

            
            fitness[i] = f(X[i])
            
            
            
        new_s = np.argmin(fitness)
        
        if new_s <= s:
            disturbance_counter += 1
            delta, disturbance_counter = disturbance_threashold_mechanism(delta, disturbance_counter, patience=10)
        s = new_s
            

        fitness_history.append(fitness[s])
        delta_history.append(delta)
        
        print(f"\nEnd of iteration summary:")
        print(f"Best fly: {s} with fitness = {fitness[s]:.6f}")
        print(f"All flies fitness values:")
        for i in range(N):
            print(f"Fly {i}: {fitness[i]:.6f}")

    return X[s], fitness_history, delta_history

In [None]:

best_solution, fitness_history, delta_history = DFO(flies)



Iteration: 0
Current delta: 1.000000
disturbance_counter: 0
Fly 0 (Best fly) fitness = -0.810850

End of iteration summary:
Best fly: 0 with fitness = -0.810850
All flies fitness values:
Fly 0: -0.810850
Fly 1: -0.767150
Fly 2: -0.780400
Fly 3: -0.787250
Fly 4: -0.781450
Fly 5: -0.782350
Fly 6: -0.757000
Fly 7: -0.783500
Fly 8: -0.780350
Fly 9: -0.769600

Iteration: 1
Current delta: 1.000000
disturbance_counter: 1
Fly 0 (Best fly) fitness = -0.810850

End of iteration summary:
Best fly: 0 with fitness = -0.810850
All flies fitness values:
Fly 0: -0.810850
Fly 1: -0.789900
Fly 2: -0.774000
Fly 3: -0.780350
Fly 4: -0.779300
Fly 5: -0.789600
Fly 6: -0.777600
Fly 7: -0.784900
Fly 8: -0.774400
Fly 9: -0.769150

Iteration: 2
Current delta: 1.000000
disturbance_counter: 2
Fly 0 (Best fly) fitness = -0.810850

End of iteration summary:
Best fly: 0 with fitness = -0.810850
All flies fitness values:
Fly 0: -0.810850
Fly 1: -0.767200
Fly 2: -0.785550
Fly 3: -0.776200
Fly 4: -0.781700
Fly 5: -0.7

In [None]:
np.save('realbest_fly_weights.npy', best_solution)

In [None]:
newweihgts = np.load('realbest_fly_weights.npy')

In [None]:
#Saving fine tuned weights

cloned_model = q_network
    # Reshape fly's weights to match the model's weight shape
reshaped_fly = reshape_to_model_weights(newweihgts)
    
    # Load the reshaped weights into the cloned model
cloned_model.load_state_dict(dict(zip([name for name, _ in cloned_model.named_parameters()], reshaped_fly)))


# save the model as dfoFineTuned
torch.save(cloned_model.state_dict(), '2timesSDAndFIxedUpdatedfoFineTuned.pt')


In [None]:
len(fitness_history)

In [None]:
# Plot results after all iterations
import matplotlib.pyplot as plt

# Plot fitness[s]
plt.figure(figsize=(10, 5))
plt.plot(fitness_history, label="Fitness (Best Fly)")
plt.xlabel("Iteration")
plt.ylabel("Fitness")
plt.title("Fitness vs Iteration")
plt.legend()
plt.grid()

# Plot delta
plt.figure(figsize=(10, 5))
plt.plot(delta_history, label="Delta", color='orange')
plt.xlabel("Iteration")
plt.ylabel("Delta")
plt.title("Delta vs Iteration")
plt.legend()
plt.grid()

plt.show()

NameError: name 'fitness_history' is not defined

<Figure size 1000x500 with 0 Axes>