# Load data

In [1]:
import os
from joblib import load
import pandas as pd

# load simulated data

all_states = load("simulated_states.joblib")
all_labels = load("simulated_labels.joblib")

Unnamed: 0,agent_x,agent_y,target_x,target_y,at_up,at_down,at_left,at_right,hasItem,energy,charge
0,4,4,11,2,0,0,4,4,0,119,0
1,1,1,6,5,2,0,0,0,0,119,0
2,9,7,3,2,0,0,4,4,0,119,0
3,5,4,11,2,0,0,4,4,0,118,0
4,2,1,6,5,0,0,4,4,0,118,0


# Preprocessing

In [2]:
# get dataframe
import pandas as pd

# column titles
state_columns = [
    "agent_x", "agent_y",
    "target_x", "target_y",
    "at_up", "at_down", "at_left", "at_right",
    "hasItem", "energy", "charge"
]

# Create DataFrame
df = pd.DataFrame(all_states, columns=state_columns)
df.head()
all_states.shape

(103622, 11)

In [22]:
# feature scaling
from sklearn.preprocessing import StandardScaler
df = pd.DataFrame(all_states, columns=None)

scaler = StandardScaler()
scaled_df = scaler.fit_transform(df)

['GAscaler.joblib']

In [4]:
# get train and test data
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df, all_labels, test_size=0.2, random_state=1)

# Train Model


In [5]:
import numpy as np

def fitness_function(ga_instance, solution, solution_idx):
    weights = solution.reshape(5, len(state_columns))  # 5 actions × 11 features
    correct = 0

    for state, label in zip(scaled_df, all_labels):
        action_scores = np.dot(weights, state)
        predicted = np.argmax(action_scores)
        if predicted == label:
            correct += 1

    accuracy = correct / len(scaled_df)
    return accuracy

In [6]:
import pygad

state_columns = [
    "agent_x", "agent_y",
    "target_x", "target_y",
    "at_up", "at_down", "at_left", "at_right",
    "hasItem", "energy", "charge"
]
num_genes = len(state_columns) * 5


gene_space = {'low': -10, 'high': 10}
ga_instance = pygad.GA(num_generations=100,
                       num_parents_mating=8,
                       fitness_func=fitness_function,
                       sol_per_pop=16,
                       num_genes=num_genes,
                       parent_selection_type="sss",
                       keep_parents=4,
                       keep_elitism=4,
                       crossover_type="single_point",
                       mutation_type="random",
                       mutation_percent_genes=10)

In [13]:
ga_instance.run()

In [20]:
# Get best solution
solution, fitness, _ = ga_instance.best_solution()
print("Best solution fitness:", fitness)

# Convert genes to final weight matrix
final_weights = solution.reshape(5, len(state_columns))

solution

Best solution fitness: 0.8310783424369342


array([ 3.09941677,  3.13965231, -5.08528533, -3.60086475, -2.62257784,
       -0.66777438,  2.93911499, -0.378498  , -1.79697533, -1.51017219,
        1.88087111,  1.37677569,  0.7681843 ,  4.76965731, -0.33960742,
        0.3487092 , -2.8664437 ,  1.60303803,  1.10109903,  7.55076161,
        0.83739632,  3.94363215,  1.10887769,  1.72684155,  0.03232869,
       -6.22907108,  4.1891486 , -0.52282471,  0.5529789 , -3.91884507,
       -1.83675194, -0.70707948,  2.47772523, -0.36804865, -5.4425574 ,
       -0.85070126,  5.06287531,  1.28528709,  0.36975908,  0.04587632,
       -5.19135751,  3.9231679 ,  0.59088024, -0.83060193, -0.69068278,
       -0.971438  ,  0.08407571, -0.72393772, -5.30512764, -0.86185141,
        0.44939623, -0.37549367, -0.06294201,  0.36217139,  4.15987612])

# Save Model

In [18]:
from joblib import dump

dump(final_weights, "GAsolution.joblib")

['GAsolution.joblib']

In [None]:
final_weights = solution.reshape(5, len(state_columns))

# Use model on a new input
sample_state = np.random.rand(len(state_columns)) * 10
action_scores = np.dot(final_weights, sample_state)
predicted_action = np.argmax(action_scores)

print("Predicted action for sample input:", predicted_action)