<a href="https://colab.research.google.com/github/HAR5HA-7663/MCS-5993-Evolutionary-Computation-and-Deep-Learning/blob/main/Assignments/4/XOR3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## XOR with 3 classes

<pre>
   | x0 | x1 |XOR3|
   |----|----|----|
   |0.0 |0.0 | 0  |
   |0.0 |1.0 | 1  |
   |1.0 |0.0 | 1  |
   |1.0 |1.0 | 0  |
   |0.5 |0.5 | 2  |
</pre>

In [1]:
import numpy as np
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras import optimizers

# the five different states of the XOR3 gate
X = np.array([[0,0],[0,1],[1,0],[1,1],[0.5,0.5],[0.6,0.6]])

# the five expected results in the same order
y = np.array([[0],[1],[1],[0],[2],[2]])

Xt = np.array([[0,0.1],[0.1,1],[0.9,0],[0.9,1],[0.55,0.45]])
yt = np.array([[0],[1],[1],[0],[2]])

In [2]:
act_func = ('relu', 'elu', 'sigmoid', 'tanh', 'leaky_relu')
optimz = ('SGD', 'RMSprop', 'Adam')

print("Activation functions:", act_func)
print("Optimizers:", optimz)

Activation functions: ('relu', 'elu', 'sigmoid', 'tanh', 'leaky_relu')
Optimizers: ('SGD', 'RMSprop', 'Adam')


In [3]:
# Using 'sparse_categorical_crossentropy'
def XOR3_eval(param):
    """
    param indices:
      0: num of hidden neurons
      1: learning rate
      2: batch size
      3: activation function index
      4: loss function index (fixed, not really used)
      5: optimizer index
    """
    global model
    model = Sequential([
        keras.Input(shape=(2,)),
        Dense(int(param[0]), input_dim=2, activation=act_func[round(param[3])]),
        Dense(3, activation='softmax')
    ])

    if round(param[5]) == 0:
        optmzr = optimizers.SGD(learning_rate=param[1])
    elif round(param[5]) == 1:
        optmzr = optimizers.RMSprop(learning_rate=param[1])
    else:
        optmzr = optimizers.Adam(learning_rate=param[1])

    model.compile(
    optimizer='rmsprop',
    loss='sparse_categorical_crossentropy', # target labels are integers
    metrics=['accuracy']
    )
    model.fit(X, y, batch_size=1, epochs=1300, verbose=0)

    loss, acc = model.evaluate(Xt, yt, verbose=0)
    return loss

# test_param = [4, 0.5, 2, 2, 0, 1]  # neurons, lr, bsize, act='sigmoid', loss, opt='RMSprop'
# print("loss =", XOR3_eval(test_param))

In [4]:
def print_HP_found(x, eval):
    print(
        f"#neurons={int(x[0])}, "
        f"lr={x[1]:.3f}, "
        f"bsize={int(x[2])}, "
        f"actF={act_func[int(round(x[3]))]}, "
        f"lossF='{loss_func}', "
        f"optim={optimz[int(round(x[5]))]},  "
        f"Eval={eval:.6f}"
    )

In [6]:
import numpy as np
from keras.models import save_model

# constants
MaxGen   = 35    # generations per trial
MaxTrial = 3     # number of independent trials
numVar   = 6     # number of hyperparameters

# initial step size and adjustment ratio
stepSize_i = 0.82     # initial step size
stepSize_r = 0.82     # step size ratio (Rechenberg constant)

totgen  = 0
foundCnt = 0
objfunc = XOR3_eval   # link to objective function

print("[#neurons, lr, bsize, actfun, lossfun, optmzr]")

for trial in range(MaxTrial):
    print(f"\n************************** Trial # = {trial+1}")
    print(f"Trial {trial+1}: Initializing hyperparameters")
    best_p_val = 9999.0
    xp = np.empty(numVar)  # parent
    xo = np.empty(numVar)  # offspring
    successCnt = 0
    WindowSize = 10
    stepSize = stepSize_i

    # --- initialize hyperparameters (start point) ---
    hparams = [
        (np.random.randint(2, 16), 2, 16),          # hidden neurons
        (round(np.random.uniform(0.01, 1.5), 2), 0.01, 1.5),  # learning rate
        (np.random.randint(1, 5), 1, 5),            # batch size
        (np.random.randint(0, len(act_func)), 0, len(act_func)-1),  # activation
        (0, 0, 0),                                  # loss fixed
        (np.random.randint(0, len(optimz)), 0, len(optimz)-1)       # optimizer
    ]

    for i in range(numVar):
        xp[i] = hparams[i][0]

    # evaluate parent
    print(f"Trial {trial+1}: Evaluating parent")
    p_val = objfunc(xp)
    print(f"Trial {trial+1}: Parent value: {p_val}")

    # --- evolution loop ---
    print(f"Trial {trial+1}: Starting evolution loop")
    for g in range(1, MaxGen + 1):
        print(f"Trial {trial+1}, Gen {g}: Starting generation")

        # 1/5 success rule step-size adaptation
        if (g % WindowSize) == 0:
            print(f"Trial {trial+1}, Gen {g}: Adapting step size")
            if successCnt > (WindowSize * 0.2):
                stepSize /= stepSize_r
            elif successCnt < (WindowSize * 0.2):
                stepSize *= stepSize_r
            successCnt = 0
            print(f"Trial {trial+1}, Gen {g}: New step size: {stepSize}")

        # mutate each hyperparameter with scaled step size
        print(f"Trial {trial+1}, Gen {g}: Mutating hyperparameters")
        for i in range(numVar):
            range_scale = hparams[i][2] - hparams[i][1]
            variation = np.random.normal(0.0, stepSize) * range_scale
            if i == 1:   # learning rate: keep decimals
                xo[i] = round(xp[i] + variation, 2)
            else:
                xo[i] = xp[i] + variation
            xo[i] = np.clip(xo[i], hparams[i][1], hparams[i][2])

        print(f"Trial {trial+1}, Gen {g}: Offspring: {xo}")

        # evaluate offspring
        print(f"Trial {trial+1}, Gen {g}: Evaluating offspring")
        o_val = objfunc(xo)
        print(f"Trial {trial+1}, Gen {g}: Offspring value: {o_val}")

        # selection
        if o_val < p_val:
            xp = xo.copy()
            p_val = o_val
            successCnt += 1
            print(f"Trial {trial+1}, Gen {g}: Offspring accepted, new parent value: {p_val}")
        else:
            print(f"Trial {trial+1}, Gen {g}: Offspring rejected")

        # check acceptable solution
        if p_val < 0.01:
            print(f"Trial {trial+1}: Acceptable solution found after {g} generations:")
            print_HP_found(xp, p_val)
            if p_val < best_p_val:
                best_p_val = p_val
                save_model(model, "bestXOR.keras")
            totgen += g
            foundCnt += 1
            break  # stop this trial early if success

    print(f"Trial {trial+1}: Trial completed")

    # end of one trial
print(f"\nSystem Success = {foundCnt / MaxTrial * 100:.2f}%")
totgen += (MaxTrial - foundCnt) * MaxGen
print(f"Average # of generations used = {totgen / MaxTrial:.0f}")

[#neurons, lr, bsize, actfun, lossfun, optmzr]

************************** Trial # = 1
Trial 1: Initializing hyperparameters
Trial 1: Evaluating parent
Trial 1: Parent value: 0.5678828954696655
Trial 1: Starting evolution loop
Trial 1, Gen 1: Starting generation
Trial 1, Gen 1: Mutating hyperparameters
Trial 1, Gen 1: Offspring: [2.22787589 0.01       1.         0.44285823 0.         2.        ]
Trial 1, Gen 1: Evaluating offspring
Trial 1, Gen 1: Offspring value: 0.8451813459396362
Trial 1, Gen 1: Offspring rejected
Trial 1, Gen 2: Starting generation
Trial 1, Gen 2: Mutating hyperparameters
Trial 1, Gen 2: Offspring: [2.         0.35       1.         0.         0.         1.02298103]
Trial 1, Gen 2: Evaluating offspring
Trial 1, Gen 2: Offspring value: 1.105372667312622
Trial 1, Gen 2: Offspring rejected
Trial 1, Gen 3: Starting generation
Trial 1, Gen 3: Mutating hyperparameters
Trial 1, Gen 3: Offspring: [2.         0.03       1.         0.49522492 0.         2.        ]
Trial 1, G

In [7]:
print (model.predict(X))
print (model.predict(X, verbose=0).round())
(loss, acc) = model.evaluate(Xt, yt)
print (f"loss = {loss}, acc = {acc}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[[0.39795366 0.07171635 0.53032994]
 [0.00811264 0.97381973 0.01806762]
 [0.03539601 0.95990115 0.00470279]
 [0.73307616 0.0134634  0.25346047]
 [0.30675542 0.02110653 0.67213804]
 [0.35637662 0.01848765 0.6251358 ]]
[[0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.8000 - loss: 0.4278
loss = 0.42782267928123474, acc = 0.800000011920929


In [8]:
# model save into a file and reloading the model
model.summary()
model.save('xor3.keras') # the file is stored on the cloud account. Will stay a day

model.load_weights('xor3.keras')
print (model.predict(X))
print (model.predict(X, verbose=0).round())
model.evaluate(Xt, yt)

m2 = keras.models.load_model('xor3.keras')
m2.summary()
print (m2.predict(X))
print (m2.predict(X, verbose=0).round())
(loss, acc) = m2.evaluate(Xt, yt)
print (f"loss = {loss}, acc = {acc}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[[0.39795366 0.07171635 0.53032994]
 [0.00811264 0.97381973 0.01806762]
 [0.03539601 0.95990115 0.00470279]
 [0.73307616 0.0134634  0.25346047]
 [0.30675542 0.02110653 0.67213804]
 [0.35637662 0.01848765 0.6251358 ]]
[[0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.8000 - loss: 0.4278


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[[0.39795366 0.07171635 0.53032994]
 [0.00811264 0.97381973 0.01806762]
 [0.03539601 0.95990115 0.00470279]
 [0.73307616 0.0134634  0.25346047]
 [0.30675542 0.02110653 0.67213804]
 [0.35637662 0.01848765 0.6251358 ]]
[[0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 262ms/step - accuracy: 0.8000 - loss: 0.4278
loss = 0.42782267928123474, acc = 0.800000011920929


In [9]:
# Using 'categorical_crossentropy'

model = Sequential([
    keras.Input(shape=(2,)),
    Dense(16, activation='relu'),
    Dense(3, activation='softmax')
])
model.compile(
  optimizer='rmsprop',
  loss='categorical_crossentropy', # target labels are 1hot encoded format
  metrics=['accuracy']
)

# Converting y to one-hot encoded format with shape (5,3)
y_1hot = keras.utils.to_categorical(y, num_classes=3)
model.fit(X, y_1hot, batch_size=1, epochs=1300, verbose=0)
print (model.predict(X))
print (model.predict(X, verbose=0).round())
yt_1hot = keras.utils.to_categorical(yt, num_classes=3)
model.evaluate(Xt, yt_1hot)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[[6.2623304e-01 9.9472404e-03 3.6381966e-01]
 [1.8557267e-04 9.9972326e-01 9.1205882e-05]
 [6.8616186e-04 9.9734300e-01 1.9708136e-03]
 [8.5044229e-01 5.4780149e-04 1.4900997e-01]
 [1.4084108e-01 1.4353599e-03 8.5772353e-01]
 [2.0160155e-01 1.2895619e-03 7.9710889e-01]]
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 188ms/step - accuracy: 1.0000 - loss: 0.1822


[0.18220749497413635, 1.0]