# The Deep Learning Project

Team members:
- Mellissa HAFIS
- Ilona LEDROGOFF

In [None]:
# Fetch the course archive, unpack it (it contains games.data and the compiled
# golois extension module), then list the working directory to check the result.
!wget https://www.lamsade.dauphine.fr/~cazenave/project2026.zip
!unzip project2026.zip
!ls -l

--2026-01-21 18:31:33--  https://www.lamsade.dauphine.fr/~cazenave/project2026.zip
Resolving www.lamsade.dauphine.fr (www.lamsade.dauphine.fr)... 193.48.71.250
Connecting to www.lamsade.dauphine.fr (www.lamsade.dauphine.fr)|193.48.71.250|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 138578548 (132M) [application/zip]
Saving to: ‘project2026.zip’


2026-01-21 18:31:43 (14.8 MB/s) - ‘project2026.zip’ saved [138578548/138578548]

Archive:  project2026.zip
  inflating: games.data              
  inflating: golois.cpython-312-x86_64-linux-gnu.so  
total 665408
-rw-r--r-- 1 root root 542497580 Oct  7  2022 games.data
-rwxr-xr-x 1 root root    284672 Oct  1 15:09 golois.cpython-312-x86_64-linux-gnu.so
-rw-r--r-- 1 root root 138578548 Oct  1 20:02 project2026.zip
drwxr-xr-x 1 root root      4096 Dec  9 14:42 sample_data


## Some improvements:
### In the feature extractor:
- We replace the plain Conv2D layers with residual blocks.
- This should help the network reuse features efficiently and mitigate the vanishing-gradient problem.

### Adding a dropout in the value head
- To prevent overfitting

### Increasing the number of filters
- possible values: 35, 40, 45, 48, ...
- Maximum number of parameters: 100,000

### Adding a mini-convolution before the heads

In [None]:
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
from tensorflow.keras import layers
from tensorflow.keras import regularizers
import gc

import golois

# --- Hyper-parameters and data dimensions -----------------------------------
planes = 31    # number of feature planes per 19x19 board (last axis of the input)
moves = 361    # number of policy classes (upper bound of the random policy labels)
N = 50000      # number of positions held in each buffer
epochs = 40    # number of getBatch/fit rounds run by the training loop
batch = 128    # batch size passed to model.fit / model.evaluate
filters = 32   # channel width of the convolutional trunk

# --- Pre-allocated float32 buffers -------------------------------------------
# The same arrays are handed to golois.getValidation / golois.getBatch and then
# fed to the model, so the random contents below only act as placeholders
# (presumably golois overwrites them in place -- confirm against golois).
#
# The 0/1 placeholders are drawn as int8: with the default 64-bit integer dtype
# the temporary for `input_data` alone (N * 19 * 19 * planes values) would take
# about 4.5 GB before being cast to float32.
input_data = np.random.randint(2, size=(N, 19, 19, planes), dtype=np.int8)
input_data = input_data.astype('float32')

# One-hot policy targets. num_classes is passed explicitly so the buffer is
# always (N, moves); otherwise the width is inferred from the largest label
# drawn and is not guaranteed to be `moves`.
policy = np.random.randint(moves, size=(N,))
policy = keras.utils.to_categorical(policy, num_classes=moves)
policy = policy.astype('float32')

# One scalar target per position for the value head.
value = np.random.randint(2, size=(N,), dtype=np.int8)
value = value.astype('float32')

# Two extra planes per position; passed to golois but not used as a model target here.
end = np.random.randint(2, size=(N, 19, 19, 2), dtype=np.int8)
end = end.astype('float32')

# One extra plane per position, only passed to golois.getBatch; allocated
# directly as float32 to avoid a float64 temporary.
groups = np.zeros((N, 19, 19, 1), dtype='float32')

# Report the TensorFlow version in use, then make a first getValidation call on
# the buffers (presumably this fills them with the validation positions --
# confirm against golois). The flush makes the marker appear before the call.
print(f"Tensorflow version {tf.__version__}")
print("getValidation", flush=True)
golois.getValidation(input_data, policy, value, end)


def residual_block(x, filters):
    """Return ``relu(conv(relu(conv(x))) + x)`` built from two 3x3 convolutions.

    Args:
        x: Input Keras tensor. It is added back to the convolution output, so
            its channel count must already equal ``filters``.
        filters: Number of output channels of both convolutions.

    Returns:
        The Keras tensor produced after the skip connection and final ReLU.
    """
    residual = layers.Conv2D(
        filters, kernel_size=3, padding='same', activation='relu'
    )(x)
    # The second convolution stays linear: the ReLU is applied after the addition.
    residual = layers.Conv2D(filters, kernel_size=3, padding='same')(residual)
    merged = layers.Add()([residual, x])
    return layers.Activation('relu')(merged)

# features extractor

# Named `board_input` rather than `input` so the built-in input() is not
# shadowed for the rest of the notebook session.
board_input = keras.Input(shape=(19, 19, planes), name='board')

# A 1x1 convolution maps the `planes` input channels to `filters` channels so
# that the residual blocks can add their input back without a projection.
x = layers.Conv2D(filters, 1, activation='relu', padding='same')(board_input)
for _ in range(5):  # 5 residual blocks
    x = residual_block(x, filters)


# policy head
# Two 1x1 convolutions reduce the trunk to a single 19x19 map, which is
# flattened and passed through a softmax over the board points.
policy_head = layers.Conv2D(2, 1, activation='relu', padding='same')(x)
# NOTE(review): this ReLU clamps the values fed to the softmax to >= 0; a linear
# activation is the usual choice for a logits layer -- worth an experiment.
policy_head = layers.Conv2D(1, 1, activation='relu', padding='same', use_bias = False, kernel_regularizer=regularizers.l2(0.0001))(policy_head)
policy_head = layers.Flatten()(policy_head)
policy_head = layers.Activation('softmax', name='policy')(policy_head)

# value head
# Same 1x1 reduction, followed by a small dense layer, dropout and a single
# sigmoid unit.
value_head = layers.Conv2D(2, 1, activation='relu', padding='same')(x)
value_head = layers.Conv2D(1, 1, activation='relu', padding='same', use_bias = False, kernel_regularizer=regularizers.l2(0.0001))(value_head)
value_head = layers.Flatten()(value_head)
value_head = layers.Dense(50, activation='relu', kernel_regularizer=regularizers.l2(0.001))(value_head)
value_head = layers.Dropout(0.3)(value_head)
value_head = layers.Dense(1, activation='sigmoid', name='value', kernel_regularizer=regularizers.l2(0.0001))(value_head)

# The output layer names ('policy', 'value') are the keys used when compiling.
model = keras.Model(inputs=board_input, outputs=[policy_head, value_head])

model.summary ()

# compilation
# Adam optimizer; each named output gets its own loss and metric, and both
# losses contribute with equal weight to the total.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss=dict(policy='categorical_crossentropy', value='mse'),
    loss_weights=dict(policy=1.0, value=1.0),
    metrics=dict(policy='categorical_accuracy', value='mae'),
)

# training
# Each round asks golois for a new batch of N positions (the last argument,
# i * N, changes every round), then fits the model for one pass over them.
for i in range(1, epochs + 1):
    print(f"epoch {i}")
    golois.getBatch(input_data, policy, value, end, groups, i * N)
    history = model.fit(
        input_data,
        [policy, value],
        epochs=1,
        batch_size=batch,
    )

    # Run the garbage collector every 5 rounds.
    if not i % 5:
        gc.collect()

    # Everything below only happens every 20 rounds.
    if i % 20:
        continue

    # Reuse the same buffers for the validation data, evaluate on them, print
    # the result and save a checkpoint.
    golois.getValidation(input_data, policy, value, end)
    val = model.evaluate(
        input_data,
        [policy, value],
        verbose=0,
        batch_size=batch,
    )
    print("val =", val)
    model.save('test.keras')

Tensorflow version 2.19.0
getValidation


epoch 1
391/391 ━━━━━━━━━━━━━━━━━━━━ 22s 30ms/step - loss: 5.5597 - policy_categorical_accuracy: 0.0236 - policy_loss: 5.3685 - value_loss: 0.1206 - value_mae: 0.2936
epoch 2
391/391 ━━━━━━━━━━━━━━━━━━━━ 8s 19ms/step - loss: 4.3323 - policy_categorical_accuracy: 0.1366 - policy_loss: 4.1879 - value_loss: 0.1191 - value_mae: 0.2905
epoch 3
391/391 ━━━━━━━━━━━━━━━━━━━━ 8s 20ms/step - loss: 4.0724 - policy_categorical_accuracy: 0.1651 - policy_loss: 3.9441 - value_loss: 0.1202 - value_mae: 0.2924
epoch 4
391/391 ━━━━━━━━━━━━━━━━━━━━ 8s 20ms/step - loss: 3.8656 - policy_categorical_accuracy: 0.1889 - policy_loss: 3.7426 - value_loss: 0.1207 - value_mae: 0.2934
epoch 5
391/391 ━━━━━━━━━━━━━━━━━━━━ 8s 21ms/step - loss: 3.7554 - policy_categorical_accuracy: 0.2032 - policy_loss: 3.6336 - value_loss: 0.1211 - value_mae: 0.2940

In [None]:
# Export the trained model under the team-named file. The `.h5` extension makes
# Keras write the HDF5 format, unlike the `test.keras` checkpoints saved during
# training.
model.save('Mellissa_HAFIS-Ilona_LEDROGOFF_ResNet_NoBatch.h5')

