In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler
import random
import xgboost as xgb
from sklearn.model_selection import train_test_split

# Seed every random number generator this notebook can draw from so that
# results are repeatable. Seeding NumPy alone does not cover Python's `random`
# module or TensorFlow, which keeps its own generator for weight
# initialisation, dropout and shuffling.
# NOTE(review): GPU ops may still introduce small run-to-run differences.
np.random.seed(42)
random.seed(42)
tf.random.set_seed(42)

###################### -- HELPER FUNCTIONS -- ######################
def merge_predictions(preds_a, preds_b):
    """Fuse two parallel sequences of class predictions into character labels.

    For each position, both class ids are decoded (class 9 stands for the
    original label 24), shifted by 65 so that class 0 maps to ASCII 'A',
    summed, and passed through ``normalize_ascii_sum``. The resulting code
    point is converted to a character.

    Args:
        preds_a: Indexable sequence of predicted class ids for the first image.
        preds_b: Indexable sequence of predicted class ids for the second
            image; indexed with the same positions as ``preds_a``.

    Returns:
        A list of ``(position, character)`` tuples, one per entry of ``preds_a``.
    """
    def _to_ascii(class_id):
        # Undo the 24 -> 9 relabelling, then offset into the ASCII range ('A' == 65).
        return (24 if class_id == 9 else class_id) + 65

    return [
        (idx, chr(normalize_ascii_sum(_to_ascii(preds_a[idx]) + _to_ascii(preds_b[idx]))))
        for idx in range(len(preds_a))
    ]

def separate_test_sets(file_path):
    """Split a paired-image test CSV into its 'a' and 'b' halves.

    Columns whose name contains ``pixel_a`` go to the first frame and columns
    whose name contains ``pixel_b`` go to the second. The ``_a`` / ``_b``
    marker is then stripped from the column names, so both frames end up with
    the same naming scheme.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A ``(test_a, test_b)`` tuple of DataFrames.
    """
    frame = pd.read_csv(file_path)

    def _extract(marker):
        # Pick the columns of one image, then drop the marker from their names.
        wanted = [name for name in frame.columns if 'pixel' + marker in name]
        subset = frame[wanted]
        subset.columns = [name.replace(marker, '') for name in subset.columns]
        return subset

    first_half = _extract('_a')
    second_half = _extract('_b')
    return first_half, second_half

def normalize_ascii_sum(ascii_sum):
    """Fold a summed ASCII code back to at most 122 ('z').

    While the value is greater than 122, 65 (the code of 'A') is subtracted.
    Values that are already 122 or less are returned unchanged apart from the
    conversion to ``int``.

    Args:
        ascii_sum: Numeric sum of two ASCII codes.

    Returns:
        The folded value as a Python ``int``.
    """
    upper_bound = ord('z')  # 122
    step = ord('A')         # 65
    while ascii_sum > upper_bound:
        ascii_sum -= step
    return int(ascii_sum)

def save_predictions_to_csv(filename, predictions):
    """Write predictions to ``filename`` as a two-column CSV.

    The file is overwritten. It starts with the header ``id,label`` followed
    by one ``<id>,<label>`` line per prediction.

    Args:
        filename: Destination path.
        predictions: Iterable of ``(id, label)`` pairs.
    """
    with open(filename, 'w') as out:
        out.write("id,label\n")
        out.writelines(f"{row_id},{label}\n" for row_id, label in predictions)

def entropy(p):
    """Binary entropy of a probability ``p``, in bits.

    Args:
        p: Probability of one of the two outcomes.

    Returns:
        ``-(p*log2(p) + (1-p)*log2(1-p))`` when ``0 < p < 1``; otherwise ``0``
        (this covers the endpoints as well as out-of-range values).
    """
    if not (0 < p < 1):
        return 0
    complement = 1 - p
    return - (p * np.log2(p) + complement * np.log2(complement))
 
###################### -- CNN IMPLEMENTATION -- ######################

class CNNClassifier:
    """Convolutional image classifier wrapping a compiled Keras ``Sequential`` model.

    The network is two convolutional stages (Conv-BN-Conv-BN-MaxPool-Dropout,
    with 32 and then 64 filters) followed by a 512-unit dense layer and a
    softmax output of width ``num_classes``.
    """

    def __init__(self, input_shape, num_classes):
        """Build and compile the network.

        Args:
            input_shape: Shape of a single input sample, forwarded to the
                first ``Conv2D`` layer.
            num_classes: Number of units in the final softmax layer.
        """
        self.model = self._build_model(input_shape, num_classes)

    def _build_model(self, input_shape, num_classes):
        """Assemble and compile the Keras model.

        Args:
            input_shape: Shape of a single input sample.
            num_classes: Number of units in the final softmax layer.

        Returns:
            A model compiled with Adam (learning rate 0.001), categorical
            cross-entropy loss and an accuracy metric.
        """
        model = Sequential([
            # Stage 1: 32 filters.
            Conv2D(32, (3, 3), activation='relu', input_shape=input_shape, padding='same'),
            BatchNormalization(),
            Conv2D(32, (3, 3), activation='relu', padding='same'),
            BatchNormalization(),
            MaxPooling2D((2, 2)),
            Dropout(0.25),
            # Stage 2: 64 filters.
            Conv2D(64, (3, 3), activation='relu', padding='same'),
            BatchNormalization(),
            Conv2D(64, (3, 3), activation='relu', padding='same'),
            BatchNormalization(),
            MaxPooling2D((2, 2)),
            Dropout(0.25),
            # Classification head.
            Flatten(),
            Dense(512, activation='relu'),
            BatchNormalization(),
            Dropout(0.5),
            Dense(num_classes, activation='softmax')
        ])

        optimizer = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
        model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])
        return model

    def train(self, x_train, y_train, x_val, y_val, epochs=50, batch_size=64):
        """Fit the model on augmented training batches.

        Training images are randomly rotated (up to 10 degrees), zoomed and
        shifted (up to 10%). The validation data is passed to ``fit`` as-is.
        The learning rate follows ``1e-3 * 0.9 ** epoch``.

        Args:
            x_train: Training images.
            y_train: Training targets; the categorical cross-entropy loss
                expects them one-hot encoded.
            x_val: Validation images.
            y_val: Validation targets, encoded like ``y_train``.
            epochs: Number of training epochs.
            batch_size: Size of the augmented batches.

        Returns:
            The Keras ``History`` object returned by ``fit``, so callers can
            inspect or plot the per-epoch metrics.
        """
        datagen = ImageDataGenerator(
            rotation_range=10,
            zoom_range=0.1,
            width_shift_range=0.1,
            height_shift_range=0.1
        )
        # No datagen.fit(x_train) here: it only computes statistics for
        # featurewise_center / featurewise_std_normalization / zca_whitening,
        # none of which is enabled, so it would just copy the whole array.
        reduce_lr = LearningRateScheduler(lambda epoch: 1e-3 * 0.9 ** epoch)
        return self.model.fit(
            datagen.flow(x_train, y_train, batch_size=batch_size),
            epochs=epochs,
            validation_data=(x_val, y_val),
            callbacks=[reduce_lr]
        )

    def predict(self, x_test):
        """Return the model output (one row of softmax scores per sample) for ``x_test``."""
        return self.model.predict(x_test)

def startCNN(X, Y, input_shape, num_classes):
    """Train a ``CNNClassifier`` with a random 10% validation hold-out.

    Args:
        X: Input samples.
        Y: Targets aligned with ``X``.
        input_shape: Shape of a single sample, forwarded to ``CNNClassifier``.
        num_classes: Number of output classes, forwarded to ``CNNClassifier``.

    Returns:
        The trained ``CNNClassifier`` instance.
    """
    # Fixed random_state keeps the hold-out split identical between runs.
    x_fit, x_holdout, y_fit, y_holdout = train_test_split(
        X, Y, test_size=0.1, random_state=42
    )
    classifier = CNNClassifier(input_shape, num_classes)
    classifier.train(x_fit, y_fit, x_holdout, y_holdout)
    return classifier

###################### -- XGBOOST Implementation -- ######################

class XGBoostClassifier:
    """Multi-class gradient-boosted tree classifier using the native xgboost API."""

    def __init__(self, max_depth=7, eta=0.1, num_class=24):
        """Store the booster parameters.

        Args:
            max_depth: Maximum tree depth.
            eta: Learning rate (shrinkage).
            num_class: Number of classes for the ``multi:softmax`` objective.
        """
        self.params = {
            'objective': 'multi:softmax',
            'num_class': num_class,
            'booster': 'gbtree',
            'eval_metric': 'merror',
            'eta': eta,
            'max_depth': max_depth,
        }
        # Set by train(); None until then so that misuse fails with a clear message.
        self.model = None

    def train(self, train_data, train_labels, validation_data, validation_labels,
              num_boost_round=200, early_stopping_rounds=20):
        """Train the booster with early stopping on the validation set.

        Args:
            train_data: Training feature matrix.
            train_labels: Training class ids.
            validation_data: Validation feature matrix.
            validation_labels: Validation class ids.
            num_boost_round: Maximum number of boosting rounds.
            early_stopping_rounds: Stop when the validation metric has not
                improved for this many rounds.
        """
        dtrain = xgb.DMatrix(train_data, label=train_labels)
        dval = xgb.DMatrix(validation_data, label=validation_labels)
        # The last entry of the watchlist is the one early stopping monitors.
        watchlist = [(dtrain, 'train'), (dval, 'validation')]
        self.model = xgb.train(
            self.params,
            dtrain,
            num_boost_round=num_boost_round,
            evals=watchlist,
            early_stopping_rounds=early_stopping_rounds,
            verbose_eval=False,
        )

    def compute_predictions(self, data):
        """Predict class ids for ``data``.

        Args:
            data: Feature matrix with the same columns as the training data.

        Returns:
            The array returned by ``Booster.predict`` (predicted class ids).

        Raises:
            RuntimeError: If called before ``train``.
        """
        if self.model is None:
            raise RuntimeError("compute_predictions() called before train()")
        ddata = xgb.DMatrix(data)
        # xgb.train returns the booster from the last round, not the best one,
        # so restrict prediction to the best iteration when early stopping
        # recorded it (iteration_range requires xgboost >= 1.4).
        best_iteration = getattr(self.model, 'best_iteration', None)
        if best_iteration is None:
            return self.model.predict(ddata)
        return self.model.predict(ddata, iteration_range=(0, best_iteration + 1))

    def compute_accuracy(self, preds, labels):
        """Return the fraction of entries where ``preds`` equals ``labels``."""
        return np.mean(preds == labels)

def startXGBOOST(train_data_normalized, train_labels, validation_data_normalized, validation_labels,
                 learning_rates=None, max_depths=None):
    """Grid-search XGBoost hyper-parameters and return the best trained model.

    Every (learning rate, depth) combination is trained with early stopping on
    the validation set and scored by validation accuracy, which is printed.
    The best-scoring model is kept and returned directly instead of being
    retrained.

    Args:
        train_data_normalized: Training feature matrix.
        train_labels: Training class ids (non-negative integers).
        validation_data_normalized: Validation feature matrix.
        validation_labels: Validation class ids.
        learning_rates: Learning rates to try. Defaults to ``[0.1]``.
        max_depths: Tree depths to try. Defaults to ``[7]``.
            To repeat a wider search, pass for example
            ``learning_rates=[0.01, 0.05, 0.1, 0.2, 0.3]`` and
            ``max_depths=[3, 4, 5, 6, 7, 8, 9, 10]``.

    Returns:
        The trained ``XGBoostClassifier`` with the highest validation accuracy.

    Raises:
        ValueError: If the grid is empty.
    """
    if learning_rates is None:
        learning_rates = [0.1]
    if max_depths is None:
        max_depths = [7]

    # Class ids must lie in [0, num_class), so size the model from the largest
    # id rather than the number of distinct ids. Both give the same value when
    # the labels are contiguous from 0.
    num_class = int(max(np.max(train_labels), np.max(validation_labels))) + 1
    print(num_class)

    best_xgb_model = None
    best_acc_xgb = -1.0  # below any real accuracy, so the first candidate always wins

    for lr in learning_rates:
        for depth in max_depths:
            xgb_model = XGBoostClassifier(max_depth=depth, eta=lr, num_class=num_class)
            xgb_model.train(train_data_normalized, train_labels, validation_data_normalized, validation_labels)
            val_preds = xgb_model.compute_predictions(validation_data_normalized)
            acc = xgb_model.compute_accuracy(val_preds, validation_labels)
            print(f"LR: {lr} ; Depth: {depth} ; Accuracy: {acc * 100:.2f}%")
            if acc > best_acc_xgb:
                best_acc_xgb = acc
                best_xgb_model = xgb_model
                print("New best")

    if best_xgb_model is None:
        raise ValueError("learning_rates and max_depths must each contain at least one value")

    return best_xgb_model

###################### -- DATA HANDLING AND TRAINING -- ######################

# Load the training set and the paired test set (split into its 'a' and 'b' images).
mnist_sign_train = pd.read_csv('sign_mnist_train.csv')
test_a, test_b = separate_test_sets('test.csv')

# Relabel class 24 as 9; merge_predictions maps 9 back to 24 when decoding.
# NOTE(review): presumably label 9 never occurs in the data, so this makes the
# class ids contiguous for XGBoost — confirm against the dataset.
mnist_sign_train['label'] = mnist_sign_train['label'].replace(24, 9)

# Prepare the training data for XGBoost & CNN:
# pixels scaled by 1/255, flat rows for XGBoost, 28x28x1 images for the CNN.
features_xgb = mnist_sign_train.drop('label', axis=1).values / 255.0
labels_xgb = mnist_sign_train['label'].values
features_cnn = features_xgb.reshape((-1, 28, 28, 1))
# NOTE(review): 25 one-hot columns are kept even after the 24 -> 9 relabelling;
# this must match the num_classes passed to startCNN.
labels_cnn = tf.keras.utils.to_categorical(labels_xgb, num_classes=25)

# Prepare the test data for XGBoost & CNN (same scaling and shapes as above).
normalized_test_a_cnn = test_a.values.reshape((-1, 28, 28, 1)) / 255.0
normalized_test_b_cnn = test_b.values.reshape((-1, 28, 28, 1)) / 255.0
normalized_test_a_xgb = test_a.values / 255.0
normalized_test_b_xgb = test_b.values / 255.0

# Train/validation split for XGBoost: first 80% of the rows for training,
# remaining 20% for validation (sequential slice, no shuffling).
split_index = int(0.8 * len(features_xgb))
x_train_xgb, x_val_xgb = features_xgb[:split_index], features_xgb[split_index:]
y_train_xgb, y_val_xgb = labels_xgb[:split_index], labels_xgb[split_index:]


In [62]:
# Train the XGBoost model on the 80/20 split; startXGBOOST prints the
# validation accuracy of every (learning rate, depth) pair it tries.
best_xgb_model = startXGBOOST(x_train_xgb, y_train_xgb, x_val_xgb, y_val_xgb)

LR: 0.01 ; Depth: 3 ; Accuracy: 77.13%
New best
LR: 0.01 ; Depth: 4 ; Accuracy: 87.91%
New best
LR: 0.01 ; Depth: 5 ; Accuracy: 92.73%
New best
LR: 0.01 ; Depth: 6 ; Accuracy: 95.16%
New best
LR: 0.01 ; Depth: 7 ; Accuracy: 96.30%
New best
LR: 0.01 ; Depth: 8 ; Accuracy: 96.78%
New best
LR: 0.01 ; Depth: 9 ; Accuracy: 97.32%
New best
LR: 0.01 ; Depth: 10 ; Accuracy: 97.47%
New best
LR: 0.05 ; Depth: 3 ; Accuracy: 95.81%
LR: 0.05 ; Depth: 4 ; Accuracy: 98.22%
New best
LR: 0.05 ; Depth: 5 ; Accuracy: 98.83%
New best
LR: 0.05 ; Depth: 6 ; Accuracy: 99.11%
New best
LR: 0.05 ; Depth: 7 ; Accuracy: 99.27%
New best
LR: 0.05 ; Depth: 8 ; Accuracy: 99.18%
LR: 0.05 ; Depth: 9 ; Accuracy: 99.25%
LR: 0.05 ; Depth: 10 ; Accuracy: 99.29%
New best
LR: 0.1 ; Depth: 3 ; Accuracy: 98.63%
LR: 0.1 ; Depth: 4 ; Accuracy: 99.27%
LR: 0.1 ; Depth: 5 ; Accuracy: 99.38%
New best
LR: 0.1 ; Depth: 6 ; Accuracy: 99.42%
New best
LR: 0.1 ; Depth: 7 ; Accuracy: 99.54%
New best
LR: 0.1 ; Depth: 8 ; Accuracy: 99.53%
LR

In [53]:
# Train the CNN; startCNN holds out 10% of the samples for validation internally.
cnn_model = startCNN(features_cnn, labels_cnn, input_shape=(28, 28, 1), num_classes=25)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [63]:
# XGBoost predictions: classify the 'a' and 'b' test images separately,
# then merge each pair of class ids into a single character label.
xgb_preds_a = best_xgb_model.compute_predictions(normalized_test_a_xgb)
xgb_preds_b = best_xgb_model.compute_predictions(normalized_test_b_xgb)
xgb_merged_predictions = merge_predictions(xgb_preds_a, xgb_preds_b)

In [54]:
# CNN predictions: predict() returns one row of softmax scores per sample,
# so argmax over axis 1 picks the class id before the pairs are merged.
cnn_preds_a = cnn_model.predict(normalized_test_a_cnn)
cnn_preds_b = cnn_model.predict(normalized_test_b_cnn)
cnn_merged_predictions = merge_predictions(np.argmax(cnn_preds_a, axis=1), np.argmax(cnn_preds_b, axis=1))



In [64]:
# Write the merged XGBoost predictions to xgb_predictions.csv (header: id,label).
save_predictions_to_csv("xgb_predictions.csv", xgb_merged_predictions)


In [55]:
# Write the merged CNN predictions to cnn_predictions.csv (header: id,label).
save_predictions_to_csv("cnn_predictions.csv", cnn_merged_predictions)
