In [24]:
# Local CSV inputs: these two names are read later via pd.read_csv
# (train_file_path) and separate_test_sets (test_file_path).
train_file_path = 'sign_mnist_train2.csv'
test_file_path = 'test2.csv'

In [None]:
# Remote alternative: the same two file names served from GitHub raw URLs.
# Running this cell rebinds both path variables.
train_file_path = 'https://raw.githubusercontent.com/Herbrax/Kaggle_MNIST_Sign/main/sign_mnist_train2.csv'
test_file_path = 'https://raw.githubusercontent.com/Herbrax/Kaggle_MNIST_Sign/main/test2.csv'

In [18]:
# Alternative local inputs ('sign_mnist_train.csv' / 'test.csv').
# Whichever path cell was executed last decides what the data-handling code loads.
train_file_path = 'sign_mnist_train.csv'
test_file_path = 'test.csv'

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb

# Seed NumPy's global RNG for reproducibility. The training frame is shuffled
# further down with DataFrame.sample(frac=1) and no random_state, which
# presumably draws from this global generator — confirm against the pandas docs.
np.random.seed(42)
# Fraction of the shuffled rows used for training; the rest is the validation split.
split_ratio = 0.8

###################### -- HELPER FUNCTIONS -- ######################

def separate_test_sets(file_path):
    """Read the paired test CSV and split it into its 'a' and 'b' tables.

    Columns whose name contains 'pixel_a' go to the first table and columns
    whose name contains 'pixel_b' go to the second. The '_a' / '_b' marker is
    then stripped from the column names so both tables share the same names.
    Columns matching neither pattern are dropped.

    Args:
        file_path: anything accepted by ``pd.read_csv`` (path, URL, buffer).

    Returns:
        A ``(test_a, test_b)`` tuple of DataFrames.
    """
    frame = pd.read_csv(file_path)

    def _extract(tag):
        # Substring match on the column name, mirroring the "pixel_<tag>" layout.
        picked = [name for name in frame.columns if f'pixel_{tag}' in name]
        subset = frame[picked]
        subset.columns = [name.replace(f'_{tag}', '') for name in picked]
        return subset

    return _extract('a'), _extract('b')

def predict_and_merge(model, test_a, test_b):
    """Predict both halves of each test pair and merge them into one character.

    For every row the two predicted class indices are decoded (class 9 stands
    for label 24, undoing the remap applied to the training labels), shifted
    by 65 to obtain character codes, summed, folded with
    ``normalize_ascii_sum`` and converted to a character.

    Args:
        model: object exposing ``compute_predictions(data)``.
        test_a: features for the first image of each pair.
        test_b: features for the second image of each pair.

    Returns:
        List of ``(row_index, character)`` tuples, one per row of ``test_a``.
    """
    def _restore(label):
        # Class 9 is the stand-in for the original label 24.
        return 24 if label == 9 else label

    first = model.compute_predictions(test_a)
    second = model.compute_predictions(test_b)

    merged = []
    for row in range(len(first)):
        code_a = _restore(first[row]) + 65
        code_b = _restore(second[row]) + 65
        merged.append((row, chr(normalize_ascii_sum(code_a + code_b))))
    return merged


def predict_and_merge2(model, test_a, test_b):
    """Debug variant of the merge: return the raw numeric sum of both labels.

    Each predicted class index is decoded (class 9 stands for label 24) and
    the two decoded labels are added without any ASCII conversion. Every row
    is also printed as ``a ; b ---------------------- sum`` for inspection.

    Args:
        model: object exposing ``compute_predictions(data)``.
        test_a: features for the first image of each pair.
        test_b: features for the second image of each pair.

    Returns:
        List of ``(row_index, label_sum)`` tuples, one per row of ``test_a``.
    """
    def _restore(label):
        # Class 9 is the stand-in for the original label 24.
        return 24 if label == 9 else label

    first = model.compute_predictions(test_a)
    second = model.compute_predictions(test_b)

    combined = []
    for row in range(len(first)):
        left = _restore(first[row])
        right = _restore(second[row])
        total = left + right
        print(left, ";", right, "----------------------", total)
        combined.append((row, total))
    return combined

def save_predictions_to_csv(filename, predictions):
    """Write predictions to ``filename`` as a two-column CSV.

    The file starts with the header ``id,label`` followed by one
    ``<id>,<label>`` line per prediction. An existing file is overwritten.

    Args:
        filename: destination path.
        predictions: iterable of ``(id, label)`` pairs.
    """
    with open(filename, 'w') as sink:
        sink.write("id,label\n")
        sink.writelines(f"{key},{value}\n" for key, value in predictions)

def normalize_ascii_sum(ascii_sum):
    """Fold a character-code sum down until it no longer exceeds 122 ('z').

    65 (the code of 'A') is subtracted repeatedly while the value is above
    122. The step is 65, not the width of the 'A'..'z' range, so the result
    can fall below 65 (for example 188 -> 58).

    Args:
        ascii_sum: numeric sum of two character codes.

    Returns:
        The folded value converted to a built-in ``int``.
    """
    ceiling = ord('z')  # 122
    step = ord('A')  # 65
    total = ascii_sum
    while total > ceiling:
        total -= step
    return int(total)

def plot_results_XGB(results):
    """Plot validation accuracy against max depth, one curve per learning rate.

    Args:
        results: iterable of records whose first three items are
            ``(learning_rate, max_depth, accuracy)``.

    The results are grouped in a single pass and the curves are drawn in
    ascending learning-rate order, so the legend order is the same on every
    run (iterating a ``set`` of rates gives no such guarantee). Within a
    curve the points keep the order in which they appear in ``results``.
    """
    # learning rate -> ([depths], [accuracies]) in input order
    curves = {}
    for record in results:
        depths, scores = curves.setdefault(record[0], ([], []))
        depths.append(record[1])
        scores.append(record[2])

    plt.figure(figsize=(10, 6))
    for lr in sorted(curves):
        depths, scores = curves[lr]
        plt.plot(depths, scores, label=f'Learning Rate {lr}')
    plt.title('Accuracy for different max depths and learning rates')
    plt.xlabel('Max Depth')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()



###################### -- Data Handling -- ######################

# Load the training table. Label 24 is stored as 9 from here on; the
# prediction helpers map class 9 back to 24 when decoding.
mnist_sign_train = pd.read_csv(train_file_path)
mnist_sign_train['label'] = mnist_sign_train['label'].replace(24, 9)
# Shuffle every row before the positional train/validation split below.
mnist_sign_train = mnist_sign_train.sample(frac=1).reset_index(drop=True)
labels = mnist_sign_train['label'].values
features = mnist_sign_train.drop('label', axis=1).values

# Rescale the raw pixel values (presumably 0-255 — TODO confirm against the data).
features_normalized = features / 255.0
split_index = int(split_ratio * len(features_normalized))
train_data, validation_data = features_normalized[:split_index], features_normalized[split_index:]
train_labels, validation_labels = labels[:split_index], labels[split_index:]

# Standardise each feature with statistics from the training split only, and
# reuse those statistics for the validation and test data.
train_mean = train_data.mean(axis=0)
train_std = train_data.std(axis=0)
# A feature that is constant over the training split has a standard deviation
# of 0; dividing by it would fill that column with NaN/inf. Dividing by 1.0
# instead leaves such a column as its plain centred value.
train_std = np.where(train_std == 0, 1.0, train_std)
train_data_normalized = (train_data - train_mean) / train_std
validation_data_normalized = (validation_data - train_mean) / train_std

# The test file holds two images per row; each half gets the training-time scaling.
test_a, test_b = separate_test_sets(test_file_path)
normalized_test_a = (test_a.values / 255.0 - train_mean) / train_std
normalized_test_b = (test_b.values / 255.0 - train_mean) / train_std

###################### -- XGBOOST Implementation -- ######################

class XGBoostClassifier:
    """Small wrapper around the native ``xgb.train`` API for multi-class data.

    The booster is configured with the 'multi:softmax' objective and the
    'merror' evaluation metric; only tree depth, learning rate and the class
    count are supplied by the caller.
    """

    def __init__(self, max_depth, eta, num_class):
        """Store the booster parameters.

        Args:
            max_depth: value for the 'max_depth' parameter.
            eta: value for the 'eta' (learning rate) parameter.
            num_class: value for the 'num_class' parameter.
        """
        self.params = dict(
            objective='multi:softmax',
            num_class=num_class,
            booster='gbtree',
            eval_metric='merror',
            eta=eta,
            max_depth=max_depth,
        )

    def train(self, train_data, train_labels, validation_data, validation_labels):
        """Fit a booster and keep it on ``self.model``.

        Runs at most 200 boosting rounds with early stopping set to 20 rounds.
        Both the training and the validation matrices are passed as evaluation
        sets, in that order, with per-round logging turned off.
        """
        fit_matrix = xgb.DMatrix(train_data, label=train_labels)
        holdout_matrix = xgb.DMatrix(validation_data, label=validation_labels)
        self.model = xgb.train(
            self.params,
            fit_matrix,
            num_boost_round=200,
            evals=[(fit_matrix, 'train'), (holdout_matrix, 'validation')],
            early_stopping_rounds=20,
            verbose_eval=False,
        )

    def compute_predictions(self, data):
        """Return the booster's predictions for ``data``; requires a prior ``train`` call."""
        return self.model.predict(xgb.DMatrix(data))

    def compute_accuracy(self, preds, labels):
        """Return the mean of the element-wise comparison ``preds == labels``."""
        matches = preds == labels
        return np.mean(matches)

###################### -- TRAINING -- ######################

def startXGBOOST(train_data_normalized, train_labels, validation_data_normalized, validation_labels,
                 learning_rates=None, max_depths=None):
    """Grid-search learning rate and tree depth, returning the best fitted model.

    One ``XGBoostClassifier`` is trained per (learning rate, depth) pair and
    scored on the validation data. The model with the highest validation
    accuracy is returned as-is rather than being trained a second time with
    the same data and settings.

    Args:
        train_data_normalized: training features.
        train_labels: training labels; the class count is the number of
            distinct values found here.
        validation_data_normalized: validation features.
        validation_labels: validation labels.
        learning_rates: learning rates to try. Defaults to ``[0.3]``; a wider
            sweep could use ``[0.01, 0.05, 0.1, 0.2, 0.3]``.
        max_depths: tree depths to try. Defaults to ``[4]``; a wider sweep
            could use ``[3, 4, 5, 6, 7, 8, 9, 10]``.

    Returns:
        Tuple ``(best_model, best_learning_rate, best_max_depth)``.

    Raises:
        ValueError: if the grid is empty, so no model could be trained.
    """
    if learning_rates is None:
        learning_rates = [0.3]
    if max_depths is None:
        max_depths = [4]

    results_XGB = []
    # Start below any reachable accuracy so that the first trained model is
    # always selected, even when its accuracy is exactly 0.
    best_acc_xgb = -1.0
    best_lr = None
    best_depth = None
    best_xgb_model = None
    num_class = len(np.unique(train_labels))

    for lr in learning_rates:
        for depth in max_depths:
            xgb_model = XGBoostClassifier(max_depth=depth, eta=lr, num_class=num_class)
            xgb_model.train(train_data_normalized, train_labels, validation_data_normalized, validation_labels)
            val_preds = xgb_model.compute_predictions(validation_data_normalized)
            acc = xgb_model.compute_accuracy(val_preds, validation_labels)
            print(f"LR: {lr} ; Depth: {depth} ; Accuracy: {acc * 100:.2f}%")
            if acc > best_acc_xgb:
                best_acc_xgb = acc
                best_lr = lr
                best_depth = depth
                best_xgb_model = xgb_model
                print("New best")
            results_XGB.append((lr, depth, acc))

    if best_xgb_model is None:
        raise ValueError("learning_rates and max_depths must each contain at least one value")

    # To visualise a sweep, call: plot_results_XGB(results_XGB)

    return best_xgb_model, best_lr, best_depth

#best_xgb_model = startXGBOOST(train_data_normalized, train_labels, validation_data_normalized, validation_labels)

#final_predictions = predict_and_merge(best_xgb_model, normalized_test_a, normalized_test_b)

#save_merged_predictions_to_csv("merged_predictions.csv", final_predictions)


In [26]:
# Echo the dataset paths currently in effect, then run the hyperparameter search.
print(train_file_path)
print(test_file_path)
# startXGBOOST returns the fitted wrapper plus the learning rate and depth it selected.
best_xgb_model, best_lr, best_depth = startXGBOOST(train_data_normalized, train_labels, validation_data_normalized, validation_labels)

sign_mnist_train2.csv
test2.csv
LR: 0.3 ; Depth: 4 ; Accuracy: 83.50%
New best


In [27]:
# Name the output file after the selected hyperparameters (e.g. "0.3_4.csv"),
# merge the predictions for both test halves into characters and write them out.
output = f"{best_lr}_{best_depth}.csv"
final_predictions = predict_and_merge(best_xgb_model, normalized_test_a, normalized_test_b)
save_predictions_to_csv(output, final_predictions)

In [None]:
# Debug run: predict_and_merge2 prints each decoded label pair and returns the
# plain numeric sums, which are saved for manual comparison.
final_predictions2 = predict_and_merge2(best_xgb_model, normalized_test_a, normalized_test_b)
save_predictions_to_csv("numsum_preds.csv", final_predictions2)


#23;17     40                        ONLY WRONG ONE: 17 is predicted as 18.
#18;23     41
#13;20     33
#12;11     23
#19;14     33

##Testing with testcustom.csv

CNN Basic Implementation

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.layers import BatchNormalization

# Seed NumPy's global RNG for reproducibility.
# NOTE(review): TensorFlow's own RNG is not seeded anywhere in this cell, so
# weight initialisation and dropout presumably still vary between runs —
# consider tf.random.set_seed if repeatable training is required.
np.random.seed(42)
# Fraction of the shuffled rows used for training; the rest is the validation split.
split_ratio = 0.8

###################### -- HELPER FUNCTIONS -- ######################

def separate_test_sets(file_path):
    """Read the paired test CSV and split it into its 'a' and 'b' tables.

    Columns whose name contains 'pixel_a' go to the first table and columns
    whose name contains 'pixel_b' go to the second. The '_a' / '_b' marker is
    then stripped from the column names so both tables share the same names.
    Columns matching neither pattern are dropped.

    Args:
        file_path: anything accepted by ``pd.read_csv`` (path, URL, buffer).

    Returns:
        A ``(test_a, test_b)`` tuple of DataFrames.
    """
    frame = pd.read_csv(file_path)

    def _extract(tag):
        # Substring match on the column name, mirroring the "pixel_<tag>" layout.
        picked = [name for name in frame.columns if f'pixel_{tag}' in name]
        subset = frame[picked]
        subset.columns = [name.replace(f'_{tag}', '') for name in picked]
        return subset

    return _extract('a'), _extract('b')

def normalize_ascii_sum(ascii_sum):
    """Fold a character-code sum down until it no longer exceeds 122 ('z').

    65 (the code of 'A') is subtracted repeatedly while the value is above
    122. The step is 65, not the width of the 'A'..'z' range, so the result
    can fall below 65 (for example 188 -> 58).

    Args:
        ascii_sum: numeric sum of two character codes.

    Returns:
        The folded value converted to a built-in ``int``.
    """
    ceiling = ord('z')  # 122
    step = ord('A')  # 65
    total = ascii_sum
    while total > ceiling:
        total -= step
    return int(total)

def save_predictions_to_csv(filename, predictions):
    """Write predictions to ``filename`` as a two-column CSV.

    The file starts with the header ``id,label`` followed by one
    ``<id>,<label>`` line per prediction. An existing file is overwritten.

    Args:
        filename: destination path.
        predictions: iterable of ``(id, label)`` pairs.
    """
    with open(filename, 'w') as sink:
        sink.write("id,label\n")
        sink.writelines(f"{key},{value}\n" for key, value in predictions)

###################### -- CNN Implementation -- ######################

def build_cnn_model(input_shape, num_classes):
    """Build and compile a small two-stage convolutional classifier.

    Each stage is a 3x3 convolution (32, then 64 filters) followed by 2x2
    max pooling and batch normalisation. The head flattens the features and
    applies a 128-unit dense layer, 50% dropout and a softmax output.

    Args:
        input_shape: shape of one input sample, passed to the first layer.
        num_classes: number of units in the softmax output layer.

    Returns:
        The model compiled with the 'adam' optimizer, the
        'sparse_categorical_crossentropy' loss (integer labels) and an
        accuracy metric.
    """
    conv_stages = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        BatchNormalization(),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        BatchNormalization(),
    ]
    classifier_head = [
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ]
    network = Sequential(conv_stages + classifier_head)
    network.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network


###################### -- Data Handling -- ######################

# Load the training table from a hard-coded local path
# (the train_file_path variable is not used in this cell).
mnist_sign_train = pd.read_csv('sign_mnist_train.csv')
mnist_sign_train['label'] = mnist_sign_train['label'].replace(24, 9)  # label 24 is stored as 9; undone when decoding predictions
# Shuffle all rows before the positional train/validation split below.
mnist_sign_train = mnist_sign_train.sample(frac=1).reset_index(drop=True)

# Separate the label column from the pixel columns
labels = mnist_sign_train['label'].values
features = mnist_sign_train.drop('label', axis=1).values

# Rescale pixel values (presumably 0-255 — TODO confirm against the data)
features_normalized = features / 255.0

# Split data into training and validation sets: first split_ratio share of rows vs the rest
split_index = int(split_ratio * len(features_normalized))
train_data, validation_data = features_normalized[:split_index], features_normalized[split_index:]
train_labels, validation_labels = labels[:split_index], labels[split_index:]

# Reshape flat rows into 28x28 single-channel images for the CNN
train_images = train_data.reshape((-1, 28, 28, 1))
validation_images = validation_data.reshape((-1, 28, 28, 1))

###################### -- CNN Training -- ######################

# Build the CNN; the output size is the number of distinct labels in the training split
input_shape = (28, 28, 1)
num_classes = len(np.unique(train_labels))
cnn_model = build_cnn_model(input_shape, num_classes)

# Train for 50 epochs, passing the held-out split as validation data
cnn_model.fit(train_images, train_labels, epochs=50, validation_data=(validation_images, validation_labels))

###################### -- Predict and Save to CSV -- ######################

# Prepare test data: split the paired CSV, reshape each half to 28x28x1 and
# apply the same /255 scaling used for training
test_a, test_b = separate_test_sets('test.csv')
normalized_test_a = test_a.values.reshape((-1, 28, 28, 1)) / 255.0
normalized_test_b = test_b.values.reshape((-1, 28, 28, 1)) / 255.0

# Predict on test data (one row of softmax outputs per image)
preds_a = cnn_model.predict(normalized_test_a)
preds_b = cnn_model.predict(normalized_test_b)

# Convert predictions to labels: index of the highest-scoring class per row
preds_a_labels = np.argmax(preds_a, axis=1)
preds_b_labels = np.argmax(preds_b, axis=1)

# Decode each pair: class 9 stands for label 24; then shift both labels by 65
# to get character codes, add them and fold the sum with normalize_ascii_sum.
merged_predictions = []
for i in range(len(preds_a_labels)):
    decoded_a = preds_a_labels[i] if preds_a_labels[i] != 9 else 24
    decoded_b = preds_b_labels[i] if preds_b_labels[i] != 9 else 24

    ascii_a = decoded_a + 65
    ascii_b = decoded_b + 65
    sum_pred = normalize_ascii_sum(ascii_a + ascii_b)
    # One (row index, merged character) record per test row
    merged_predictions.append((i, chr(sum_pred)))
   


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [8]:
# Write the merged predictions of the basic CNN cell to disk.
save_predictions_to_csv("cnnn_predictions.csv", merged_predictions)
   

CNN with K-Fold Validation

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from sklearn.model_selection import KFold

# Seed NumPy's global RNG for reproducibility.
# NOTE(review): TensorFlow's own RNG is not seeded anywhere in this cell, so
# weight initialisation and dropout presumably still vary between runs.
np.random.seed(42)
# Not referenced again in this cell: the K-fold splitter defines the partitions.
split_ratio = 0.8

###################### -- HELPER FUNCTIONS -- ######################

def separate_test_sets(file_path):
    """Read the paired test CSV and split it into its 'a' and 'b' tables.

    Columns whose name contains 'pixel_a' go to the first table and columns
    whose name contains 'pixel_b' go to the second. The '_a' / '_b' marker is
    then stripped from the column names so both tables share the same names.
    Columns matching neither pattern are dropped.

    Args:
        file_path: anything accepted by ``pd.read_csv`` (path, URL, buffer).

    Returns:
        A ``(test_a, test_b)`` tuple of DataFrames.
    """
    frame = pd.read_csv(file_path)

    def _extract(tag):
        # Substring match on the column name, mirroring the "pixel_<tag>" layout.
        picked = [name for name in frame.columns if f'pixel_{tag}' in name]
        subset = frame[picked]
        subset.columns = [name.replace(f'_{tag}', '') for name in picked]
        return subset

    return _extract('a'), _extract('b')

def normalize_ascii_sum(ascii_sum):
    """Fold a character-code sum down until it no longer exceeds 122 ('z').

    65 (the code of 'A') is subtracted repeatedly while the value is above
    122. The step is 65, not the width of the 'A'..'z' range, so the result
    can fall below 65 (for example 188 -> 58).

    Args:
        ascii_sum: numeric sum of two character codes.

    Returns:
        The folded value converted to a built-in ``int``.
    """
    ceiling = ord('z')  # 122
    step = ord('A')  # 65
    total = ascii_sum
    while total > ceiling:
        total -= step
    return int(total)

def save_predictions_to_csv(filename, predictions):
    """Write predictions to ``filename`` as a two-column CSV.

    The file starts with the header ``id,label`` followed by one
    ``<id>,<label>`` line per prediction. An existing file is overwritten.

    Args:
        filename: destination path.
        predictions: iterable of ``(id, label)`` pairs.
    """
    with open(filename, 'w') as sink:
        sink.write("id,label\n")
        sink.writelines(f"{key},{value}\n" for key, value in predictions)

###################### -- CNN Implementation -- ######################

def build_cnn_model(input_shape, num_classes):
    """Build and compile a small two-stage convolutional classifier.

    Each stage is a 3x3 convolution (32, then 64 filters) followed by 2x2
    max pooling and batch normalisation. The head flattens the features and
    applies a 128-unit dense layer, 50% dropout and a softmax output.

    Args:
        input_shape: shape of one input sample, passed to the first layer.
        num_classes: number of units in the softmax output layer.

    Returns:
        The model compiled with the 'adam' optimizer, the
        'sparse_categorical_crossentropy' loss (integer labels) and an
        accuracy metric.
    """
    conv_stages = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        BatchNormalization(),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        BatchNormalization(),
    ]
    classifier_head = [
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ]
    network = Sequential(conv_stages + classifier_head)
    network.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

###################### -- Data Handling and CNN Training with Cross-Validation -- ######################

# Load the training table from a hard-coded local path
mnist_sign_train = pd.read_csv('sign_mnist_train.csv')
mnist_sign_train['label'] = mnist_sign_train['label'].replace(24, 9)  # label 24 is stored as 9; undone when decoding predictions
# Shuffle all rows (the K-fold splitter below shuffles again with its own seed)
mnist_sign_train = mnist_sign_train.sample(frac=1).reset_index(drop=True)

# Separate the label column from the pixel columns
labels = mnist_sign_train['label'].values
features = mnist_sign_train.drop('label', axis=1).values

# Rescale pixel values and reshape flat rows into 28x28 single-channel images
features_normalized = features / 255.0
images = features_normalized.reshape((-1, 28, 28, 1))

# Define k-fold cross-validation: 20 shuffled folds with a fixed seed
kfold = KFold(n_splits=20, shuffle=True, random_state=42)
fold_no = 1
num_classes = len(np.unique(labels))

# NOTE(review): every iteration rebinds cnn_model to a freshly built network,
# so after the loop only the model trained on the last fold remains; per-fold
# scores are not collected or averaged.
for train, test in kfold.split(images, labels):
    train_images, test_images = images[train], images[test]
    train_labels, test_labels = labels[train], labels[test]

    # Define and build a new CNN model for this fold
    cnn_model = build_cnn_model(input_shape=(28, 28, 1), num_classes=num_classes)

    # Train for 10 epochs, using this fold's held-out part as validation data
    print(f'Training for fold {fold_no} ...')
    cnn_model.fit(train_images, train_labels, epochs=10, validation_data=(test_images, test_labels))

    # Increase fold number
    fold_no += 1

# Save the final model if needed
# cnn_model.save('path_to_my_model.h5')

###################### -- Predict and Save to CSV -- ######################

# Prepare test data: split the paired CSV, reshape each half to 28x28x1 and
# apply the same /255 scaling used for training
test_a, test_b = separate_test_sets('test.csv')
normalized_test_a = test_a.values.reshape((-1, 28, 28, 1)) / 255.0
normalized_test_b = test_b.values.reshape((-1, 28, 28, 1)) / 255.0

# Predict on test data; cnn_model here is the network left over from the
# final cross-validation fold
preds_a = cnn_model.predict(normalized_test_a)
preds_b = cnn_model.predict(normalized_test_b)

# Convert predictions to labels: index of the highest-scoring class per row
preds_a_labels = np.argmax(preds_a, axis=1)
preds_b_labels = np.argmax(preds_b, axis=1)

# Decode each pair: class 9 stands for label 24; then shift both labels by 65
# to get character codes, add them and fold the sum with normalize_ascii_sum.
merged_predictions = []
for i in range(len(preds_a_labels)):
    decoded_a = preds_a_labels[i] if preds_a_labels[i] != 9 else 24
    decoded_b = preds_b_labels[i] if preds_b_labels[i] != 9 else 24

    ascii_a = decoded_a + 65
    ascii_b = decoded_b + 65
    sum_pred = normalize_ascii_sum(ascii_a + ascii_b)
    # One (row index, merged character) record per test row
    merged_predictions.append((i, chr(sum_pred)))


Training for fold 1 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training for fold 2 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training for fold 3 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training for fold 4 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training for fold 5 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
# Write the merged predictions of the K-fold CNN cell to disk.
save_predictions_to_csv("cnn_fold__predictions.csv", merged_predictions)


CNN adapted from https://github.com/brendanartley/Medium-Article-Code/blob/main/code/mnist-keras-cnn-99-6.ipynb

In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler

# Seed NumPy's global RNG for reproducibility.
# NOTE(review): TensorFlow's own RNG is not seeded anywhere in this cell, so
# weight initialisation, dropout and augmentation presumably still vary between runs.
np.random.seed(42)

###################### -- HELPER FUNCTIONS -- ######################

def separate_test_sets(file_path):
    """Read the paired test CSV and split it into its 'a' and 'b' tables.

    Columns whose name contains 'pixel_a' go to the first table and columns
    whose name contains 'pixel_b' go to the second. The '_a' / '_b' marker is
    then stripped from the column names so both tables share the same names.
    Columns matching neither pattern are dropped.

    Args:
        file_path: anything accepted by ``pd.read_csv`` (path, URL, buffer).

    Returns:
        A ``(test_a, test_b)`` tuple of DataFrames.
    """
    frame = pd.read_csv(file_path)

    def _extract(tag):
        # Substring match on the column name, mirroring the "pixel_<tag>" layout.
        picked = [name for name in frame.columns if f'pixel_{tag}' in name]
        subset = frame[picked]
        subset.columns = [name.replace(f'_{tag}', '') for name in picked]
        return subset

    return _extract('a'), _extract('b')

def normalize_ascii_sum(ascii_sum):
    """Fold a character-code sum down until it no longer exceeds 122 ('z').

    65 (the code of 'A') is subtracted repeatedly while the value is above
    122. The step is 65, not the width of the 'A'..'z' range, so the result
    can fall below 65 (for example 188 -> 58).

    Args:
        ascii_sum: numeric sum of two character codes.

    Returns:
        The folded value converted to a built-in ``int``.
    """
    ceiling = ord('z')  # 122
    step = ord('A')  # 65
    total = ascii_sum
    while total > ceiling:
        total -= step
    return int(total)

def save_predictions_to_csv(filename, predictions):
    """Write predictions to ``filename`` as a two-column CSV.

    The file starts with the header ``id,label`` followed by one
    ``<id>,<label>`` line per prediction. An existing file is overwritten.

    Args:
        filename: destination path.
        predictions: iterable of ``(id, label)`` pairs.
    """
    with open(filename, 'w') as sink:
        sink.write("id,label\n")
        sink.writelines(f"{key},{value}\n" for key, value in predictions)

###################### -- CNN Implementation -- ######################

def build_cnn_model(input_shape, num_classes):
    """Build and compile the deeper CNN used with one-hot encoded labels.

    Two convolutional blocks (32 filters, then 64) each hold a pair of 3x3
    'same'-padded convolutions, every one followed by batch normalisation,
    and end with 2x2 max pooling and 25% dropout. The head flattens the
    features and applies a 512-unit dense layer, batch normalisation, 50%
    dropout and a softmax output.

    Args:
        input_shape: shape of one input sample, passed to the first layer.
        num_classes: number of units in the softmax output layer.

    Returns:
        The model compiled with Adam (learning rate 0.001) and the
        'categorical_crossentropy' loss, tracking accuracy.
    """
    first_block = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape, padding='same'),
        BatchNormalization(),
        Conv2D(32, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),
    ]
    second_block = [
        Conv2D(64, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        Conv2D(64, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),
    ]
    classifier_head = [
        Flatten(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ]
    network = Sequential(first_block + second_block + classifier_head)

    network.compile(
        optimizer=Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return network

###################### -- Data Handling and CNN Training -- ######################

# Load the training table from a hard-coded local path
mnist_sign_train = pd.read_csv('sign_mnist_train.csv')
mnist_sign_train['label'] = mnist_sign_train['label'].replace(24, 9)  # label 24 is stored as 9; undone when decoding predictions
# Shuffle all rows
mnist_sign_train = mnist_sign_train.sample(frac=1).reset_index(drop=True)

# Separate the label column from the pixel columns
labels = mnist_sign_train['label'].values
features = mnist_sign_train.drop('label', axis=1).values

# Rescale pixels, reshape to 28x28x1 images and one-hot encode the labels
# (25 columns, matching the model's output size below)
features_normalized = features / 255.0
images = features_normalized.reshape((-1, 28, 28, 1))
labels_encoded = tf.keras.utils.to_categorical(labels, num_classes=25)

# Data Augmentation: small random rotations, zooms and shifts
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1
)

# Define and build CNN model
input_shape = (28, 28, 1)
num_classes = 25  # NOTE(review): label 24 was remapped to 9 above, so the largest label is presumably 23 and 24 outputs would suffice — confirm; the spare unit only widens the softmax
cnn_model = build_cnn_model(input_shape, num_classes)

# Learning Rate Scheduler: 1e-3 multiplied by 0.9 for every completed epoch
reduce_lr = LearningRateScheduler(lambda x: 1e-3 * 0.9 ** x)

# Train the CNN model on augmented batches. All rows are used for training:
# no validation data is passed to fit() in this cell.
batch_size = 64
epochs = 50
history = cnn_model.fit(
    datagen.flow(images, labels_encoded, batch_size=batch_size),
    epochs=epochs,
    steps_per_epoch=images.shape[0] // batch_size,
    callbacks=[reduce_lr]
)


###################### -- Predict and Save to CSV -- ######################

# Prepare test data: split the paired CSV, reshape each half to 28x28x1 and
# apply the same /255 scaling used for training
test_a, test_b = separate_test_sets('test.csv')
normalized_test_a = test_a.values.reshape((-1, 28, 28, 1)) / 255.0
normalized_test_b = test_b.values.reshape((-1, 28, 28, 1)) / 255.0

# Predict on test data (one row of softmax outputs per image)
preds_a = cnn_model.predict(normalized_test_a)
preds_b = cnn_model.predict(normalized_test_b)

# Convert predictions to labels: index of the highest-scoring class per row
preds_a_labels = np.argmax(preds_a, axis=1)
preds_b_labels = np.argmax(preds_b, axis=1)

# Decode each pair: class 9 stands for label 24; then shift both labels by 65
# to get character codes, add them and fold the sum with normalize_ascii_sum.
merged_predictions = []
for i in range(len(preds_a_labels)):
    decoded_a = preds_a_labels[i] if preds_a_labels[i] != 9 else 24
    decoded_b = preds_b_labels[i] if preds_b_labels[i] != 9 else 24

    ascii_a = decoded_a + 65
    ascii_b = decoded_b + 65
    sum_pred = normalize_ascii_sum(ascii_a + ascii_b)
    # One (row index, merged character) record per test row
    merged_predictions.append((i, chr(sum_pred)))


Epoch 1/50

In [None]:
# Write the merged predictions of the augmented CNN cell to a CSV file
# with an "id,label" header.
save_predictions_to_csv("cnn_predictions.csv", merged_predictions)