# **RANDOM GUESSING MODEL**

The simplest baseline for our task is a random guessing model.\
Since we have eight labels, guessing uniformly at random gives an expected accuracy of 1/8 = 0.125.\
Let's check this empirically.

First, we import all the necessary libraries.

In [None]:
# Fix randomness and hide warnings
# Single seed value reused for every random number generator seeded in this notebook.
seed = 42

import os
# Reduce TensorFlow's native (C++) log output; '3' is the most restrictive level.
# This is set before `tensorflow` is imported in a later cell.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so setting it
# here presumably only affects child processes, not this running kernel -- confirm.
os.environ['PYTHONHASHSEED'] = str(seed)
# Config directory for Matplotlib (if it gets imported): ./configs/ under the current
# working directory.
os.environ['MPLCONFIGDIR'] = os.getcwd()+'/configs/'

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# `Warning` is the base class of all warning categories, so this filter silences every
# warning (including the FutureWarning case above).
warnings.simplefilter(action='ignore', category=Warning)

import numpy as np
# Seed NumPy's legacy global RNG (the state behind the `np.random.*` functions).
np.random.seed(seed)

# NOTE(review): `logging` is imported but not used in the cells visible here.
import logging

import random
# Seed Python's built-in `random` module.
random.seed(seed)

In [None]:
# Import tensorflow
import tensorflow as tf
# Seed TensorFlow's global random generator with the same `seed` used for the
# other libraries in the setup cell.
tf.random.set_seed(seed)
# TF1-compatibility seeding API.
# NOTE(review): likely redundant with tf.random.set_seed under TF 2.x -- confirm.
tf.compat.v1.set_random_seed(seed)
# Record the TensorFlow version in the notebook output for provenance.
print(tf.__version__)

2.16.1


In [None]:
# Import some other useful libraries or functions
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import numpy as np

## Data preparation

This is the most classical data preparation pipeline:
1. Load the data
2. Split the data into samples (images) and labels
3. Normalize the samples
4. Convert the labels to categorical
5. Build a **tf.data.Dataset** for a performance boost (what a performance)

In [None]:
# Location of the `.npz` archive holding the cleaned training set.
# The path is absolute and points under /kaggle/input, so it must be adjusted
# when the notebook is run in a different environment.
data_path = "/kaggle/input/blood-cells/cleaned_training_set.npz"

In [None]:
# Load the archive inside a context manager so the underlying file handle is
# closed as soon as both arrays have been read (np.load on an .npz file returns
# a lazily-read NpzFile that otherwise stays open).
# NOTE(security): allow_pickle=True lets NumPy unpickle object arrays, which can
# execute arbitrary code -- only load archives from a trusted source.
with np.load(data_path, allow_pickle=True) as data:
    lst = data.files
    # NOTE(review): relies on the archive listing the samples first and the
    # labels second -- confirm against the file's key names.
    X = data[lst[0]]
    y = data[lst[1]]

# Convert values in data to int
X = X.astype(int)

# Normalize data to the range [0, 1]
# (assumes the stored values lie in 0..255 -- TODO confirm)
X = (X / 255).astype('float32')

# Convert labels to categorical format using one-hot encoding
# (the number of classes is inferred by to_categorical from the label values)
y = tf.keras.utils.to_categorical(y)

In [None]:
# Number of samples per batch used when building the tf.data pipeline.
batch_size = 256

In [None]:
# Build the input pipeline one stage per line so each step is easy to read.
dataset = (
    tf.data.Dataset.from_tensor_slices((X, y))  # one element per (sample, label) pair
    .cache()                                    # keep elements cached after the first pass
    .shuffle(20480)                             # shuffle with a buffer of 20480 elements
    .batch(batch_size)                          # group elements into batches
)

## Model definition

In [None]:
def random_guessing_model(dataset, n_labels=8, rng=None):
    """
    Baseline "model" that predicts the class of every sample uniformly at random.

    Parameters
    ----------
    dataset : iterable of (images, labels) batches
        Each `labels` object must expose `.numpy()` returning a 2-D one-hot
        array (one row per sample). `images` is ignored.
    n_labels : int, default 8
        Number of classes; guesses are drawn uniformly from [0, n_labels).
    rng : None, int or numpy.random.Generator, default None
        Source of randomness. With None (the default) guesses come from
        NumPy's global `np.random` state, exactly as before, so results depend
        on the global seed and on how often the function has been called.
        Pass an int seed or a Generator to get a result that is reproducible
        independently of the global state.

    Returns
    -------
    dict
        {"accuracy": fraction of correct guesses,
         "f1_score": macro-averaged F1 score}
    """

    # A dedicated generator is only created when the caller asks for one, so
    # the default behaviour (global np.random stream) stays unchanged.
    generator = None if rng is None else np.random.default_rng(rng)

    true_batches = []
    guess_batches = []

    for _images, labels in dataset:

        # One-hot rows -> integer class indices
        true_labels = np.argmax(labels.numpy(), axis=1)

        # One uniform random guess per sample in the batch
        if generator is None:
            guesses = np.random.randint(0, n_labels, size=true_labels.shape)
        else:
            guesses = generator.integers(0, n_labels, size=true_labels.shape)

        # Collect whole arrays per batch instead of extending a Python list
        # element by element.
        true_batches.append(true_labels)
        guess_batches.append(guesses)

    if true_batches:
        all_true_labels = np.concatenate(true_batches)
        all_random_predictions = np.concatenate(guess_batches)
    else:
        # np.concatenate rejects an empty list; fall back to empty arrays so an
        # empty dataset is handled the same way as before.
        all_true_labels = np.array([])
        all_random_predictions = np.array([])

    accuracy = np.mean(all_true_labels == all_random_predictions)
    f1 = f1_score(all_true_labels, all_random_predictions, average='macro')

    return {
        "accuracy": accuracy,
        "f1_score": f1
    }

In [None]:
# Run the random-guessing baseline over the whole dataset and report both
# metrics rounded to two decimals.
scores = random_guessing_model(dataset)
print(f"\nAccuracy: {scores['accuracy']:.2f}\nF1-score: {scores['f1_score']:.2f}")


Accuracy: 0.13
F1-score: 0.12
