In [None]:
import numpy as np
import pandas as pd
from keras import Sequential, Input
from keras.src.layers import Dense
from keras.src.utils import to_categorical
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import plotly.express as px

In [None]:
# Fixed seed so the train/test split is identical on every Restart & Run All.
RANDOM_STATE = 42

data = pd.read_csv("fetal_health.csv")

# Shift the target so the smallest class becomes 0 and cast to int so the values
# can be used directly as class indices.
# NOTE(review): assumes the CSV encodes the classes as 1..3 (the model has 3 outputs) - confirm.
labels = data["fetal_health"].values.astype(int) - 1

# Drop the target by name (it was read by name above) instead of by position, so a
# reordered CSV cannot leave the label inside the feature matrix.
data = data.drop(columns=["fetal_health"])

# Uncomment to inspect the class balance:
# display(px.histogram(labels))

# Stratify so both splits keep the same class proportions as the full dataset.
data, test_data, labels, test_labels = train_test_split(
    data,
    labels,
    test_size=0.25,
    random_state=RANDOM_STATE,
    stratify=labels,
)

# Fit the scaler on the training split only, then reuse its statistics for the
# test split so no information from the test rows reaches the training data.
scaler = StandardScaler()
data = scaler.fit_transform(data)
test_data = scaler.transform(test_data)

In [None]:
# Number of folds used by the manual cross-validation in the cells below.
NUMBER_OF_SUBSETS_TO_CROSS_VALIDATION = 9

# Rows per fold: training-row count divided by the fold count, rounded down.
one_cluster_size = data.shape[0] // NUMBER_OF_SUBSETS_TO_CROSS_VALIDATION

print(f"Data shape: {data.shape}")
print(f"One-cluster size: {one_cluster_size}")

In [None]:
def create_model(n_features=21, n_classes=3):
    """Build and compile a single-layer softmax classifier.

    Args:
        n_features: Length of each input feature vector. Defaults to 21, the
            width that was previously hard-coded.
        n_classes: Number of output units, one per class. Defaults to 3, the
            count that was previously hard-coded.

    Returns:
        A compiled ``Sequential`` model consisting of one ``Dense`` softmax
        layer, using the Adam optimizer, categorical cross-entropy loss and
        an accuracy metric.
    """
    model = Sequential([
        Input(shape=(n_features,)),
        Dense(units=n_classes, activation="softmax"),
    ])

    # categorical_crossentropy expects one-hot targets with n_classes columns.
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

    return model

In [None]:
# Split the training rows and their labels into the cross-validation folds.
# np.array_split hands any remainder rows to the first folds, so no sample is
# discarded when the row count is not a multiple of the fold count (slicing by
# a fixed fold size dropped those trailing rows).
# Both arrays have the same length, so fold k of the data lines up with fold k
# of the labels.
data_subsets = np.array_split(data, NUMBER_OF_SUBSETS_TO_CROSS_VALIDATION)
labels_subsets = np.array_split(labels, NUMBER_OF_SUBSETS_TO_CROSS_VALIDATION)

In [None]:
# Width of the one-hot targets; matches the 3 softmax units built by create_model().
NUMBER_OF_CLASSES = 3

models = []
stat_rows = []  # one dict per fold, turned into a DataFrame once after the loop

for i in range(NUMBER_OF_SUBSETS_TO_CROSS_VALIDATION):
    model = create_model()

    # Train on every fold except fold i; fold i is held out for validation.
    data_to_train = np.concatenate([data_subsets[j] for j in range(len(data_subsets)) if i != j])
    labels_to_train = np.concatenate([labels_subsets[j] for j in range(len(labels_subsets)) if i != j])
    data_to_validate = data_subsets[i]
    labels_to_validate = labels_subsets[i]

    # Pin the one-hot width explicitly. Without num_classes, to_categorical sizes
    # its output from the largest label present, so a fold missing the highest
    # class would produce fewer columns than the model has outputs and fit()
    # would fail on the shape mismatch.
    labels_to_train = to_categorical(labels_to_train, num_classes=NUMBER_OF_CLASSES)
    labels_to_validate = to_categorical(labels_to_validate, num_classes=NUMBER_OF_CLASSES)

    r = model.fit(data_to_train, labels_to_train, epochs=10, batch_size=16,
                  validation_data=(data_to_validate, labels_to_validate), verbose=0)

    models.append(model)

    # Record the metrics of the final epoch for this fold.
    stat_rows.append({
        "#": i + 1,
        "accuracy": r.history["accuracy"][-1],
        "loss": r.history["loss"][-1],
        "val_accuracy": r.history["val_accuracy"][-1],
        "val_loss": r.history["val_loss"][-1],
    })

    print(f"{i + 1}/{NUMBER_OF_SUBSETS_TO_CROSS_VALIDATION} models trained")

# Build the frame in one go instead of appending rows with .loc inside the loop;
# this also gives each column a proper numeric dtype.
stats = pd.DataFrame(stat_rows, columns=["#", "accuracy", "loss", "val_accuracy", "val_loss"])
stats

In [None]:
# Collect the true test labels and each model's predicted class side by side.
comparision = pd.DataFrame({"true": test_labels})

for i, model in enumerate(models, start=1):
    # Predicted class = index of the largest softmax output for each row.
    classes = model.predict(test_data, verbose=0).argmax(axis=1)
    comparision[f"Model #{i}"] = classes

    print(f"Model #{i} accuracy: {accuracy_score(comparision['true'], classes)}")

# Majority voting
# Row-wise mode over the per-model columns (everything after "true"); column 0
# of the result holds the first mode of each row.
model_columns = comparision.columns[1:]
comparision["Majority Vote"] = comparision[model_columns].mode(axis=1)[0]

print(f"Majority voting accuracy: {accuracy_score(comparision['true'], comparision['Majority Vote'])}")

In [None]:
# Display the true test labels next to each model's prediction and the majority vote.
comparision

# Conclusions
Majority voting is sometimes more accurate than every individual model, and its accuracy was never lower than that of the worst model.

Some tests:
- ***3 folds:*** Out of 5 pipeline runs, majority voting was more accurate than every individual model in 3 runs.
- ***5 folds:*** Out of 5 pipeline runs, majority voting was never more accurate than every individual model, but in 3 runs its accuracy equaled that of the best model.

