# Voting

Tests of the voting for chosen models.

In [7]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow import keras
import numpy as np
from sklearn.metrics import accuracy_score
import pandas as pd

In [8]:
seed = 2024
TRAIN_DIR = '../voting/data/train'

In [9]:
def hard_vote(preds1, preds2, preds3):
    final_preds = []
    for i in range(len(preds1)):
        combined_preds = np.array([preds1[i], preds2[i], preds3[i]])
        final_preds.append(np.argmax(np.sum(combined_preds, axis=0)))
    return np.array(final_preds)

In [10]:
def hard_vote7(preds1, preds2, preds3, preds4, preds5, preds6, preds7):
    final_preds = []
    for i in range(len(preds1)):
        combined_preds = np.array([preds1[i], preds2[i], preds3[i], preds4[i], preds5[i], preds6[i], preds7[i]])
        final_preds.append(np.argmax(np.sum(combined_preds, axis=0)))
    return np.array(final_preds)

In [11]:
datagen = ImageDataGenerator(validation_split=0.2,
                             rescale=1./255)

val_generator = datagen.flow_from_directory(
        TRAIN_DIR,
        subset='validation',
        seed=seed,
        target_size=(32, 32),
        batch_size=16
    )

Found 18000 images belonging to 10 classes.


In [12]:
X, y = zip(*(val_generator[i] for i in range(len(val_generator))))
X_test, y_test = np.vstack(X), np.vstack(y)

In [13]:
def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=1, keepdims=True)

In [14]:
def soft_vote_probs(probs1, probs2):
    final_probs = (probs1 + probs2) / 2.0
    return final_probs

## Test 1: Hard voting for best VGG models
VGG8 for learning rate = 0.01 $\newline$
VGG9 for batch size = 128 $\newline$
VGG14

In [15]:
vgg8 = load_model('keras/vgg8-3.keras')
vgg9 = load_model('keras/vgg9-2.keras')
vgg14 = load_model('keras/vgg14-0.keras')

In [16]:
y_pred1 = vgg8.predict(X_test)
y_pred2 = vgg9.predict(X_test)
y_pred3 = vgg14.predict(X_test)




In [17]:
y_test = np.argmax(y_test, axis=1)

In [18]:
final_predictions = hard_vote(y_pred1, y_pred2, y_pred3)
accuracy = accuracy_score(final_predictions, y_test)
accuracy

0.5692222222222222

In [19]:
name = 'vgghardvoting'
accuracy_df = pd.DataFrame(data=[accuracy], columns = ['accuracy'])
accuracy_csv_file = f'train_history/{name}/accuracy.csv'
with open(accuracy_csv_file, mode='w') as f:
    accuracy_df.to_csv(f)
print(f'Attempts accuracy is saved to {accuracy_csv_file}')

Attempts accuracy is saved to train_history/vgghardvoting/accuracy.csv


## Test 2: Hard voting for 3 best CNN models
CNN6 - first attempt $\newline$
CNN7 - fifth attempt $\newline$
CNN10 - fifth attempt

In [22]:
cnn6 = load_model('keras/cnn6-0.keras')
cnn7 = load_model('keras/cnn7-4.keras')
cnn10 = load_model('keras/cnn10-4.keras')



In [23]:
y_pred1 = cnn6.predict(X_test)
y_pred2 = cnn7.predict(X_test)
y_pred3 = cnn10.predict(X_test)




In [None]:
y_test = np.argmax(y_test, axis=1)

In [25]:
final_predictions = hard_vote(y_pred1, y_pred2, y_pred3)
accuracy = accuracy_score(final_predictions, y_test)
accuracy

0.12311111111111112

In [26]:
name = 'cnnhardvoting3'
accuracy_df = pd.DataFrame(data=[accuracy], columns = ['accuracy'])
accuracy_csv_file = f'train_history/{name}/accuracy.csv'
with open(accuracy_csv_file, mode='w') as f:
    accuracy_df.to_csv(f)
print(f'Attempts accuracy is saved to {accuracy_csv_file}')

Attempts accuracy is saved to train_history/cnnhardvoting3/accuracy.csv


## Test 3: Hard voting for 7 best CNN models
CNN6 - first attempt $\newline$
CNN7 - fifth attempt $\newline$
CNN10 - all attemps

In [27]:
cnn6 = load_model('keras/cnn6-0.keras')
cnn7 = load_model('keras/cnn7-4.keras')
cnn10_0 = load_model('keras/cnn10-0.keras')
cnn10_1 = load_model('keras/cnn10-1.keras')
cnn10_2 = load_model('keras/cnn10-2.keras')
cnn10_3 = load_model('keras/cnn10-3.keras')
cnn10_4 = load_model('keras/cnn10-4.keras')



In [28]:
y_pred1 = cnn6.predict(X_test)
y_pred2 = cnn7.predict(X_test)
y_pred3 = cnn10_0.predict(X_test)
y_pred4 = cnn10_1.predict(X_test)
y_pred5 = cnn10_2.predict(X_test)
y_pred6 = cnn10_3.predict(X_test)
y_pred7 = cnn10_4.predict(X_test)




In [None]:
y_test = np.argmax(y_test, axis=1)

In [30]:
final_predictions = hard_vote7(y_pred1, y_pred2, y_pred3, y_pred4, y_pred5, y_pred6, y_pred7)
accuracy = accuracy_score(final_predictions, y_test)
accuracy

0.1005

In [31]:
name = 'cnnhardvoting7'
accuracy_df = pd.DataFrame(data=[accuracy], columns = ['accuracy'])
accuracy_csv_file = f'train_history/{name}/accuracy.csv'
with open(accuracy_csv_file, mode='w') as f:
    accuracy_df.to_csv(f)
print(f'Attempts accuracy is saved to {accuracy_csv_file}')

Attempts accuracy is saved to train_history/cnnhardvoting7/accuracy.csv


## Test 4: Soft voting for best CNN and VGG models
CNN10 - fifth attempt $\newline$
VGG9 - batch size = 128

In [32]:
cnn10 = load_model('keras/cnn10-4.keras')
vgg9 = load_model('keras/vgg9-2.keras')



In [33]:
y_pred1 = cnn10.predict(X_test)
y_pred2 = vgg9.predict(X_test)



In [None]:
y_test = np.argmax(y_test, axis=1)

In [34]:
probs1 = softmax(y_pred1)
probs2 = softmax(y_pred2)

In [35]:
final_probs = soft_vote_probs(probs1, probs2)
final_predictions = np.argmax(final_probs, axis=1)
accuracy = accuracy_score(final_predictions, y_test)
accuracy

0.5447777777777778

In [36]:
name = 'vggcnnsoftvoting'
accuracy_df = pd.DataFrame(data=[accuracy], columns = ['accuracy'])
accuracy_csv_file = f'train_history/{name}/accuracy.csv'
with open(accuracy_csv_file, mode='w') as f:
    accuracy_df.to_csv(f)
print(f'Attempts accuracy is saved to {accuracy_csv_file}')

Attempts accuracy is saved to train_history/vggcnnsoftvoting/accuracy.csv
