In [29]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import numpy as np
import pickle
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
from tensorflow import keras
from keras import models
from keras.models import Sequential
from keras.optimizers import Adam

In [26]:
def cross_validate_and_select_best_params(model, X, y, n_splits=5):
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    
    scores = {
        'accuracy': [],
        'precision': [],
        'recall': [],
        'f1': []
        # 'roc_auc': []
    }

    for train_index, test_index in kfold.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        model.fit(X_train, y_train, epochs=2)

        y_pred = model.predict(X_test)
        y_pred_labels = np.argmax(y_pred, axis=1)
        y_test_labels = np.argmax(y_test, axis=1) 
        
        scores['accuracy'].append(accuracy_score(y_test_labels, y_pred_labels))
        scores['precision'].append(precision_score(y_test_labels, y_pred_labels, average='macro'))
        scores['recall'].append(recall_score(y_test_labels, y_pred_labels, average='macro'))
        scores['f1'].append(f1_score(y_test_labels, y_pred_labels, average='macro'))
        # scores['roc_auc'].append(roc_auc_score(y_test, (model.predict(X_test)[:, 1] > 0.5).astype(int)))

    avg_scores = {key: np.mean(value) for key, value in scores.items()}
    return avg_scores

In [8]:
def load_cifar100_data(data_dir):
    with open(f"{data_dir}/meta", 'rb') as f:
        meta = pickle.load(f, encoding='latin1')
    
    with open(f"{data_dir}/train", 'rb') as f:
        train_data = pickle.load(f, encoding='latin1')
        
    with open(f"{data_dir}/test", 'rb') as f:
        test_data = pickle.load(f, encoding='latin1')
    
    return meta, train_data, test_data

In [13]:
def display_cifar100_data(meta, train_data, test_data, need_show_image=False):
    # Show Name of Classes
    print("CIFAR-100 Classes:")
    for i, label in enumerate(meta['fine_label_names']):
        print(f"{i}: {label}")
    
    # Show Training data shape and num
    print(f"\nTrain data shape: {train_data['data'].shape}")
    print(f"Number of training examples: {train_data['data'].shape[0]}")
    
    # Show Testing data shape and num
    print(f"\nTest data shape: {test_data['data'].shape}")
    print(f"Number of test examples: {test_data['data'].shape[0]}")

    # Show some images from training set if need
    if need_show_image:
        num_images = 10  # num to show
        images = train_data['data'][:num_images]
        labels = train_data['fine_labels'][:num_images]
    
        images = images.reshape(num_images, 3, 32, 32).transpose(0, 2, 3, 1)
        images = np.clip(images / 255.0, 0, 1)  

        plt.figure(figsize=(15, 5))
        for i in range(num_images):
            plt.subplot(2, 5, i + 1)
            plt.imshow(images[i])
            plt.title(meta['fine_label_names'][labels[i]])
            plt.axis('off')
        plt.tight_layout()
        plt.show()

In [14]:
data_dir = 'cifar-100-python'  # path directory
meta, train_data, test_data = load_cifar100_data(data_dir)
display_cifar100_data(meta, train_data, test_data)

CIFAR-100 Classes:
0: apple
1: aquarium_fish
2: baby
3: bear
4: beaver
5: bed
6: bee
7: beetle
8: bicycle
9: bottle
10: bowl
11: boy
12: bridge
13: bus
14: butterfly
15: camel
16: can
17: castle
18: caterpillar
19: cattle
20: chair
21: chimpanzee
22: clock
23: cloud
24: cockroach
25: couch
26: crab
27: crocodile
28: cup
29: dinosaur
30: dolphin
31: elephant
32: flatfish
33: forest
34: fox
35: girl
36: hamster
37: house
38: kangaroo
39: keyboard
40: lamp
41: lawn_mower
42: leopard
43: lion
44: lizard
45: lobster
46: man
47: maple_tree
48: motorcycle
49: mountain
50: mouse
51: mushroom
52: oak_tree
53: orange
54: orchid
55: otter
56: palm_tree
57: pear
58: pickup_truck
59: pine_tree
60: plain
61: plate
62: poppy
63: porcupine
64: possum
65: rabbit
66: raccoon
67: ray
68: road
69: rocket
70: rose
71: sea
72: seal
73: shark
74: shrew
75: skunk
76: skyscraper
77: snail
78: snake
79: spider
80: squirrel
81: streetcar
82: sunflower
83: sweet_pepper
84: table
85: tank
86: telephone
87: televis

In [11]:
def merge_train_test_data(train_data, test_data):
    X_train = train_data['data']
    y_train = train_data['fine_labels']
    
    X_test = test_data['data']
    y_test = test_data['fine_labels']
    
    X_combined = np.concatenate((X_train, X_test), axis=0)
    y_combined = np.concatenate((y_train, y_test), axis=0)
    
    return X_combined, y_combined

In [32]:
def build_simple_cnn(input_shape, num_classes):
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape, padding='same'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(num_classes, activation='softmax'))
    model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [33]:
input_shape = (32, 32, 3)
num_classes = 100
model = build_simple_cnn(input_shape, num_classes)

X_combined, y_combined = merge_train_test_data(train_data, test_data)
X_combined_reshaped = X_combined.reshape(-1, 32, 32, 3)

y_combined = to_categorical(y_combined, num_classes=100)


print(y_combined, test_data)

avg_scores = cross_validate_and_select_best_params(model, X_combined_reshaped, y_combined)



[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]] {'filenames': ['volcano_s_000012.png', 'woods_s_000412.png', 'seal_s_001803.png', 'mushroom_s_001755.png', 'adriatic_sea_s_000653.png', 'tulipa_clusiana_s_000175.png', 'camel_s_001052.png', 'mourning_cloak_s_000143.png', 'cirrostratus_s_000223.png', 'eating_apple_s_000763.png', 'adriatic_s_000380.png', 'skunk_s_001175.png', 'streetcar_s_000382.png', 'sounding_rocket_s_000010.png', 'lamp_s_001350.png', 'king_of_beasts_s_001221.png', 'tulipa_clusiana_s_000186.png', 'timber_wolf_s_000622.png', 'rose_s_002192.png', 'valencia_orange_s_001542.png', 'mountain_rose_s_000209.png', 'mount_s_002678.png', 'striped_skunk_s_001304.png', 'dinosaur_s_000326.png', 'chimpanzee_s_000134.png', 'oilcan_s_000656.png', 'keyboard_s_001529.png', 'safety_bike_s_000390.png', 'bike_s_000658.png', 'rose_s_000120.png', 'chaise_s_001826.png', 'fishplate_s_000669.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 10ms/step - accuracy: 0.0089 - loss: 4.6056
Epoch 2/2
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 10ms/step - accuracy: 0.0094 - loss: 4.6053
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 1/2
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 9ms/step - accuracy: 0.0102 - loss: 4.6053
Epoch 2/2
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - accuracy: 0.0094 - loss: 4.6059
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Epoch 1/2
[1m   1/1500[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m35s[0m 23ms/step - accuracy: 0.0000e+00 - loss: 4.6066

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.0097 - loss: 4.6053
Epoch 2/2
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - accuracy: 0.0089 - loss: 4.6052
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Epoch 1/2


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.0101 - loss: 4.6055
Epoch 2/2
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.0087 - loss: 4.6052
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [34]:
print(avg_scores)

{'accuracy': 0.008549999999999999, 'precision': 0.0007670983040787085, 'recall': 0.010037040212638258, 'f1': 0.0002384751095244362}
