In [1]:
import numpy as np # arrays & loading data
import tensorflow as tf # arrays & loading data
from tensorflow.keras.models import Sequential  # model type that we will use
from tensorflow.keras.layers import Dense # we will use Dense layers
from sklearn.preprocessing import StandardScaler # z-score normalization 

# suppress warnings
tf.get_logger().setLevel('ERROR')
tf.autograph.set_verbosity(0)

In [2]:
# unpickle the data from the batch file in the CIFAR-10 dataset
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict
# save the data as a dictionary
dict = unpickle("cifar-10-batches-py\\data_batch_1")

# split the data using the keys
labels = np.array(dict[b'labels'])
data = np.array(dict[b'data'])

# form a column vector where only 0 and 1 are kept ( we need binary classification)
labels_zero_or_one = np.where((labels<2),labels,-1)
labels_zero_or_one = labels_zero_or_one.reshape(-1,1) 

# concatanate the labels column form above to the data
data_concatanated = np.concatenate((data,labels_zero_or_one),axis=1)

# keep only the rows that have 0 or 1 as labels (those with -1 as labels are not important now)
data_final= data_concatanated[data_concatanated[:,-1]!=-1]
print(data_final)
print(data_final.shape)

[[170 168 177 ...  78  80   1]
 [159 150 153 ...  17  19   1]
 [202 202 204 ... 243 243   0]
 ...
 [116 120 126 ...  84  81   1]
 [ 71  60  74 ...  69  68   1]
 [250 254 211 ... 255 254   1]]
(1979, 3073)


In [3]:
X = data_final[:,:-1] # forming the input and output 
y = data_final[:,-1]

y = np.expand_dims(y, axis=1) # make y 2D - the commands later will require it


In [4]:
# split the data into TRAINING, CROSS-VALIDATION and TEST sets
from sklearn.model_selection import train_test_split

# TRAINING SET - 60%
X_train, X_temporary, y_train, y_temporary = train_test_split(X, y, test_size=0.40, random_state=1)

# the rest of 40% - CV SET(20%) and TEST SET(20%)
X_cv, X_test, y_cv, y_test = train_test_split(X_temporary, y_temporary, test_size=0.50, random_state=1)
del X_temporary, y_temporary

print(f"training input shape:{X_train.shape}")
print(f"training output shape:{y_train.shape}")
print(f"cv input shape:{X_cv.shape}")
print(f"cv output shape:{y_cv.shape}")
print(f"test input shape:{X_test.shape}")
print(f"test output shape:{y_test.shape}")

training input shape:(1187, 3072)
training output shape:(1187, 1)
cv input shape:(396, 3072)
cv output shape:(396, 1)
test input shape:(396, 3072)
test output shape:(396, 1)


In [5]:
# each image has 3072 units (this is explained in more detail in the documentation in the link provided)
# sequential model structure

model= Sequential(
    [
        tf.keras.Input(shape=(3072,)), # input size
        Dense(200,activation="sigmoid", name="layer1"),
        Dense(120,activation="sigmoid", name="layer2"),
        Dense(60,activation="sigmoid", name="layer3"),
        Dense(15,activation="sigmoid", name="layer4"),
        Dense(1,activation="sigmoid", name="layer5"),
    ], name="binary_model"
)

In [6]:
# see details about the activation of every layer and the form of the w and b parameters
model.summary()

In [7]:


# applying z-score to all the training data - make it compact for the algorithms to work better
standard_scaler = StandardScaler()
X_train_scaled = standard_scaler.fit_transform(X_train)
X_cv_scaled = standard_scaler.transform(X_cv) 
X_test_scaled = standard_scaler.transform(X_test)

# define loss and optimizer of the Adam's algorithm
model.compile(
    # this is similar to gradient descent, but it is a much improved version
    loss=tf.keras.losses.BinaryCrossentropy(), # BC - binary class 
    optimizer=tf.keras.optimizers.Adam(0.01), # preimplemented optimizer
)

In [8]:
# train the model "epochs" times
model.fit(
    X_train_scaled, y_train,
    epochs = 34
)

Epoch 1/34
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.7321
Epoch 2/34
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.5134
Epoch 3/34
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.4308
Epoch 4/34
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.4365
Epoch 5/34
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.4288
Epoch 6/34
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.3916
Epoch 7/34
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.4392
Epoch 8/34
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.3813
Epoch 9/34
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.3461
Epoch 10/34
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.4403
Epoch 11/

<keras.src.callbacks.history.History at 0x2cba1ab50a0>

In [9]:
# fraction of misclassified outputs in the TRAINING SET
y_predicted = model.predict(X_train_scaled)
y_predicted = tf.math.sigmoid(y_predicted) # classify above or below the threshold
# classic way of calculating the error in a classification case
y_predicted = np.where(y_predicted>=0.5,1,0)
error = np.mean(y_predicted != y_train) # arithmetic mean, where the numerator is the count of wrong predictions
print(f"Training Set Classification Error: {error}")

# do the same for CV set
y_predicted = model.predict(X_cv_scaled)
y_predicted = tf.math.sigmoid(y_predicted) 
y_predicted = np.where(y_predicted>=0.5,1,0)
error2 = np.mean(y_predicted != y_cv) 
print(f"CV Set Classification Error: {error2}")

[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Training Set Classification Error: 0.5122156697556866
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
CV Set Classification Error: 0.5580808080808081
