In [1]:
import pickle  # reading the CIFAR-10 batch files
from pathlib import Path  # OS-independent file paths

import numpy as np # arrays & loading data
import tensorflow as tf # neural-network framework (Keras API)
from sklearn.preprocessing import StandardScaler # z-score normalization 
from tensorflow.keras.layers import Dense # we will use Dense layers
from tensorflow.keras.models import Sequential  # model type that we will use

# suppress warnings: TensorFlow's Python logger only emits messages at ERROR level or above
tf.get_logger().setLevel('ERROR')
# set AutoGraph's logging verbosity to 0
tf.autograph.set_verbosity(0)

In [2]:
# unpickle the data from the batch file in the CIFAR-10 dataset
def unpickle(file):
    """Load one pickled batch file and return the object stored in it.

    Parameters
    ----------
    file : str or path-like
        Location of the pickled file; it is opened in binary mode.

    Returns
    -------
    object
        Whatever was pickled in the file. The loading loop in this notebook
        indexes the result with the byte-string keys b'labels' and b'data'.

    Notes
    -----
    ``encoding='bytes'`` makes 8-bit strings written by Python 2 load as
    ``bytes`` objects, which is why the keys are byte strings.
    """
    # NOTE(security): pickle can run arbitrary code while loading - only open
    # batch files that come from a trusted source.
    with open(file, 'rb') as fo:
        return pickle.load(fo, encoding='bytes')

# Folder with the extracted CIFAR-10 batch files, relative to the notebook.
# Built with pathlib so the same code works on Windows, Linux and macOS.
DATA_DIR = Path("cifar-10-batches-py")
NUM_BATCHES = 5      # files data_batch_1 ... data_batch_5
PIXELS_KEPT = 1024   # number of leading values kept from every image row
# NOTE(review): per the CIFAR-10 documentation the first 1024 values of a row
# are the red channel of the 3072-value image - confirm that using a single
# channel is intended.

# here we will collect the filtered rows of every batch
batch_rows = []

for i in range(1, NUM_BATCHES + 1):
    # load the batch as a dictionary (named `batch` so the builtin `dict` stays usable)
    batch = unpickle(DATA_DIR / f"data_batch_{i}")

    # split the data using the keys
    labels = np.array(batch[b'labels'])
    data = np.array(batch[b'data'])[:, :PIXELS_KEPT]

    # binary classification: keep only the rows whose label is 0 or 1
    keep = labels < 2

    # glue the label on as the last column of every kept row
    batch_rows.append(np.column_stack((data[keep], labels[keep])))

# one concatenation at the end instead of growing the array inside the loop;
# float64 matches the dtype the rest of the notebook works with
all_data = np.concatenate(batch_rows, axis=0).astype(np.float64)

# sanity check: PIXELS_KEPT feature columns + 1 label column
assert all_data.shape[1] == PIXELS_KEPT + 1

print(all_data)
print(all_data.shape)

[[170. 168. 177. ...  71.  73.   1.]
 [159. 150. 153. ... 173. 182.   1.]
 [202. 202. 204. ... 239. 240.   0.]
 ...
 [156. 155. 156. ... 152. 152.   0.]
 [189. 186. 185. ... 196. 195.   1.]
 [229. 236. 234. ... 164. 163.   1.]]
(10000, 1025)


In [3]:
from sklearn.model_selection import train_test_split

# Inputs are every column except the last one; the last column holds the label.
X = all_data[:, :-1]
# Labels as a 2D column (n_samples, 1) - the later cells expect y to be 2D.
y = all_data[:, -1].reshape(-1, 1)

# Hold out 20% of the rows as the CROSS-VALIDATION set, the remaining 80% is
# the TRAINING set. random_state makes the shuffle repeatable.
# No separate test set is created in this notebook.
X_train, X_cv, y_train, y_cv = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=1,
)

print(f"training input shape:{X_train.shape}")
print(f"training output shape:{y_train.shape}")
print(f"cv input shape:{X_cv.shape}")
print(f"cv output shape:{y_cv.shape}")



training input shape:(8000, 1024)
training output shape:(8000, 1)
cv input shape:(2000, 1024)
cv output shape:(2000, 1)


In [4]:
# Sequential model structure: a stack of fully connected (Dense) layers.
# The network receives 1024 values per image, matching the 1024 feature
# columns kept when the data was loaded.
# Every layer, including the single-unit output layer, uses a sigmoid activation.

# TODO: CHANGE TO CONVOLUTIONAL
model = Sequential(
    [
        tf.keras.Input(shape=(1024,)),  # input size
        *(
            Dense(units, activation="sigmoid", name=layer_name)
            for layer_name, units in (
                # ("layer1", 200),  # currently disabled
                # ("layer2", 120),  # currently disabled
                ("layer3", 60),
                ("layer4", 50),
                ("layer5", 15),
                ("layer6", 1),  # output layer
            )
        ),
    ],
    name="binary_model",
)

In [5]:
# print Keras' summary of the model defined above
# (use it to check the layers and the size of their w and b parameters)
model.summary()

In [6]:
# z-score normalization - make the data compact so the algorithms work better.
# The scaler is fitted on the TRAINING inputs only; the same fitted scaler is
# then applied to both the training and the CV inputs.
standard_scaler = StandardScaler().fit(X_train)
X_train_scaled = standard_scaler.transform(X_train)
X_cv_scaled = standard_scaler.transform(X_cv)

# define the loss and the optimizer used for training
model.compile(
    # Adam: similar to gradient descent, but a much improved version;
    # 0.01 is the learning rate
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    # binary cross-entropy - the loss for a two-class problem
    loss=tf.keras.losses.BinaryCrossentropy(),
)

In [7]:
# train the model: `epochs` full passes over the scaled training set
# (left as the last expression of the cell, so the returned History object is displayed)
model.fit(x=X_train_scaled, y=y_train, epochs=100)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 694us/step - loss: 0.5833
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 743us/step - loss: 0.5032
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 743us/step - loss: 0.4806
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 680us/step - loss: 0.4734
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 680us/step - loss: 0.4611
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 743us/step - loss: 0.4654
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 743us/step - loss: 0.4531
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 743us/step - loss: 0.4460
Epoch 9/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 680us/step - loss: 0.4433
Epoch 10/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x183e7ecddc0>

In [9]:
def predict_labels(model, X, threshold=0.5):
    """Return hard 0/1 predictions for X.

    An entry is 1 where ``model.predict(X)`` is >= ``threshold`` and 0
    otherwise; the result has the same shape as the output of ``model.predict``.
    """
    return np.where(model.predict(X) >= threshold, 1, 0)


def classification_error(y_true, y_pred):
    """Return the fraction of entries where ``y_pred`` differs from ``y_true``.

    Both inputs are flattened before the comparison, so a (n,) array and a
    (n, 1) array are compared element by element instead of being silently
    broadcast to an (n, n) matrix.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of entries.
    """
    y_true = np.ravel(y_true)
    y_pred = np.ravel(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true has {y_true.size} entries but y_pred has {y_pred.size}"
        )
    # arithmetic mean of a boolean array = (count of wrong predictions) / (count of all predictions)
    return np.mean(y_pred != y_true)


# fraction of misclassified outputs in the TRAINING SET
y_predicted = predict_labels(model, X_train_scaled)
error = classification_error(y_train, y_predicted)
print(f"Training Set Classification Error: {error}")

# do the same for CV set
y_predicted = predict_labels(model, X_cv_scaled)
error2 = classification_error(y_cv, y_predicted)
print(f"CV Set Classification Error: {error2}")

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 482us/step
Training Set Classification Error: 0.09175
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 516us/step
CV Set Classification Error: 0.203
