# Assignment 3-2: Identifying CIFAR-10 Images

### Marc Aradillas

In [1]:
# imports
from keras.datasets import cifar10
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD, Adam, RMSprop
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [2]:
# Geometry of a single CIFAR-10 image: 32x32 pixels on 3 channels
# (the full dataset is a set of 60K such images).
IMG_ROWS, IMG_COLS = 32, 32
IMG_CHANNELS = 3

In [3]:
# Training configuration shared by the cells below
NB_CLASSES = 10          # number of output categories
NB_EPOCH = 20            # epochs per training run
BATCH_SIZE = 128         # samples per batch
VALIDATION_SPLIT = 0.2   # share of the training data handed to model.fit for validation
VERBOSE = 1              # verbosity level passed to fit / evaluate
OPTIM = RMSprop()        # optimizer instance passed to model.compile (library defaults)

In [21]:
# Original loading code from the example, kept commented out for reference.
# Calling cifar10.load_data() directly failed in this environment with the SSL
# error recorded in the note further down; the next cell holds the workaround.
#
# # Load CIFAR-10 dataset
# (X_train, y_train), (X_test, y_test) = cifar10.load_data()
# print("X_train shape:", X_train.shape)
# print(X_train.shape[0], 'train samples')
# print(X_test.shape[0], 'test samples')

# NOTE: the triple-quoted text below is a bare string expression rather than a
# comment, so the notebook echoes it as this cell's output.
'''
Error SSL certificate so modified code for workaround on example. extracted tar.gz file and pasted into the "\datasets" directory

Exception: URL fetch failure on https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz : 
None -- [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local 
issuer certificate (_ssl.c:1076)
'''

'\nError SSL certificate so modified code for workaround on example.\n\nException: URL fetch failure on https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz : \nNone -- [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local \nissuer certificate (_ssl.c:1076)\n'

In [5]:
import os
import ssl

# `cifar10` itself is already imported in the notebook's first cell.

# Build the dataset location from the current user's home directory instead of
# hard-coding one particular Windows account. This mirrors the default Keras
# cache layout (~/.keras/datasets); a custom KERAS_HOME would change it.
cifar10_dataset_path = os.path.join(
    os.path.expanduser("~"), ".keras", "datasets", "cifar-10-batches-py"
)

# The existence check only selects the status message: both situations go
# through the same cifar10.load_data() call, and Keras itself decides whether
# a download is required.
if os.path.exists(cifar10_dataset_path):
    print("Using the local CIFAR-10 dataset.")
else:
    print("Local dataset not found or inaccessible. Attempting to download.")

# SECURITY: certificate verification is disabled because the download failed
# with CERTIFICATE_VERIFY_FAILED in this environment. The bypass is limited to
# this one call; the default HTTPS context is restored afterwards so that any
# later request made from this kernel is verified again.
_verified_https_context = ssl._create_default_https_context
ssl._create_default_https_context = ssl._create_unverified_context
try:
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
finally:
    ssl._create_default_https_context = _verified_https_context

# Print dataset shapes
print("X_train shape:", X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')


Using the local CIFAR-10 dataset.
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
X_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


In [6]:
# Convert both label arrays to categorical form with NB_CLASSES columns
y_train, y_test = [
    np_utils.to_categorical(labels, NB_CLASSES)
    for labels in (y_train, y_test)
]

In [7]:
# Cast the image arrays to float32 and rescale pixel values into the range 0-1
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

#### ----------------------------------------------------------------------------------------------------------------------------------------------------------------

## Building the Model (Deeper Network)

#### ----------------------------------------------------------------------------------------------------------------------------------------------------------------

In [8]:
# Define the deep network: start from an empty Sequential model that the
# following cells extend layer by layer through model.add(...)
model = Sequential()

In [9]:
# First convolutional block: two 3x3 convolutions with 32 filters each (ReLU),
# then 2x2 max pooling and dropout at rate 0.25. The first layer also declares
# the input shape of a single image.
first_block_layers = [
    Conv2D(32, (3, 3), padding='same',
           input_shape=(IMG_ROWS, IMG_COLS, IMG_CHANNELS)),
    Activation('relu'),
    Conv2D(32, (3, 3), padding='same'),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
]
for layer in first_block_layers:
    model.add(layer)

In [10]:
# Second convolutional block: two 3x3 convolutions with 64 filters each (ReLU),
# then 2x2 max pooling and dropout at rate 0.25. Only the first convolution
# requests 'same' padding; the second passes no padding argument.
second_block_layers = [
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
]
for layer in second_block_layers:
    model.add(layer)

In [11]:
# Fully connected head: flatten the feature maps, one 512-unit ReLU layer with
# dropout at rate 0.5, then a softmax output with one unit per class
classifier_layers = [
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(NB_CLASSES),
    Activation('softmax'),
]
for layer in classifier_layers:
    model.add(layer)

In [12]:
# Configure training (optimizer, loss, tracked metric), then print the layer summary
model.compile(
    optimizer=OPTIM,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 16, 16, 64)       

In [13]:
# Train the model; VALIDATION_SPLIT of the training data is set aside by fit()
# for validation (see the "Train on ... validate on ..." line in the output)
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=NB_EPOCH,
    batch_size=BATCH_SIZE,
    verbose=VERBOSE,
    validation_split=VALIDATION_SPLIT,
)

Train on 40000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [14]:
# Evaluate the model on the test set; `score` holds the loss first, then the
# accuracy metric requested in model.compile
score = model.evaluate(X_test, y_test, verbose=VERBOSE)
test_loss, test_accuracy = score[0], score[1]
print("Test score:", test_loss)
print("Test accuracy:", test_accuracy)

Test score: 0.7819904750347138
Test accuracy: 0.7656000256538391


In [15]:
# Persist the network: weights go to an HDF5 file (replacing any existing one),
# the architecture is serialised to JSON alongside it
model.save_weights("cifar10_weights.h5", overwrite=True)
model_json = model.to_json()
with open("cifar10_architecture.json", "w") as architecture_file:
    architecture_file.write(model_json)

#### ----------------------------------------------------------------------------------------------------------------------------------------------------------------

## Data Augmentation

#### ----------------------------------------------------------------------------------------------------------------------------------------------------------------

In [16]:
# imports

from keras.preprocessing.image import ImageDataGenerator
import numpy as np

In [17]:
# Data augmentation: settings for the image generator used in the augmented run
augmentation_options = {
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}
datagen = ImageDataGenerator(**augmentation_options)

In [18]:
# Fit the data generator to the training data
# NOTE(review): the generator configured in the previous cell enables only
# rotation, shift, zoom and flip options. If, as the Keras documentation
# indicates, fit() is required only for feature-wise normalisation or ZCA
# whitening, this call does not influence the augmented batches — confirm
# before removing it.
datagen.fit(X_train)

In [19]:
# Augment and train the model
# NOTE(review): `model` is the instance already trained by the earlier
# model.fit cell, so this run continues from those weights rather than starting
# fresh, and `history` from that first run is overwritten here.
# NOTE(review): the test set is passed as validation data and is also the set
# used for the final evaluation below — consider a separate validation split.
augmented_batches = datagen.flow(X_train, y_train, batch_size=BATCH_SIZE)
batches_per_epoch = X_train.shape[0] // BATCH_SIZE
history = model.fit_generator(
    augmented_batches,
    steps_per_epoch=batches_per_epoch,
    epochs=NB_EPOCH,
    validation_data=(X_test, y_test),
    verbose=VERBOSE,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [20]:
# Evaluate the model after the augmented training run (same test set as before)
score = model.evaluate(X_test, y_test, verbose=VERBOSE)
augmented_loss, augmented_accuracy = score[0], score[1]
print("Test score:", augmented_loss)
print("Test accuracy:", augmented_accuracy)

Test score: 0.9951906910896301
Test accuracy: 0.6675000190734863


## Ethical and Privacy Implications of the Algorithm

The convolutional neural network (CNN) designed for CIFAR-10 image recognition demonstrates the power of machine learning to categorize image data efficiently. However, its potential applications raise significant ethical and privacy concerns when similar algorithms are trained on sensitive datasets, such as facial images or personal identifiers.

## Surveillance and Privacy Risks
Training this algorithm on facial recognition datasets could enable intrusive surveillance systems capable of identifying and tracking individuals without their consent. The erosion of anonymity in public spaces, as highlighted by Hill (2020), demonstrates the dangers posed by facial recognition technologies when used by law enforcement or private companies. Such misuse risks creating a surveillance society where personal freedom and privacy are diminished.

## Bias and Discrimination
Algorithms are only as unbiased as the datasets used to train them. If the training data lacks diversity, the algorithm may exhibit discriminatory behavior. Buolamwini and Gebru (2018) revealed that facial recognition systems often misidentify individuals from minority groups, leading to potential harm and injustice. This highlights the importance of ensuring fairness and accountability in dataset selection and model training.

## Misuse of Technology
The potential misuse of algorithms like this in authoritarian regimes or by malicious actors further underscores the need for ethical guardrails. Stahl and Wright (2018) emphasize the importance of responsible research and innovation (RRI) frameworks to mitigate the unintended consequences of AI development.

## Data Security Concerns
Access to large datasets, which often contain sensitive information, introduces risks of data breaches and unauthorized use. Proper anonymization and stringent security measures are essential to safeguard individual privacy and maintain public trust (Granados, 2016).

These considerations underline the need for ethical guidelines, robust regulatory oversight, and transparent practices in AI development. Without these measures, the widespread deployment of AI systems could exacerbate existing societal inequities and undermine democratic values.

References:

Buolamwini, J., & Gebru, T. (2018). Gender shades: Intersectional accuracy disparities in commercial gender classification. Proceedings of Machine Learning Research, 81, 77–91.

Granados, N. (2016, June 30). How Facebook biases your news feed. Forbes. Retrieved from https://www.forbes.com/sites/nelsongranados/2016/06/30/how-facebook-biases-your-news-feed/

Hill, K. (2020, January 21). The secretive company that might end privacy as we know it. International New York Times. Retrieved from https://www.nytimes.com

Stahl, B. C., & Wright, D. (2018). Ethics and privacy in AI and big data: Implementing responsible research and innovation. IEEE Security & Privacy, 16(3), 26–33.
