**WHAT IS THIS?**

An experiment on image classification using softmax regression

**GOAL**

To classify a digit

**IMPLEMENTATION**

This uses the following update function
$$
\theta^{k + 1} = \theta^k + a\times (Y - H(X))X
$$
where $H(X)\in\mathbb{R}^{k \times m}$ denotes the hypothesis for each training example. We can calculate it through the following matrix transformations

$$
H(X) = \frac{1}{\sum_j^ke^{\theta_j x^T}} \times E
$$

And
$$
X \in \mathbb{R}^{m \times d}
$$

such that m is the number of training examples and d is the number of features. The classification of each training example is encoded as a one-hot vector, collected in

$$
Y \in \mathbb{R}^{k\times m}
$$

Also, k is the number of classifications, and the i’th column of Y is the classification of the i’th training example in X, denoted as a vector that is 0 everywhere except at the index that pertains to the classification of $x_i$, where it is 1. For example, if the classification of $x_i$ is 2, we expect a one-hot vector as follows

$$
V_2 = [0, 0, 1, 0, 0,0]^T
$$

We can then denote the parameters per classification $\theta$ as 

$$
\theta\in\mathbb{R}^{k\times d}
$$

where $\theta_i$ is the i’th row of parameters, which produces the line that helps us gauge how close a data point is to the i’th classification.

**HYPOTHESIS**

To produce a hypothesis, we will use the following
$$
h(x) = 
\frac{1}{\sum_j^ke^{\theta_j x^T}}\times\begin{bmatrix}
e^{\theta_0 x^T} \\ 
e^{\theta_1 x^T} \\ 
e^{\theta_2 x^T} \\
\vdots \\
e^{\theta_{k} x^T}
\end{bmatrix}
$$  

which can easily be calculated using the following matrix transformation
$$
\frac{1}{\sum_j^ke^{\theta_j x^T}} \times e^{\theta x^T}
$$

## STEP 1: Parse the images and create X, Y and $\theta$

In [21]:
import os
import tensorflow as tf

# --- User configuration ------------------------------------------------------
dataset_path = 'dataset/'                                # root folder; one sub-folder per class
classifications = [str(digit) for digit in range(10)]    # class labels, also used as sub-folder names

k = len(classifications)    # number of classes
s = 128                     # side length (pixels) every image is resized to
d = s ** 2                  # number of features per image (one per pixel of the s x s image)
f = 100_000                 # scale factor: features and THETA-by-X products are divided by it
initialize_x_y = False      # True: rebuild X and Y from the image folders; False: load saved copies
initialize_theta = False    # True: start THETA from zeros; False: load the saved parameters

# --- Values filled in while the dataset is processed -------------------------
m = 0                       # number of images in the dataset
all_features = []           # i'th entry: feature vector of the i'th image
all_classifications = []    # i'th entry: one-hot classification of the i'th image

# Load one image from disk and flatten it into a feature vector
def load_and_preprocess_image(image_path, target_size=(s, s)):
    """Read an image file and return its pixels as a flat tensor.

    The file is decoded with a single channel, resized to ``target_size``
    and then flattened.

    Args:
        image_path: Path of the image file to read.
        target_size: Size handed to ``tf.image.resize``; defaults to ``(s, s)``.

    Returns:
        A 1-D tensor of the resized pixel values. With the default size and a
        single-frame image this has ``d`` entries.
    """
    print("Processing image: " + image_path)
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_image(raw_bytes, channels=1)  # (height, width, 1)
    resized = tf.image.resize(decoded, target_size)
    return tf.reshape(resized, (-1,))                       # collapse to one dimension

# Process all images
def convert_all_images_to_tensors():
    """Load every image of every class into the module-level dataset lists.

    For each entry of ``classifications`` the folder
    ``dataset_path/<classification>`` is scanned; every regular file in it is
    loaded with ``load_and_preprocess_image``, divided by ``f`` and appended to
    ``all_features``, and the matching one-hot vector is appended to
    ``all_classifications``. Finally the module-level ``m`` is set to the
    number of images collected.

    Returns:
        None. Results are stored in ``all_features``, ``all_classifications``
        and ``m``.
    """
    # Without this declaration the assignment at the end would only bind a
    # local name and the module-level ``m`` would silently stay at 0.
    global m

    for class_index, class_name in enumerate(classifications):
        # Get the list of all image files in the folder
        image_folder_path = os.path.join(dataset_path, class_name)
        # The loop variable is deliberately not called ``f`` so it cannot be
        # confused with the global scale factor ``f`` used below.
        image_files = [
            os.path.join(image_folder_path, entry)
            for entry in os.listdir(image_folder_path)
            if os.path.isfile(os.path.join(image_folder_path, entry))
        ]

        print("Processing Folder: " + image_folder_path)
        for image_file in image_files:
            features = load_and_preprocess_image(image_file)
            all_features.append(features / f)
            all_classifications.append(tf.one_hot(class_index, k))

    print("Processing Dataset Succesfull")
    m = len(all_features)

def save_progress(tensor, path, name):
    """Serialize ``tensor`` and write it to the file at ``path``.

    Args:
        tensor: Tensor (or variable) to persist.
        path: Destination file path.
        name: Label used only in the confirmation message.
    """
    tf.io.write_file(path, tf.io.serialize_tensor(tensor))
    print("Saving " + name + " Successful")

def extract(path, name):
    """Read a serialized tensor from ``path`` and parse it as float32.

    Args:
        path: File previously written with a serialized tensor.
        name: Label used only in the confirmation message.

    Returns:
        The parsed ``tf.float32`` tensor.
    """
    serialized = tf.io.read_file(path)
    restored = tf.io.parse_tensor(serialized, out_type=tf.float32)
    print("Extracting " + name + " Successful")
    return restored

# Build (or reload) the features X, the one-hot classifications Y and the parameters THETA
if not initialize_x_y:
    # Reuse the matrices saved by an earlier run.
    X = tf.Variable(extract('x.tfrecord', "X"))
    Y = tf.Variable(extract('y.tfrecord', "Y"))
else:
    convert_all_images_to_tensors()
    # Transposed so that every column describes one image.
    X = tf.transpose(tf.convert_to_tensor(all_features))
    Y = tf.transpose(tf.convert_to_tensor(all_classifications))

    save_progress(X, 'x.tfrecord', "X")
    save_progress(Y, 'y.tfrecord', "Y")

# Either start from all-zero parameters or continue from the saved ones.
THETA = tf.Variable(
    tf.zeros((k, d)) if initialize_theta else extract('parameters.tfrecord', "THETA")
)

print("X: ", X)
print("Y: ", Y)
print("THETA: ", THETA)

Extracting X Successful
Extracting Y Successful
Extracting THETA Successful
X:  <tf.Variable 'Variable:0' shape=(16384, 10160) dtype=float32, numpy=
array([[0.0255, 0.0255, 0.0255, ..., 0.0255, 0.0255, 0.0255],
       [0.0255, 0.0255, 0.0255, ..., 0.0255, 0.0255, 0.0255],
       [0.0255, 0.0255, 0.0255, ..., 0.0255, 0.0255, 0.0255],
       ...,
       [0.0255, 0.0255, 0.0255, ..., 0.0255, 0.0255, 0.0255],
       [0.0255, 0.0255, 0.0255, ..., 0.0255, 0.0255, 0.0255],
       [0.0255, 0.0255, 0.0255, ..., 0.0255, 0.0255, 0.0255]],
      dtype=float32)>
Y:  <tf.Variable 'Variable:0' shape=(10, 10160) dtype=float32, numpy=
array([[1., 1., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 1., 1.]], dtype=float32)>
THETA:  <tf.Variable 'Variable:0' shape=(10, 16384) dtype=float32, numpy=
array([[ 1.12989306e-04,  1.12989306e-04,  

## STEP 2: Softmax Regression

In [22]:
def produce_hypothesis(image_to_predict_features):
    """Return the softmax class probabilities for the given image features.

    Computes ``exp(THETA @ features / f)`` and normalises each column so that
    it sums to 1, mirroring what ``H()`` does for the training matrix.

    Args:
        image_to_predict_features: Feature matrix with one image per column,
            shaped so that ``tf.matmul(THETA, image_to_predict_features)`` is
            valid. NOTE(review): training features are divided by ``f`` before
            being stored; confirm callers preprocess their input the same way.

    Returns:
        A tensor with one column of class probabilities per input column.
    """
    transformation = tf.matmul(THETA, image_to_predict_features)
    scaled_to_positive = tf.exp(transformation / f)

    # Normalise per column (axis=0). For a single-column input this equals the
    # previous whole-tensor sum, but it stays a valid softmax when several
    # images are passed at once.
    sum_per_col = tf.reduce_sum(scaled_to_positive, axis=0, keepdims=True)
    normalised = tf.divide(scaled_to_positive, sum_per_col)

    print("Producing Prediction Success")
    return normalised

def H():
    """Compute the softmax hypothesis for every column of ``X``.

    Evaluates ``exp(THETA @ X / f)`` and divides each column by its own sum.
    Both the raw product and the normalised result are printed.

    Returns:
        Tensor with the same shape as ``THETA @ X`` whose columns each sum to 1.
    """
    scores = tf.matmul(THETA, X)
    print("Transformation: ", scores)

    exponentials = tf.exp(scores / f)
    column_totals = tf.reduce_sum(exponentials, axis=0)  # one total per image
    normalised = exponentials / column_totals

    print("H(X): ", normalised)
    return normalised

def is_not_finite(tensor):
    """Return a boolean tensor that is True if any entry is NaN or infinite."""
    non_finite_entries = tf.logical_not(tf.math.is_finite(tensor))
    return tf.reduce_any(non_finite_entries)

def train_softmax_regression(a, iterations):
    """Update ``THETA`` with repeated softmax-regression steps.

    Each iteration computes ``THETA + a * (Y - H()) @ X^T``. If the candidate
    parameters contain NaN or infinity, training stops immediately: the
    candidate is discarded and nothing is saved. Otherwise the candidate is
    assigned to ``THETA``. After the last iteration ``THETA`` is printed and
    written to ``parameters.tfrecord``.

    Args:
        a: Learning rate.
        iterations: Number of update steps to run.
    """
    for step in range(iterations):
        print("Training iteration number: " + str(step))

        residual = Y - H()  # difference between the labels and the current hypothesis
        candidate = THETA + a * tf.matmul(residual, tf.transpose(X))
        print("Loss: ", residual)

        # Guard against divergence before committing the step.
        if is_not_finite(candidate):
            print("Aborting training, Theta is exploding to oblivion. Try adjusting learning rate (a)")
            return

        THETA.assign(candidate)

    print(THETA)
    save_progress(THETA, 'parameters.tfrecord', "THETA")

# Train the model, then classify one sample image as a sanity check.
train_softmax_regression(.00001, 100)

# convert_all_images_to_tensors() divides every training feature vector by f,
# so the sample is scaled the same way before prediction; otherwise the model
# would be evaluated on inputs f times larger than the ones it was trained on.
# Wrapping the vector in a list and transposing yields a single column.
HYPOTHESIS = produce_hypothesis(
    tf.transpose([load_and_preprocess_image("dataset/0/img001-00001.png") / f])
)

print(HYPOTHESIS)

Training iteration number: 0
Transformation:  tf.Tensor(
[[-4.4745193  -2.7394142  -4.6419735  ... -2.7503982  -0.2417761
  -0.13671575]
 [ 5.093271    2.114684    5.4217267  ...  3.6761658   1.6947759
   1.069276  ]
 [-0.07921189 -0.25577092  0.11963016 ... -1.8291363   0.06262594
  -0.34876075]
 ...
 [ 4.6629324   2.5625973   4.1645145  ...  2.8114014   1.3505461
  -0.18284748]
 [-3.506748   -1.9596726  -3.3075814  ... -2.916297   -2.8632765
  -1.7250051 ]
 [-2.8615918  -1.8423537  -2.524724   ...  2.5544696   2.1499743
   1.987298  ]], shape=(10, 10160), dtype=float32)
H(X):  tf.Tensor(
[[0.09999553 0.09999726 0.09999536 ... 0.09999725 0.09999976 0.09999987]
 [0.1000051  0.10000211 0.10000543 ... 0.10000367 0.10000169 0.10000108]
 [0.09999993 0.09999974 0.10000012 ... 0.09999817 0.10000006 0.09999966]
 ...
 [0.10000467 0.10000256 0.10000416 ... 0.10000281 0.10000135 0.09999982]
 [0.09999651 0.09999804 0.09999669 ... 0.09999709 0.09999714 0.09999829]
 [0.09999715 0.09999816 0.0999974