# Image Recognition - Logistic Regression
---

In this two-part project, we first explore image recognition using Logistic Regression. The subsequent segment contrasts this with Torchvision, offering a comparative insight into both methodologies.

---

![title](header_image.jpg)

Data source for this project - https://www.cs.toronto.edu/~kriz/cifar.html

*It is recommended to download the dataset and extract it into the same directory as this Jupyter notebook (the code below reads from the `cifar-10-batches-py/` folder by default).*

---

## Packages and Installations

In [8]:
# Imports
from platform import python_version
import math
import pickle
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [15]:
# Report the Python and package versions used in this notebook.
# Requires the watermark package; if it is missing, install it with: !pip install -q -U watermark

%reload_ext watermark
print('Python:', python_version())
%watermark --iversions

Python: 3.9.13
matplotlib: 3.5.2
numpy     : 1.21.5



## Class to Load, Process and Handle Image Data

In [97]:
class ImageDataHandler:
    """
    Load CIFAR-10 batch files and prepare them for two-class classification.

    Attributes:
    - base_path: Directory where CIFAR-10 batch files are stored.
    """

    def __init__(self, base_path="cifar-10-batches-py/"):
        """
        Initialize ImageDataHandler with a base path.

        Args:
        - base_path: Path to the directory holding the CIFAR-10 batch files.
        """
        self.base_path = base_path

    def load_data(self, filename):
        """
        Load one CIFAR-10 batch file and return flat, normalized images.

        Args:
        - filename: Name of the CIFAR-10 batch file inside base_path.

        Returns:
        - X: Float array of shape (n_images, 3072) with values in [0, 1].
        - y: Integer array with the labels stored in the batch.
        - (None, None) if the file cannot be opened or unpickled; the error
          is printed rather than raised.

        Steps:
        1. Normalize: divide the raw pixel values by 255.
        2. Reshape & Transpose: view each row as [3, 32, 32] and move the
           channel axis last ([32, 32, 3]).
        3. Flatten: turn every image back into a single 3072-long vector.
        """
        # Make sure the directory and the file name are separated, so that
        # base_path works both with and without a trailing slash.
        base = str(self.base_path)
        if base and not base.endswith(("/", "\\")):
            base += "/"
        full_path = f"{base}{filename}"

        try:
            with open(full_path, 'rb') as file:
                # SECURITY: pickle can execute arbitrary code while loading.
                # Only open batch files obtained from a trusted source.
                data = pickle.load(file, encoding='bytes')
        except Exception as e:
            # Deliberately broad: unpickling a damaged file can raise many
            # different exception types, and callers rely on (None, None).
            print(f"Error loading data: {e}")
            return None, None

        raw_images = data[b'data']
        y = np.array(data[b'labels'])
        pixels = np.array(raw_images, dtype=float) / 255.0
        images = pixels.reshape([-1, 3, 32, 32]).transpose([0, 2, 3, 1])
        X = images.reshape((images.shape[0], 3 * 32 * 32))

        return X, y

    def format_data(self, X, Y, v0, v1):
        """
        Keep only the samples of two classes and relabel them as 0.0 / 1.0.

        Args:
        - X: Image data, one sample per row.
        - Y: 1-D array of labels, aligned with the rows of X.
        - v0, v1: The two class labels to keep.

        Returns:
        - X: Rows of X whose label is v0 or v1 (always 2-D, even when a
          single row matches).
        - Y: Float labels for those rows: 1.0 for the larger of v0/v1 and
          0.0 for the smaller one.
        """
        X = np.asarray(X)
        Y = np.asarray(Y)

        positive_label = max(v0, v1)
        # A boolean mask keeps the sample axis intact; the previous
        # take + squeeze combination dropped it when only one row matched.
        keep = (Y == v0) | (Y == v1)
        # Comparing against the larger label avoids the division used
        # before, which broke when both labels were 0.
        binary_labels = (Y[keep] == positive_label).astype(float)

        return X[keep], binary_labels

    def prepare_data(self, batch_number, start_val, end_val):
        """
        Load one CIFAR-10 training batch and reduce it to two classes.

        Args:
        - batch_number: Number N of the batch file "data_batch_N".
        - start_val, end_val: The two class labels to keep (forwarded to
          format_data as v0 and v1).

        Returns:
        - x_train: Prepared image data.
        - y_train: Corresponding 0.0 / 1.0 labels.
        - (None, None) if the batch file could not be loaded.
        """
        x_train, y_train = self.load_data(f"data_batch_{batch_number}")

        if x_train is None or y_train is None:
            return None, None

        return self.format_data(x_train, y_train, start_val, end_val)


## Instantiating an Image Handler and Creating the Training Data

In [98]:
# Build the training set from a single batch, keeping only the two class
# labels below (prepare_data relabels them as 0.0 / 1.0).
handler = ImageDataHandler()
start_val, end_val = 0, 3
train_batch = 1
x_train, y_train = handler.prepare_data(
    batch_number=train_batch,
    start_val=start_val,
    end_val=end_val,
)

## Logistic Regression from Scratch

Creating a Logistic Regression model from scratch (no framework used)

In [None]:
class LogisticRegression:
    """
    Binary logistic regression trained by batch gradient ascent on the
    log-likelihood (no intercept term is added; beta has one weight per
    feature column).
    """

    def __init__(self):
        """
        Initialize the Logistic Regression model.

        Attributes:
        - beta: Weights for the logistic regression model (None until fit).
        - likelihood_history: Log-likelihood after every update step.
        - iteration_history: Iteration counter matching likelihood_history.
        """
        self.beta = None
        self.likelihood_history = []
        self.iteration_history = []

    @staticmethod
    def _sigmoid(t):
        """Return 1 / (1 + exp(-t)) without overflowing for large |t|."""
        # tanh form of the logistic function: algebraically identical, but
        # it saturates instead of producing inf / NaN.
        return 0.5 * (1.0 + np.tanh(0.5 * np.asarray(t, dtype=float)))

    def _func_log_likelihood(self, beta, X, Y):
        """
        Compute the log likelihood for the current weights (beta).

        Uses the identity
            sum(y * log(s) + (1 - y) * log(1 - s)) = sum((y - 1) * t + log(s))
        with t = X @ beta and s = sigmoid(t).

        Args:
        - beta: Current weights of the model.
        - X: Feature data.
        - Y: Target labels (0 or 1).

        Returns:
        - Log likelihood value (always <= 0).
        """
        t = np.dot(X, beta.T)
        linear_term = np.dot(np.subtract(Y, 1), t)
        # log(sigmoid(t)) = -log(1 + exp(-t)); logaddexp evaluates it stably.
        log_sigmoid_sum = -np.sum(np.logaddexp(0.0, -t))
        # The two terms are added; subtracting them (as before) does not
        # give the log-likelihood.
        return linear_term + log_sigmoid_sum

    def _calcula_gradiente(self, beta, X, Y):
        """
        Calculate the gradient of the log likelihood with respect to the weights.

        Args:
        - beta: Current weights of the model.
        - X: Feature data.
        - Y: Target labels (0 or 1).

        Returns:
        - Gradient values for each weight: X^T (Y - sigmoid(X @ beta)).
        """
        t = np.dot(X, beta.T)
        # (Y - 1) + (1 - sigmoid(t)) simplifies to Y - sigmoid(t); the stable
        # sigmoid avoids the inf / inf = NaN of exp(-t) / (1 + exp(-t)).
        residual = Y - self._sigmoid(t)
        return np.dot(residual, X)

    def fit(self, X, Y, epsilon, learning_rate, start, end, beta=None, max_iterations=2000):
        """
        Train the Logistic Regression model using gradient ascent on the
        log-likelihood.

        Args:
        - X: Feature data.
        - Y: Target labels (0 or 1).
        - epsilon: Training stops once the gradient norm drops below this.
        - learning_rate: Step size of the update rule.
        - start: Starting index of the rows used for training.
        - end: Ending index (exclusive) of the rows used for training.
        - beta: Initial weights (if provided). The array is copied, so the
          caller's object is not modified.
        - max_iterations: Maximum number of update steps.

        Updates:
        - beta: Learned weights after training.
        - likelihood_history / iteration_history: one entry is appended per
          update step (entries from earlier fit calls are kept).
        """
        X = np.asarray(X)[start:end]
        Y = np.asarray(Y)[start:end]

        if beta is None:
            beta = np.zeros(X.shape[1])
        else:
            # Work on a float copy: avoids mutating the caller's array and
            # allows integer initial weights.
            beta = np.array(beta, dtype=float)

        for itr in range(1, max_iterations + 1):
            grad = self._calcula_gradiente(beta, X, Y)
            beta += learning_rate * grad
            self.likelihood_history.append(self._func_log_likelihood(beta, X, Y))
            self.iteration_history.append(itr)
            if np.linalg.norm(grad) < epsilon:
                break

        self.beta = beta

    def predict(self, X):
        """
        Predict using the trained Logistic Regression model.

        Args:
        - X: Feature data to make predictions on.

        Returns:
        - Predicted labels (0.0 or 1.0), obtained by rounding the predicted
          probability. Requires fit to have been called first.
        """
        probabilities = self._sigmoid(np.dot(X, self.beta))
        return np.around(probabilities)

    def evaluate(self, X, Y):
        """
        Evaluate the model's accuracy on provided data.

        Args:
        - X: Feature data.
        - Y: True labels.

        Prints:
        - Model accuracy as a percentage.
        """
        predY = self.predict(X)
        correct = np.sum(predY == Y)
        total = len(Y)
        accuracy = (correct / total) * 100
        print(f"Acurácia do Modelo: {accuracy:.2f}%")
