In [None]:
import numpy as np
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
import torch
def download_mnist(is_train: bool):
    dataset = MNIST(root='./data',
                    transform= lambda x: np.array(x).flatten(),
                    download=True,
                    train=is_train)
    mnist_data= []
    mnist_labels= []
    for image, label in dataset:
        mnist_data.append(image)
        mnist_labels.append(label)
     
    # print(procesed_mnist_labels)    
    return mnist_data, mnist_labels

In [None]:
def process_data(mnist_data,mnist_labels):
    mnist_data = np.array(mnist_data).reshape(-1,784)
    mnist_labels = np.array(mnist_labels).reshape(-1,1) 
    # infer all data to rows,with 1 column
    # convert the labels to one-hot-encoding . 
    # EX: { Red,Blue,White} as : Red = [1,0,0] , Blue = [ 0, 1, 0] and White = [ 0, 0, 1]
    
    
    # Here we normalize the label data
    procesed_mnist_labels = []
    for mnist_label in mnist_labels:
        a = np.array([0,0,0,0,0,0,0,0,0,0]) # or we could use : np.zeros(10)
        value =mnist_label[0]
        a[value] = 1
        procesed_mnist_labels.append(np.array(a))
        
    # Here we normalize data set ,with a representation in [0 , 1]
    mnist_data = mnist_data / 255
    # print(procesed_mnist_labels)
    return mnist_data, procesed_mnist_labels
    

In [None]:
from torch.utils.data import TensorDataset


# split data in batches 
def split_in_batches(data, labels, batch_size=100):
    dataset = TensorDataset(torch.tensor(data, dtype=torch.float32), torch.tensor(labels, dtype=torch.float32))
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    
    batched_data = []
    batched_labels = []
    
    for batch_data, batch_labels in loader:
        batched_data.append(batch_data.numpy())
        batched_labels.append(batch_labels.numpy())
    return batched_data, batched_labels

def initialize_parameters(input_size, num_classes):
    """
    Initialize the weights and biases for the model.
    - input_size: the size of the input vector (784 for 28x28 images)
    - num_classes: the number of classes (10 for MNIST)
    """
    # Initialize weights with random values from a normal distribution
    weights = np.random.randn(input_size, num_classes) * 0.01  # Size: [784, 10]
    
    # Initialize biases with zero values
    biases = np.zeros((1, num_classes))  # Size: [1, 10]   
    return weights, biases # Practically we return Wi and Bi with i in [1,10] which represent each label

def forward(data, weights, biases):
    """
    Compute logits for each image.
    - data: the dataset of images (input vectors)
    - weights: the weights (matrix [784, 10])
    - biases: the biases (vector [1, 10])
    """
   # Compute the dot product between each image in the batch and the weights
    # and add the bias for each class.
    logits = (data @ weights) + biases  # Output size: [num_images, 10]
    return logits # This is pondered sum which indicate score for each class

In [None]:
def softmax(logits):
    """
     This function transform eac score in a probability by applying softmax function
    """
    exp_logits = np.exp(logits - np.max(logits, axis=1, keepdims=True)) # Subtract the maximum value for numerical stability
    probabilities = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)
    
    return probabilities # the predicted class coreesponds to the highest probability
    

In [None]:
def evaluate_cross_entropy_loss(labels, probabilities):
    # Convert labels and probabilities to PyTorch tensors if they are not already
    torch_labels = torch.tensor(labels, dtype=torch.float32)
    torch_probabilities = torch.tensor(probabilities, dtype=torch.float32)

    # Compute the cross-entropy loss
    cross_entropy_loss = -torch.sum(torch_labels * torch.log(torch_probabilities), dim=1)  # This is the loss for a single image
    # Compute the average loss over the batch
    average_loss = torch.mean(cross_entropy_loss)
    
    return average_loss  # This is the average loss for all images

In [None]:
def train_epoch(process_train_X, process_train_Y, weights, biases):
    """
    Train the model for one epoch.
    - process_train_X: the training dataset of images
    - process_train_Y: the training dataset of labels
    - weights: the weights of the model
    - biases: the biases of the model
    """
    # Split the training data into batches
    batched_train_data, batched_train_labels = split_in_batches(process_train_X, process_train_Y)

    # Initialize the loss
    epoch_loss = 0
    
    # Iterate over the batches
    for batch_data, batch_labels in zip(batched_train_data, batched_train_labels):
        # Compute the logits
        logits = forward(batch_data, weights, biases)
        
        # Compute the probabilities
        probabilities = softmax(logits)
        
        # Compute the cross-entropy loss
        loss = evaluate_cross_entropy_loss(batch_labels, probabilities)
        
        # Update the epoch loss
        epoch_loss += loss
        
    # Compute the average loss for the epoch
    epoch_loss /= len(batched_train_data)
    
    print(f"Epoch loss: {epoch_loss}")
    
    return epoch_loss
    

In [None]:
def update_weights(process_train_X, process_train_Y, weights, biases, learning_rate=0.01):
    """
    Update the weights and biases for the model using gradient descent.
    - process_train_X: the training dataset of images
    - process_train_Y: the training dataset of labels
    - weights: the weights of the model
    - biases: the biases of the model
    - learning_rate: the learning rate for the update
    """
    batched_train_data, batched_train_labels = split_in_batches(process_train_X, process_train_Y)
    
    for batch_data, batch_labels in zip(batched_train_data, batched_train_labels):
        # Compute the logits
        logits = forward(batch_data, weights, biases)
        
        # Compute the probabilities
        probabilities = softmax(logits)
        # Compute the error (Target - y)
        error = batch_labels - probabilities

        # Compute the gradients for weights and biases
        weights_gradient = np.dot(batch_data.T, error) / len(batch_data)
        biases_gradient = np.sum(error,axis=0) / len(batch_data)
    
        # Update the weights and biases
        weights -= learning_rate * weights_gradient
        biases -= learning_rate * biases_gradient

    return weights, biases

In [None]:
def update_learning_rate(initial_lr, epoch, decay):
    return initial_lr / (1 + decay * epoch)


In [None]:

def calculate_accuracy(data, labels, weights, biases):
    """
    Calculate the accuracy of the model.
    - data: the dataset (either train or test)
    - labels: the true labels
    - weights: the weights of the model
    - biases: the biases of the model
    """
    # Compute logits (raw scores for each class)
    logits = forward(data, weights, biases)
    
    # Compute probabilities (softmax output)
    probabilities = softmax(logits)
    
    # Get the predicted class for each data point
    predicted_classes = np.argmax(probabilities, axis=1)
    
    # Get the true class from the labels (assuming labels are one-hot encoded)
    true_classes = np.argmax(labels, axis=1)
    
    # Calculate how many predictions are correct
    correct_predictions = np.sum(predicted_classes == true_classes)
    
    # Calculate accuracy as a percentage
    accuracy = correct_predictions / len(labels) * 100
    
    return accuracy


In [None]:
# obtain train and test raw data
train_X, train_Y = download_mnist(True)
test_x, test_y = download_mnist(False)

# process data ,normalizing 
process_train_X , process_train_Y = process_data(train_X,train_Y)
process_test_x,process_test_y=process_data(test_x,test_y)
# first time we initialize random weights for training
weights,biases = initialize_parameters(784,10) # 784 is the number of pixels in the image and 10 is the number of classes



In [99]:
# Train the model for one epoch
# Încărcare weights și biases din fișier
weights = np.load("weights.npy")
biases = np.load("biases.npy")

num_epochs= 100
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    # Actualizează learning rate-ul
    # learning_rate = update_learning_rate(initial_lr, epoch, decay)

    # Antrenează modelul și actualizează weights și biases
    epoch_loss = train_epoch(process_train_X, process_train_Y, weights, biases)
    weights, biases = update_weights(process_train_X, process_train_Y, weights, biases, 0.001)
    # print("epoch ",epoch,"weights",weights)
    # print("epoch",epoch,"biases",biases)
    
    print(f"Loss after epoch {epoch + 1}: {epoch_loss}")
   

# Calculează acuratețea pe setul de antrenament
train_accuracy = calculate_accuracy(process_train_X, process_train_Y, weights, biases)   
print(f"Train Accuracy: {train_accuracy}%")
# print("learning rate",learning_rate)
# Salvare weights și biases într-un fișier



Epoch 1/100
Epoch loss: nan
Loss after epoch 1: nan
Epoch 2/100
Epoch loss: nan
Loss after epoch 2: nan
Epoch 3/100
Epoch loss: nan


KeyboardInterrupt: 

In [97]:
# Salvare weights și biases într-un fișier
# np.save("weights.npy", weights)
# np.save("biases.npy", biases)


Epoch loss: 2.3064732551574707 

Epoch loss: 2.306473731994629

Epoch loss: 2.29582142829895

Epoch loss: 2.2958221435546875


In [None]:
# %pip install --upgrade certifi

In [None]:

# # Example batch of one-hot encoded labels (batch_size=3, num_classes=4)
# labels = np.array([
#     [0, 0, 1, 0],
#     [0, 1, 0, 0],
#     [1, 0, 0, 0]
# ])
# 
# # Example batch of predicted probabilities (batch_size=3, num_classes=4)
# probabilities = np.array([
#     [0.1, 0.2, 0.6, 0.1],
#     [0.3, 0.4, 0.2, 0.1],
#     [0.7, 0.1, 0.1, 0.1]
# ])
# 
# # Compute the cross-entropy loss for each image
# cross_entropy_loss = -np.sum(labels * np.log(probabilities), axis=1)
# 
# # Compute the average loss over the batch
# average_loss = np.mean(cross_entropy_loss)
# 
# print("Cross-Entropy Loss for each image:", cross_entropy_loss)
# print("Average Cross-Entropy Loss for the batch:", average_loss)

In [None]:
# Sample input data
# process_train_X = np.array([[0.5, 0.2, 0.1], [0.9, 0.7, 0.3]])
# process_train_Y = np.array([[1, 0, 0], [0, 1, 0]])
# 
# # Initial weights and biases
# weights = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]])
# biases = np.array([[0.1, 0.2, 0.3]])
# 
# # Learning rate
# learning_rate = 0.01
# 
# # Define the required functions
# # def split_in_batches(data, labels, batch_size=2):
# #     return [data], [labels]
# 
# # def compute_logits(data, weights, biases):
# #     return np.dot(data, weights) + biases
# # 
# # def softmax(logits):
# #     exp_logits = np.exp(logits - np.max(logits, axis=1, keepdims=True))
# #     return exp_logits / np.sum(exp_logits, axis=1, keepdims=True)
# # 
# # # Function to test
# # def update_weights(process_train_X, process_train_Y, weights, biases, learning_rate=0.01):
# #     batched_train_data, batched_train_labels = split_in_batches(process_train_X, process_train_Y)
# # 
# #     for batch_data, batch_labels in zip(batched_train_data, batched_train_labels):
# #         logits = compute_logits(batch_data, weights, biases)
# #         probabilities = softmax(logits)
# #         error = batch_labels - probabilities
# #         weights_gradient = np.dot(batch_data.T, error) / len(batch_data)
# #         biases_gradient = np.sum(error, axis=0) / len(batch_data)
# #         weights -= learning_rate * weights_gradient
# #         biases -= learning_rate * biases_gradient
# #     return weights, biases
# 
# # Run the test
# updated_weights, updated_biases = update_weights(process_train_X, process_train_Y, weights, biases, learning_rate)
# 
# # Print the results
# print("Updated weights:")
# print(updated_weights)
# print("Updated biases:")
# print(updated_biases)