<a href="https://colab.research.google.com/github/Btere/btereml/blob/main/Increasing_The_Hidden_layers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torchvision.datasets.mnist import MNIST

%matplotlib inline
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(torch.__version__)

In [None]:
# Load the MNIST training split (downloading it if necessary) and keep its
# `data` (images) and `targets` (labels) attributes for later use.
train_data = datasets.MNIST(root='path/to/dataset', train=True, download=True)
tr_images, tr_targets = train_data.data, train_data.targets

In [None]:
# Load the MNIST test split (downloading it if necessary); it is used as the
# validation set, so its `data` and `targets` are stored under `val_*` names.
test_data = datasets.MNIST(root='path/to/dataset', train=False, download=True)
val_images, val_targets = test_data.data, test_data.targets

In [None]:
# Display the dataset's `classes` attribute as the cell output.
train_data.classes

In [None]:
# NOTE: despite its name, `img` holds the *shape* of training image 39999,
# not the image itself; the value is assigned, so nothing is displayed.
img = tr_images[39999].shape

In [None]:
# Draw a 5x5 grid of randomly chosen training images.
figure = plt.figure(figsize=(8, 8))
cols, rows = 5, 5

for position in range(1, cols * rows + 1):
    # Pick one random sample index in [0, len(train_data)).
    sample_idx = torch.randint(len(train_data), size=(1,)).item()
    img, label = train_data[sample_idx]
    ax = figure.add_subplot(rows, cols, position)
    ax.axis("off")
    ax.imshow(np.squeeze(img), cmap="gray")
plt.show()

# Flatten the image:
Flattening reduces the dimensionality of the input to a layer. A dense (fully connected) layer expects each sample as a row vector, so every 28×28 image is flattened into a 784-element vector. Reshaping a tensor in PyTorch (`view`/`reshape`) is the convenient equivalent of NumPy's `reshape`; tensor views allow fast and memory-efficient reshaping, slicing and element-wise operations.

In [None]:
# Preprocessing of the data.
class Mnist_dataset(Dataset):
    """Dataset of flattened MNIST images paired with their labels.

    Each image is cast to float and flattened to a row of ``28 * 28`` values
    so that it can be fed directly to ``nn.Linear(28 * 28, ...)``.
    """

    def __init__(self, x, y):
        """Store the images ``x`` (flattened to ``(-1, 28 * 28)``) and labels ``y``."""
        # `reshape` instead of `view`: `view` raises on non-contiguous input
        # (e.g. a transposed tensor), while `reshape` copies only when it must
        # and is otherwise identical.
        x = x.float().reshape(-1, 28 * 28)
        self.x, self.y = x, y

    def __getitem__(self, ix):
        """Return the ``ix``-th (image, label) pair, moved to the module-level ``device``."""
        x, y = self.x[ix], self.y[ix]
        return x.to(device), y.to(device)

    def __len__(self):
        """Return the number of stored (flattened) images."""
        return len(self.x)


In [None]:
def get_data(batch_size=1000):
    """Build the training and validation DataLoaders from the notebook-level tensors.

    Args:
        batch_size: Mini-batch size for the (shuffled) training loader.
            Defaults to 1000, the value previously hard-coded here.

    Returns:
        ``(trn_dl, val_dl)``. The validation loader is not shuffled and
        serves the whole validation set as a single batch.
    """
    train = Mnist_dataset(tr_images, tr_targets)
    trn_dl = DataLoader(train, batch_size=batch_size, shuffle=True)
    val = Mnist_dataset(val_images, val_targets)
    # One batch containing every validation image.
    val_dl = DataLoader(val, batch_size=len(val_images), shuffle=False)
    return trn_dl, val_dl

### Building The custom Network

In [None]:

from torch.optim import SGD, Adam

def build_model(hidden_units=900, num_hidden_layers=3, lr=1e-2):
    """Build the fully connected classifier with its loss function and optimizer.

    With the defaults the network is
    ``784 -> 900 -> 900 -> 900 -> 10`` with a ReLU after every hidden layer,
    exactly as before; the arguments make width, depth and learning rate
    adjustable for experiments.

    Args:
        hidden_units: Number of units in every hidden layer.
        num_hidden_layers: Number of hidden ``Linear`` + ``ReLU`` pairs (>= 1).
        lr: Learning rate passed to ``Adam``.

    Returns:
        ``(model, loss_fn, optimizer)``; the model is moved to the
        module-level ``device``.

    Raises:
        ValueError: If ``num_hidden_layers`` is smaller than 1.
    """
    if num_hidden_layers < 1:
        raise ValueError("num_hidden_layers must be at least 1")

    # Input layer: 28 * 28 flattened pixels -> first hidden layer.
    layers = [nn.Linear(28 * 28, hidden_units), nn.ReLU()]
    for _ in range(num_hidden_layers - 1):
        layers += [nn.Linear(hidden_units, hidden_units), nn.ReLU()]
    # Output layer: last hidden layer -> 10 class scores (one per digit).
    layers.append(nn.Linear(hidden_units, 10))
    model = nn.Sequential(*layers).to(device)

    # Defining loss function and optimizer
    loss_fn = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=lr)
    return model, loss_fn, optimizer

In [None]:
# Build a throwaway model so the cell displays the returned
# (model, loss_fn, optimizer) tuple; the result is not kept.
build_model()

### Training the network
Here we train the image classification model. An epoch is a single pass through the entire training set (60,000 images), i.e. the number of batches that must be passed to the model until all the training data has been covered.

In [None]:
# The training step.
# We use .item() to extract the loss value as a Python scalar.
def train_batch(x, y, model, optimizer, loss_fn):
    """Run one optimisation step on a single mini-batch.

    Args:
        x: Batch of inputs passed to ``model``.
        y: Targets passed to ``loss_fn`` together with the predictions.
        model: The network being trained; it is switched to training mode.
        optimizer: Optimizer holding ``model``'s parameters.
        loss_fn: Callable ``loss_fn(prediction, y)`` returning a scalar tensor.

    Returns:
        The batch loss as a Python float.
    """
    model.train()
    # Clear gradients *before* the backward pass so that any stale `.grad`
    # left on the parameters cannot leak into this update.
    optimizer.zero_grad()
    prediction = model(x)
    batch_loss = loss_fn(prediction, y)
    batch_loss.backward()
    optimizer.step()
    return batch_loss.item()

def accuracy(x, y, model):
    """Return, per sample, whether the model's top-scoring class equals the target.

    The model is switched to evaluation mode and the forward pass runs
    without gradient tracking. The result is a plain Python list of bools,
    one entry per row of ``x``.
    """
    model.eval()
    with torch.no_grad():
        scores = model(x)
    # Index of the largest score along the last (class) dimension.
    predicted = scores.max(dim=-1).indices
    return (predicted == y).cpu().numpy().tolist()

In [None]:
@torch.no_grad()
def val_loss(x, y, model, criterion=None):
    """Compute the loss of ``model`` on one batch without tracking gradients.

    Args:
        x: Batch of inputs passed to ``model``.
        y: Targets for the batch.
        model: The network to evaluate; it is switched to evaluation mode,
            matching what ``accuracy`` does.
        criterion: Loss callable ``criterion(prediction, y)``. When omitted,
            the notebook-level ``loss_fn`` is used, as before.

    Returns:
        The batch loss as a Python float.
    """
    if criterion is None:
        # Backward-compatible fallback to the notebook-level loss function.
        criterion = loss_fn
    model.eval()
    prediction = model(x)
    return criterion(prediction, y).item()

In [None]:
# Create the training/validation DataLoaders and a fresh model, loss function
# and optimizer. NOTE: `loss_fn` is also read as a global by `val_loss`.
trn_dl, val_dl = get_data()
model, loss_fn, optimizer = build_model()

In [None]:
# Train for a fixed number of epochs and record, per epoch, the loss and
# accuracy on both the training and the validation data.
num_epochs = 5
train_losses, train_accuracies = [], []
val_losses, val_accuracies = [], []
for epoch in range(num_epochs):
    print(epoch)
    train_epoch_losses, train_epoch_accuracies = [], []

    # Pass 1: one optimisation step per mini-batch; the epoch loss is the
    # mean of the per-batch losses.
    for x, y in trn_dl:
        batch_loss = train_batch(x, y, model, optimizer, loss_fn)
        train_epoch_losses.append(batch_loss)
    train_epoch_loss = np.array(train_epoch_losses).mean()

    # Pass 2: training accuracy measured with the weights reached at the end
    # of the epoch (hence a second pass over the training data).
    for x, y in trn_dl:
        is_correct = accuracy(x, y, model)
        train_epoch_accuracies.extend(is_correct)
    train_epoch_accuracy = np.mean(train_epoch_accuracies)

    # Validation: accumulate over *all* batches instead of keeping only the
    # last one, so the metrics stay correct if the validation loader ever
    # yields more than one batch. The loss is weighted by batch size.
    val_is_correct, val_loss_sum, val_count = [], 0.0, 0
    for x, y in val_dl:
        val_is_correct.extend(accuracy(x, y, model))
        val_loss_sum += val_loss(x, y, model) * len(y)
        val_count += len(y)
    val_epoch_accuracy = np.mean(val_is_correct)
    validation_loss = val_loss_sum / val_count

    train_losses.append(train_epoch_loss)
    train_accuracies.append(train_epoch_accuracy)
    val_losses.append(validation_loss)
    val_accuracies.append(val_epoch_accuracy)


In [None]:
epochs = np.arange(5)+1
import matplotlib.ticker as mtick
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
%matplotlib inline
plt.subplot(211)
plt.plot(epochs, train_losses, 'bo', label='Training loss')
plt.plot(epochs, val_losses, 'r', label='Validation loss')
plt.gca().xaxis.set_major_locator(mticker.MultipleLocator(1))
plt.title('Training and validation loss when batch size is 1000')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid('off')
plt.show()
plt.subplot(212)
plt.plot(epochs, train_accuracies, 'bo', label='Training accuracy')
plt.plot(epochs, val_accuracies, 'r', label='Validation accuracy')
plt.gca().xaxis.set_major_locator(mticker.MultipleLocator(1))
plt.title('Training and validation accuracy when batch size is 1000')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.gca().set_yticklabels(['{:.0f}%'.format(x*100) for x in plt.gca().get_yticks()])
plt.legend()
plt.grid('off')
plt.show()



In [None]:
# Analysis of results
# Red line: validation loss / accuracy (VL)
# Blue dots: training loss / accuracy (TL)

# During training, the model's parameters are updated in an attempt to increase the accuracy on both the training and validation data (the updates themselves use only the training data).
# It is important to monitor the validation accuracy to ensure that the model is not overfitting the training data.
# Training accuracy: the fraction of correctly classified examples in the training data. A high accuracy indicates that the model is
# making good predictions on the training data.
# Training loss: at the beginning, the training loss is typically high because the model starts with random weights. As training progresses, the loss should decrease, indicating that the model is learning from the data. If the loss does not decrease,
# what happens, or what does that imply?
# Validation loss: at first the validation loss usually decreases along with the training loss, because the validation data resembles data the model has already seen; with more training it stops decreasing and may start to increase.
# When there is overfitting, the training loss decreases and the validation loss increases.

# Validation accuracy: the fraction of correctly classified examples in the validation data.

# We train on the training data and find its loss and accuracy. Then we pass in the test data and check the loss (the error) and the accuracy (the fraction of correctly
# classified examples).
# Training loss vs. validation loss, and training accuracy vs. validation accuracy.
# The graph of training and validation loss in the context of machine learning represents how well a model is performing during the training process. Here's what each term means:

# 1. **Training Loss:**
#    - **Definition:** The training loss is a measure of how well the model is performing on the training dataset. It represents the error between the predicted values and the actual values during the training phase.
#    - **Graph Behavior:** In the beginning, the training loss is typically high as the model starts with random weights. As the training progresses, the loss should decrease, indicating that the model is learning from the data.

# 2. **Validation Loss:**
#    - **Definition:** The validation loss is a measure of how well the model is performing on a separate dataset that it has not seen during training. This dataset, called the validation set, serves as a proxy for new, unseen data.
#    - **Graph Behavior:** The validation loss is crucial for detecting overfitting. Initially, it might decrease along with the training loss. However, if the model starts memorizing the training data too much and loses its ability to generalize, the validation loss may start to increase even as the training loss continues to decrease.

# Here's a breakdown of the possible scenarios based on the graph:

# - **Ideal Scenario:**
#   - Both training and validation losses decrease steadily over time.
#   - This indicates that the model is learning from the training data and generalizing well to new, unseen data.

# - **Overfitting Scenario:**
#   - Training loss decreases, but validation loss increases.
#   - This suggests that the model is fitting the training data too closely and is not able to generalize well to new data.

# - **Underfitting Scenario:**
#   - Both training and validation losses remain high.
#   - The model is not able to capture the patterns in the training data, and it performs poorly on both the training and validation sets.

# Monitoring these loss curves is a common practice in training machine learning models to ensure they are learning effectively without overfitting or underfitting.
# When we iterate through the first epoch, what happens to the loss? When we update the weights on each epoch, what happens to the loss?

# We train, predict with xtest, then compute the accuracy.

# We have 60,000 training samples grouped into batches of 1,000 (the batch size). How many batches complete one epoch? 60 (60,000 / 1,000).


# TODO: look into batch optimization and mini-batch training.


In [None]:
# Overfitting is a common issue in machine learning where a model learns the training data too well, including its noise and random fluctuations, to the extent that it negatively impacts the model's performance on new, unseen data. In other words, an overfit model performs well on the training data but fails to generalize effectively to new, unseen data.

# Key characteristics of overfitting:

# 1. **High Training Accuracy, Poor Generalization:**
#    - The model achieves high accuracy on the training dataset because it memorizes the training examples.
#    - However, when presented with new data (validation or test set), its performance is significantly worse.

# 2. **Complex Model:**
#    - Overfitting often occurs when the model is too complex, with too many parameters or features relative to the size of the training dataset.

# 3. **Capturing Noise:**
#    - The model captures not only the underlying patterns in the data but also noise, random fluctuations, or outliers present in the training data.

# 4. **Poor Generalization:**
#    - The overfit model fails to generalize well to new, unseen data, leading to poor performance in real-world scenarios.

# 5. **Validation Loss Plateau or Increase:**
#    - In the training-validation loss curve, the training loss may continue to decrease, but the validation loss either plateaus or starts to increase, indicating that the model is not improving on new data.

# Methods to address overfitting:

# 1. **Simplifying the Model:**
#    - Reduce the complexity of the model by reducing the number of parameters, features, or layers.

# 2. **Regularization:**
#    - Add regularization techniques like L1 or L2 regularization to penalize overly complex models.

# 3. **Dropout:**
#    - Use dropout, a regularization technique where randomly selected neurons are ignored during training to prevent reliance on specific neurons.

# 4. **More Data:**
#    - Increase the size of the training dataset to provide more diverse examples for the model to learn from.

# 5. **Cross-Validation:**
#    - Use techniques like cross-validation to assess the model's performance on multiple subsets of the data.

# Overfitting is a crucial challenge in machine learning, and finding the right balance between model complexity and generalization is essential for building effective models.

In [None]:
# Understanding APIs: API calls, responses, and the errors that could occur.
# Absolutely! Your analogy with reading the Bible and making an API call through prayers is a creative way to understand the concept.

# In the same way, when you interact with an API to get information from a website or a third-party service:

# - **Reading the Bible:** This is like having access to the API documentation. It's a guide that tells you what kind of requests (prayers) you can make and what information (answers) you can expect.

# - **API Call through Prayers:** When you make an API call, it's like saying a prayer. You're expressing a specific request based on what the API documentation (Bible) allows.

# - **Response:** Just like you trust that your prayers will be answered, you trust that the API will send back a response. This response could be the information you requested or an indication that there's an issue (just like how you might sense something is wrong if your prayer isn't answered promptly).

# - **Checking Again:** If the response takes longer than expected or if there's an error, you might "check in" again with another prayer. In the tech world, this could involve reattempting the API call or troubleshooting any issues.

# So, the idea is similar—there's a communication process where you express a specific need, trust that it will be addressed, and handle any issues that might arise in the interaction. It's a great way to connect a technical concept with something more familiar and personal!