In [1]:
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
import os

# TF_CPP_MIN_LOG_LEVEL is read when TensorFlow's native runtime is loaded, so it
# has to be in the environment *before* the import below; setting it afterwards
# does not silence the start-up messages.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # 2 = hide INFO and WARNING messages

import tensorflow as tf

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense

In [6]:
import torch

In [11]:
from torcheval.metrics.functional import multiclass_accuracy

In [7]:
from sklearn.datasets import load_iris

In [9]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

In [10]:
torch.cuda.is_available()

# Intro to Deep Learning

## Live Demo

In [12]:
# basic Python arithmetic on two scalars
a, b = 10, 15
# the result is a plain int
pow(3 * a + 4 * b, 2)

In [13]:
# plain-Python lists have no vectorised arithmetic, so the formula is applied
# element by element (the NumPy version does the same without the loop)
a = [10, 17, 28, 13, 12]
b = [22, 12, 55, 11, 7]
# the result is a list; it is squared like in the scalar, NumPy, TensorFlow and
# torch cells so that every version computes the same formula
squared = [(3 * x + 4 * y) ** 2 for x, y in zip(a, b)]
squared

In [14]:
# NumPy version: the arithmetic is vectorised over the whole arrays
a = np.array([10, 17, 28, 13, 12])
b = np.array([22, 12, 55, 11, 7])
# show the values first, then the type of the result, one per line
print((3 * a + 4 * b) ** 2, type((3 * a + 4 * b) ** 2), sep="\n")

In [15]:
# TensorFlow version: the same formula on constant tensors
a = tf.constant([10, 17, 28, 13, 12])
b = tf.constant([22, 12, 55, 11, 7])
# show the values first, then the type of the result, one per line
print((3 * a + 4 * b) ** 2, type((3 * a + 4 * b) ** 2), sep="\n")

In [16]:
# PyTorch version: the same formula on torch tensors
a = torch.tensor([10, 17, 28, 13, 12])
b = torch.tensor([22, 12, 55, 11, 7])
# show the values first, then the type of the result, one per line
print((3 * a + 4 * b) ** 2, type((3 * a + 4 * b) ** 2), sep="\n")

In [17]:
# To call a plain-Python function with TensorFlow tensors (TensorFlow 2.x),
# decorate it so that its arguments are converted first.
def tf_input_decorator(func):
    """Wrap ``func`` so that every argument is converted to a TensorFlow tensor.

    Parameters
    ----------
    func : callable
        The function to wrap. It is called with the converted arguments.

    Returns
    -------
    callable
        A wrapper that passes each positional and keyword argument through
        ``tf.convert_to_tensor`` and returns whatever ``func`` returns.
    """
    import functools  # local import keeps this cell self-contained

    @functools.wraps(func)  # keep func's __name__ / __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        # Convert all positional arguments to tensors
        tf_args = [tf.convert_to_tensor(arg) for arg in args]
        # Convert all keyword arguments to tensors (names are kept as they are)
        tf_kwargs = {key: tf.convert_to_tensor(value) for key, value in kwargs.items()}
        # Call the original function with TensorFlow objects
        return func(*tf_args, **tf_kwargs)

    return wrapper

In [18]:
@tf_input_decorator
def my_func(a, b):
    """Return ``(3 * a + 4 * b) ** 2``; the decorator converts ``a`` and ``b`` to tensors."""
    weighted_sum = 3 * a + 4 * b
    return weighted_sum ** 2

In [19]:
a = [10, 17, 28, 13, 12]
b = [22, 12, 55, 11, 7]
my_func(a, b)

In [20]:
iris_df = load_iris()

In [21]:
attrs, labels = iris_df.data, iris_df.target

In [22]:
(attrs.shape[1], )

In [23]:
len(set(labels))

In [24]:
model_tf = Sequential([
    # input layer: the shape is a 1-tuple holding the number of attributes
    Input(shape=(attrs.shape[1],)),
    # no hidden layers
    # output layer: one unit per distinct label, no activation yet
    Dense(units=len(set(labels))),
])

We expect 3 logistic regressions, each with 4 input weights and a bias: 3 × (4 + 1) = 15 parameters in total, and 3 outputs.

In [25]:
model_tf.summary()

We must define a proper loss function: it is what makes the model act as a regressor or as a classifier.

Cross-entropy is the classification loss function. There is a binary variant (for 2 classes) and a categorical variant (for more than 2 classes). If the labels are in their 'normal' form, meaning that all the classes are listed in one column (before one-hot encoding), we must use 'sparse categorical CE'; if the labels are already one-hot encoded, we use 'categorical CE'.

In [26]:
model_tf.compile(loss='sparse_categorical_crossentropy')

The loss expects probabilities, which means that the outputs for each sample have to sum to 1. We need an activation function.

In [27]:
model_tf = Sequential([
    # input layer: the shape is a 1-tuple holding the number of attributes
    Input(shape=(attrs.shape[1],)),
    # no hidden layers
    # output layer: one unit per distinct label, softmax turns the outputs into probabilities
    Dense(units=len(set(labels)), activation='softmax'),
])

If no activation is given, the layer stays linear and the model collapses to a basic linear regression. **softmax** is used when there are more than two classes (one output per class). For binary classification with a single output, use **sigmoid**.

In [28]:
model_tf.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

**optimizer** or solver is the algorithm used for gradient descent.


In [29]:
model_tf.summary()

Then we fit the model

In [30]:
model_tf.fit(attrs, labels)

Output Breakdown
1. 5/5

Indicates the number of batches processed out of the total number of batches in the epoch.
Here, 5 is both the current batch count and the total batch count, meaning the training data was split into 5 batches for this epoch.

2. [==============================]

A visual representation of the progress of the current epoch. The bar fills up as training progresses through the batches.

3. - 2s

The time taken to complete the current epoch (in this case, 2 seconds).

4. 5ms/step

The average time (in milliseconds) taken to process one batch (or step) during training.

5. loss: 4.1142

The loss value calculated at the end of the epoch.
This value is the output of the loss function used during training and reflects how well the model is performing on the training data. A lower value generally indicates better performance.

6. <keras.callbacks.History at 0x7f4a70470250>

After training, Keras returns a History object, which contains the details of the training process, such as loss values and metrics for each epoch.
The memory address (0x7f4a70470250) indicates where this History object is stored in memory.

In [31]:
history = model_tf.fit(attrs, labels)

`fit` behaves like a partial fit: every time we run it, the model is not reset but is trained further on the data, so the loss keeps decreasing. If we decrease the **batch size** (the default is 32), more batches are processed per epoch.

In [32]:
model_tf.fit(attrs, labels, batch_size=8)

This is the 3rd training of the model on the data. Each full pass over the data is called an **epoch**. We can set the number of epochs so that the repeated passes are done automatically.

In [33]:
model_tf.fit(attrs, labels, batch_size=8, epochs=1000)

To start again from a freshly initialised model, we need to clear the session. If we simply re-run the model-building cell, the model number in its name increases and the previous model stays in memory. This is wasteful, because memory is a limited resource.

In [34]:
model_tf = Sequential([
    # input layer: the shape is a 1-tuple holding the number of attributes
    Input(shape=(attrs.shape[1],)),
    # no hidden layers
    # output layer: one softmax unit per distinct label
    Dense(units=len(set(labels)), activation='softmax'),
])

In [35]:
model_tf.summary()

In [36]:
# clearing the session
tf.keras.backend.clear_session()

In [37]:
model_tf = Sequential([
    # input layer: the shape is a 1-tuple holding the number of attributes
    Input(shape=(attrs.shape[1],)),
    # no hidden layers
    # output layer with its activation function: one softmax unit per distinct label
    Dense(units=len(set(labels)), activation='softmax'),
])

In [38]:
model_tf.summary()

In [39]:
model_tf.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

In [40]:
history = model_tf.fit(attrs, labels, batch_size=8, epochs=200)

In [41]:
# draw the per-epoch training loss recorded by the last fit
fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training Loss Over Epochs')
plt.show()

In [42]:
tf.argmax(model_tf.predict(attrs), axis=1)

In [43]:
labels

We can add metrics while compiling the model. The metrics will be displayed and added to the **history**. They give the score of the model on the training set during training.

In [44]:
model_tf.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [45]:
model_tf.fit(attrs, labels, batch_size=8)

In [46]:
model_tf.evaluate(attrs, labels) # score in scikit learn

Going deep in TensorFlow by adding **Dense** layer with **relu** activation

In [65]:
model_tf = Sequential([
    # input layer: the shape is a 1-tuple holding the number of attributes
    Input(shape=(attrs.shape[1],)),
    # first hidden layer
    Dense(units=20, activation='relu'),
    # second hidden layer
    Dense(units=10, activation='relu'),
    # output layer with its activation function: one softmax unit per distinct label
    Dense(units=len(set(labels)), activation='softmax'),
])

In [66]:
model_tf.summary()

In [67]:
model_tf.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [68]:
model_tf.fit(attrs, labels, batch_size=8, epochs=100)

A model constructed this way requires fewer *epochs* to reach a low loss: the deeper model has a bigger capacity and therefore trains in far fewer epochs.

### Pytorch

PyTorch has an OOP-based API. All the values must be converted to tensors.

In [56]:
class LogisticRegressionPT(torch.nn.Module):
    """Multinomial logistic regression: one linear layer followed by softmax.

    Parameters
    ----------
    in_features : int, optional
        Number of input attributes. When omitted, the notebook-level
        ``n_features`` is used.
    out_features : int, optional
        Number of classes. When omitted, the notebook-level ``n_classes``
        is used.

    Notes
    -----
    ``forward`` returns probabilities, not raw logits. A loss that applies
    softmax itself (``torch.nn.CrossEntropyLoss`` expects raw logits) would
    therefore normalise the outputs twice.
    """

    def __init__(self, in_features=None, out_features=None):
        super().__init__()
        # Fall back to the notebook globals so that the argument-less call
        # ``LogisticRegressionPT()`` keeps working.
        if in_features is None:
            in_features = n_features
        if out_features is None:
            out_features = n_classes
        self.layer = torch.nn.Linear(in_features, out_features)

    def forward(self, x):
        """Return class probabilities; softmax is taken over dim 1, so each row sums to 1."""
        return torch.nn.functional.softmax(self.layer(x), dim=1)

In [57]:
n_features = attrs.shape[1]
n_classes = len(set(labels))
pt_model = LogisticRegressionPT()

In [58]:
print(pt_model)

In [59]:
# convert the data to PyTorch tensors with explicit dtypes
# (torch.tensor replaces the legacy FloatTensor / LongTensor constructors)
attrs_pt = torch.tensor(attrs, dtype=torch.float32)  # attributes as 32-bit floats
labels_pt = torch.tensor(labels, dtype=torch.long)   # class indices as 64-bit integers

In [60]:
learning_rate = 0.01


def criterion(probabilities, target):
    """Cross-entropy loss for a model whose output is already softmax-normalised.

    ``torch.nn.CrossEntropyLoss`` applies log-softmax internally and expects raw
    logits; feeding it the probabilities returned by ``pt_model`` would apply
    softmax twice and flatten the gradients. Here the negative log-likelihood is
    computed directly on the log of the probabilities instead.

    Parameters
    ----------
    probabilities : torch.Tensor
        Model output with the class probabilities along dim 1.
    target : torch.Tensor
        Integer class indices.

    Returns
    -------
    torch.Tensor
        Scalar loss, averaged over the samples.
    """
    # the lower bound avoids log(0) = -inf if a probability underflows to zero
    log_probabilities = torch.log(probabilities.clamp(min=1e-12))
    return torch.nn.functional.nll_loss(log_probabilities, target)


optimizer = torch.optim.Adam(pt_model.parameters(), lr=learning_rate)

In [61]:
# training loop wrapped in a function so that it can be reused for other models
def train(model, optimizer, criterion, X, y, num_epochs, train_losses, log_every=50):
    """Train ``model`` on the full batch ``(X, y)`` for ``num_epochs`` epochs.

    Parameters
    ----------
    model : torch.nn.Module
        Model to optimise; called as ``model(X)``.
    optimizer : torch.optim.Optimizer
        Optimiser that updates the model's parameters.
    criterion : callable
        Loss, called as ``criterion(model(X), y)``.
    X, y :
        Inputs and targets, passed unchanged to the model and the loss.
    num_epochs : int
        Number of update steps (one full-batch step per epoch).
    train_losses : mutable sequence
        Buffer with at least ``num_epochs`` entries; entry ``i`` is overwritten
        with the loss of epoch ``i``.
    log_every : int, optional
        Print the loss every ``log_every`` epochs (default 50). ``0`` or
        ``None`` disables printing.

    Raises
    ------
    ValueError
        If ``train_losses`` is too short, checked before any training happens.
    """
    # Fail fast: otherwise the buffer overflow would only surface after the
    # model had already been partially trained.
    if len(train_losses) < num_epochs:
        raise ValueError(
            f'train_losses has {len(train_losses)} entries but num_epochs is {num_epochs}'
        )

    model.train()  # make sure layers such as dropout / batch norm are in training mode

    for epoch in range(num_epochs):
        optimizer.zero_grad()  # gradients accumulate by default, so reset them
        output_train = model(X)  # forward

        loss_train = criterion(output_train, y)
        loss_train.backward()  # backward
        optimizer.step()  # weights update

        loss_value = loss_train.item()
        train_losses[epoch] = loss_value

        if log_every and (epoch + 1) % log_every == 0:
            print(f'Epoch: {epoch + 1} / {num_epochs}, Loss: {loss_value:.4f}')

In [62]:
num_epochs = 200
train_losses = np.zeros(num_epochs)

train(pt_model, optimizer, criterion, attrs_pt, labels_pt, num_epochs, train_losses)

Fast fitting can be achieved using **PyTorch Lightning**. It is the analogue of **Keras** for PyTorch: an additional package that must be installed, and it saves us from writing the training code by hand.

The evaluation is done:

In [63]:
# Inference only: call the module itself rather than .forward() so that any
# registered hooks run, and skip gradient tracking since nothing is trained here.
with torch.no_grad():
    predictions = torch.argmax(pt_model(attrs_pt), dim=1)
multiclass_accuracy(predictions, labels_pt)

Adding layers of neurons is done in the class. The output of the first layer is the input of the second layer, and so on; the final layer produces the final output. The activation of the final layer is kept as **softmax**. The rest are changed to **relu**.

In [64]:
class LogisticRegressionPT(torch.nn.Module):
    """Deeper classifier: two ReLU hidden layers (20 and 10 units) and a softmax output.

    NOTE: this re-uses the name of the single-layer model defined earlier in the
    notebook and therefore shadows it once this cell has run.

    Parameters
    ----------
    in_features : int, optional
        Number of input attributes. When omitted, the notebook-level
        ``n_features`` is used.
    out_features : int, optional
        Number of classes. When omitted, the notebook-level ``n_classes``
        is used.
    """

    def __init__(self, in_features=None, out_features=None):
        super().__init__()
        # Fall back to the notebook globals so that the argument-less call
        # ``LogisticRegressionPT()`` keeps working.
        if in_features is None:
            in_features = n_features
        if out_features is None:
            out_features = n_classes
        # the output size of each layer is the input size of the next one
        self.layer1 = torch.nn.Linear(in_features, 20)
        self.layer2 = torch.nn.Linear(20, 10)
        self.layer3 = torch.nn.Linear(10, out_features)

    def forward(self, x):
        """Return class probabilities; softmax is taken over dim 1, so each row sums to 1."""
        # relu is element-wise and takes no ``dim`` argument (passing one raises
        # TypeError); only softmax needs the dimension to normalise over
        x = torch.nn.functional.relu(self.layer1(x))
        x = torch.nn.functional.relu(self.layer2(x))
        return torch.nn.functional.softmax(self.layer3(x), dim=1)