In [1]:
import torch
import numpy as np
import pandas as pd

In [2]:
import torch.nn as nn

In [4]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [11]:
from torch.utils.data import DataLoader, Dataset, TensorDataset

In [7]:
# Load the Iris dataset through scikit-learn's load_iris helper.
iris = load_iris()

In [8]:
# Pull the feature matrix and the target vector out of the loaded dataset.
X, y = iris['data'], iris['target']

# Hold out one third of the samples for testing; the fixed random_state
# makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=1./3,
    random_state=1,
)

In [10]:
# Standardize the training features and convert features/labels to tensors.
#
# np.mean / np.std are called without an axis, so ONE mean and ONE std computed
# over every entry of X_train are used for all columns.
# NOTE(review): per-feature standardization would need axis=0 — confirm which
# behaviour is intended before changing it.
train_mean = np.mean(X_train)
train_std = np.std(X_train)
X_train_norm = torch.from_numpy((X_train - train_mean) / train_std).float()

# torch.as_tensor accepts both an ndarray and an existing Tensor, so this cell
# can be re-run safely (torch.from_numpy raises TypeError once y_train has
# already been rebound to a Tensor by a previous run).
y_train = torch.as_tensor(y_train)

In [12]:
# Seed torch's global RNG before the loader is used so that the shuffled
# batch order is repeatable.
torch.manual_seed(1)

batch_size = 2

# Pair every normalized feature row with its label, then wrap the pairs in a
# loader that yields shuffled mini-batches of `batch_size` samples.
train_ds = TensorDataset(X_train_norm, y_train)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

When it comes to saving and loading models, there are 3 core functions to be familiar with:
- torch.save: Saves a serialized object to disk. It uses Python's pickle utility for serialization; models, tensors, and dictionaries of all kinds of objects can be saved with this function.
- torch.load: Uses pickle's unpickling facilities to deserialize pickled object files into memory. It also lets you choose the device the data is loaded onto (via map_location).
- torch.nn.Module.load_state_dict: Loads a model's parameter dictionary from a deserialized state_dict.

What is a state_dict?
- In PyTorch, the learnable parameters (i.e. weights and biases) of a model are contained in the model's parameters (accessed with model.parameters()). A state_dict is simply a Python dictionary object that maps each layer to its parameter tensor. Note that only layers with learnable parameters and registered buffers have entries in the model's state_dict. Optimizers also have a state_dict, holding the optimizer's state and the hyperparameters it uses. Because state_dict objects are Python dictionaries, they can be easily saved, updated, altered, and restored, adding a great deal of modularity to PyTorch models and optimizers.

In [3]:
class Model(nn.Module):
    """Two fully connected layers with a sigmoid applied in between.

    Args:
        input_size: Number of input features of ``layer1``.
        hidden_size: Number of outputs of ``layer1`` / inputs of ``layer2``.
        output_size: Number of outputs of ``layer2``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the output of ``layer2``; no activation is applied to it."""
        hidden = torch.sigmoid(self.layer1(x))
        return self.layer2(hidden)

In [13]:
# Layer sizes: the input width is read from the feature tensor, the hidden and
# output widths are fixed here.
# NOTE(review): output_size = 3 presumably matches the number of target
# classes — confirm against the dataset.
input_size = X_train_norm.shape[1]
hidden_size = 16
output_size = 3

model = Model(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)

In [15]:
# initialize everything else:
# Training setup: cross-entropy loss and an Adam optimizer over all of the
# model's parameters.
learning_rate = 0.001
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [16]:
# print the model's state_dict:
# List every entry of the model's state_dict together with its tensor shape.
for entry_name, entry_tensor in model.state_dict().items():
    print(entry_name, "\t", entry_tensor.size())

layer1.weight 	 torch.Size([16, 4])
layer1.bias 	 torch.Size([16])
layer2.weight 	 torch.Size([3, 16])
layer2.bias 	 torch.Size([3])


In [17]:
# print the optimizer's state dict:
# List every top-level entry of the optimizer's state_dict.
for entry_name, entry_value in optimizer.state_dict().items():
    print(entry_name, "\t", entry_value)

state 	 {}
param_groups 	 [{'lr': 0.001, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': None, 'decoupled_weight_decay': False, 'params': [0, 1, 2, 3]}]


Saving and Loading model for inference:

In [None]:
# saving the model
# torch.save(model.state_dict(),PATH)


# Loading the saved model:
# model = Model(input_size, hidden_size, output_size)  # re-create the same architecture first
# model.load_state_dict(torch.load(PATH, weights_only=True))
# model.eval()

Saving/Loading the entire model:


In [None]:
# torch.save(model, PATH)

# Loading the model:
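# NOTE: weights_only=False unpickles arbitrary Python objects, so only load files from a trusted source.
# model = torch.load(PATH, weights_only=False)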
# model.eval()

In [19]:
# Train for a fixed number of epochs, recording the per-epoch mean loss and
# accuracy over the training set.
num_epochs = 100
loss_hist = [0.0] * num_epochs
accuracy_hist = [0.0] * num_epochs

# Loop-invariant: number of samples used to average the per-epoch sums.
n_train = len(train_dl.dataset)

for epoch in range(num_epochs):
    for x_batch, y_batch in train_dl:
        y_batch = y_batch.long()
        pred = model(x_batch)
        loss = loss_fn(pred, y_batch)

        # Backpropagate, update the parameters, then clear the gradients so
        # they do not accumulate into the next batch.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Scale the batch loss by the batch size so that dividing by n_train
        # below yields a per-sample average.
        loss_hist[epoch] += loss.item() * y_batch.size(0)

        # .item() keeps accuracy_hist a list of plain Python floats, like
        # loss_hist, instead of a list of 0-dim tensors.
        is_correct = (torch.argmax(pred, dim=1) == y_batch).float()
        accuracy_hist[epoch] += is_correct.sum().item()

    loss_hist[epoch] /= n_train
    accuracy_hist[epoch] /= n_train

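Calling model.eval() switches the model to evaluation mode. Because it returns the model itself, running it as the last line of a cell also displays the layer structure, which is a quick way to verify the architecture.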

Choosing activation functions for multilayer neural networks:

In [20]:
# Example input vector and weight vector for the logistic-activation demo.
# NOTE: this rebinds X, which earlier in the notebook held the Iris features.
X = np.array([1.0, 1.4, 2.5])
w = np.array([0.4, 0.3, 0.5])

In [28]:
def net_input(X, w):
    """Return the dot product of the inputs ``X`` and the weights ``w``."""
    z = np.dot(X, w)
    return z

def logistic(z):
    """Return the logistic (sigmoid) function ``1 / (1 + exp(-z))`` of ``z``."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator

def logistic_activation(X, w):
    """Apply ``logistic`` to the net input of ``X`` and ``w``."""
    return logistic(net_input(X, w))

# Evaluate the logistic activation for the example vectors and report it with
# three decimals.
prob = logistic_activation(X, w)
print(f'P(y=1|x) = {prob:.3f}')

P(y=1|x) = 0.888


In [24]:
# A 2x3 matrix and a length-3 vector used to illustrate np.dot below.
a = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
b = np.array([4, 5, 6])

In [27]:
# Matrix-vector product of a and b; b is 1-D, so its transpose is b itself.
a.dot(b.T)

array([32, 77])