In [1]:
# Import dependency 
import datetime

import numpy as np 
import torch 
import torch.nn as nn 
import torch.optim as optim
# Import the tensorboard used dependency
from torch.utils.tensorboard import SummaryWriter

In [2]:
# Synthetic data for a simple linear model: y = true_b + true_w * x + noise
true_b, true_w = 1, 2
N = 100

# Seed NumPy's global RNG so the draws below are repeatable
np.random.seed(43)

# Features are drawn first, then the noise; the draw order determines the values.
# Both come from np.random.rand, i.e. uniform on [0, 1); the noise is scaled by 0.1.
x = np.random.rand(N, 1)
epsilon = 0.1 * np.random.rand(N, 1)

y = true_w * x + true_b + epsilon

In [3]:
# Generate training and validation sets
idx = np.arange(N)

# Positional 80/20 split. The indices are NOT shuffled here: the first 80%
# of the rows go to training and the remaining 20% to validation.
train_idx = idx[:int(N*.8)]
val_idx = idx[int(N*.8):]

# Slice the feature/label arrays for both subsets
x_train, y_train = x[train_idx], y[train_idx]
x_val, y_val = x[val_idx], y[val_idx]




In [4]:
# Data preparation

# Prefer the GPU when one is visible to PyTorch, otherwise stay on the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Convert the NumPy training arrays to float tensors placed on `device`
x_train_tensor, y_train_tensor = (
    torch.as_tensor(array).float().to(device)
    for array in (x_train, y_train)
)

In [5]:
# Build a Dataset
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    """Dataset pairing a feature tensor with a label tensor, sample by sample."""

    def __init__(self, x_tensor, y_tensor):
        """Keep references to the features (`x_tensor`) and labels (`y_tensor`)."""
        self.x, self.y = x_tensor, y_tensor

    def __len__(self):
        """Number of samples, taken from the length of the feature tensor."""
        return len(self.x)

    def __getitem__(self, index):
        """Return the `(features, label)` tuple stored at `index`."""
        features = self.x[index]
        label = self.y[index]
        return (features, label)

# Wrap the training tensors and show the first sample followed by the sample count
train_data = CustomDataset(x_train_tensor, y_train_tensor)
print(train_data[0], len(train_data), sep="\n")

(tensor([0.1151]), tensor([1.2404]))
80


In [6]:
#Use the `random split` to split data
import torch.utils.data as data 

# Seed torch's global RNG so the random split below is repeatable
torch.manual_seed(42)

# Convert the full NumPy arrays to float tensors BEFORE splitting
x_tensor, y_tensor = (torch.as_tensor(array).float() for array in (x, y))

# One dataset holding every data point
dataset = CustomDataset(x_tensor, y_tensor)

# Work out the subset sizes: `ratio` of the samples for training, the rest for validation
ratio = .8
n_total = len(dataset)
n_train = int(n_total * ratio)
n_val = n_total - n_train

# Randomly partition the dataset into the two subsets
train_data, val_data = data.random_split(dataset, lengths=[n_train, n_val])

print(len(train_data))

80


In [7]:
from torch.utils.data import DataLoader

# Training loader: mini-batches of two samples, with shuffling enabled
train_loader = DataLoader(train_data, batch_size=2, shuffle=True)
print(train_loader)

<torch.utils.data.dataloader.DataLoader object at 0x7fe0c814e520>


In [8]:
# Validation loader: batches of 16 samples, shuffling left at its default
val_loader = DataLoader(val_data, batch_size=16)

In [9]:
# Peek at a single mini-batch from the training loader (features first, labels second)
# NOTE(review): creating an iterator on a shuffling loader presumably consumes
# random state — confirm if exact run-to-run reproducibility matters
print(next(iter(train_loader)))

[tensor([[0.6953],
        [0.0569]]), tensor([[2.4350],
        [1.1967]])]


In [10]:
#Define the train step

def make_train_step(model, loss_fn, optimizer):
    """Build a function that performs one training step on a mini-batch.

    Arguments
    ---------
        model: the module being trained
        loss_fn: callable computing the loss from (predictions, labels)
        optimizer: optimizer holding the model's parameters

    Returns a function `perform_train_step(x, y)` that returns the batch
    loss as a Python number.
    """
    def perform_train_step(x, y):
        # Switch the model to TRAIN mode
        model.train()

        # Forward pass and loss for this batch
        batch_loss = loss_fn(model(x), y)

        # Backward pass: compute gradients of the loss w.r.t. the parameters
        batch_loss.backward()

        # Update the parameters, then reset the gradients for the next call
        optimizer.step()
        optimizer.zero_grad()

        return batch_loss.item()

    # The closure is what the training loop calls
    return perform_train_step

In [11]:
# Define the validation step 
def make_val_step(model, loss_fn):
    """Build a function that evaluates the loss on one validation mini-batch.

    Arguments
    ---------
        model: the module being evaluated
        loss_fn: callable computing the loss from (predictions, labels)

    Returns a function `perform_val_step(x, y)` that returns the batch
    loss as a Python number. No gradients are computed and no parameters
    are updated by the step itself.
    """
    def perform_val_step(x, y):
        # Switch the model to evaluation mode
        model.eval()

        # Forward pass and loss only; there is no backward pass or optimizer
        # step because parameters are not updated during evaluation
        batch_loss = loss_fn(model(x), y)
        return batch_loss.item()

    return perform_val_step

In [12]:
# Model config 

# Define the model 
class ManualLinearRegression(nn.Module):
    """Linear regression with one input feature and one output, backed by `nn.Linear`."""

    def __init__(self):
        super(ManualLinearRegression, self).__init__()
        # Single linear layer: one weight and one bias
        self.linear = nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        """Apply the linear layer to `x` and return the prediction."""
        prediction = self.linear(x)
        return prediction


# Learning rate used by the optimizer
lr = 0.1

# Seed torch's RNG so the initial parameters are repeatable
torch.manual_seed(42)

# Instantiate the model on the selected device and show its initial parameters
model = ManualLinearRegression().to(device)
print(model.state_dict())

# SGD over all of the model's parameters
optimizer = optim.SGD(model.parameters(), lr=lr)

# Mean-squared-error loss
loss_fn = nn.MSELoss(reduction="mean")

# Step functions bound to this model / loss / optimizer
train_step = make_train_step(model, loss_fn, optimizer)
val_step = make_val_step(model, loss_fn)

# Summary writer used to send data to TensorBoard
writer = SummaryWriter('runs/simple_linear_regression')

# Fetch one (features, labels) mini-batch and use the features to add the
# model graph; the input is sent to the same device as the model
dummy_x, dummy_y = next(iter(train_loader))
writer.add_graph(model, dummy_x.to(device))

OrderedDict([('linear.weight', tensor([[0.7645]])), ('linear.bias', tensor([0.8300]))])


In [13]:
# Construct a function for mini_batch
def mini_batch(device, dataloader, step):
    """Run `step` on every mini-batch of `dataloader` and average the losses.

    Arguments
    ---------
        device: where to send each mini-batch before calling `step`
        dataloader: iterable yielding (features, labels) mini-batches
        step: the step function (training or validation); it is called as
            `step(x_batch, y_batch)` and must return the batch loss

    Returns
    -------
        The mean of the per-batch losses, as computed by `np.mean`.
    """
    mini_batch_losses = []
    for x_batch, y_batch in dataloader:
        # Send BOTH the features and the labels to the device; the step
        # function needs them on the same device as the model
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        # Perform the step and keep its loss
        mini_batch_losses.append(step(x_batch, y_batch))

    # Average loss over all mini-batches
    return np.mean(mini_batch_losses)

In [14]:
# Model training

n_epochs = 1000
losses = []
val_losses = []

for epoch in range(n_epochs):
    # One pass over the training loader, updating the parameters batch by batch
    loss = mini_batch(device, train_loader, train_step)
    losses.append(loss)

    # One pass over the validation loader; gradient computation is
    # disabled inside the `torch.no_grad()` context manager
    with torch.no_grad():
        val_loss = mini_batch(device, val_loader, val_step)
    val_losses.append(val_loss)

    # Log both losses for this epoch under the 'loss' tag
    epoch_scalars = {'training': loss, 'validation': val_loss}
    writer.add_scalars('loss', epoch_scalars, epoch)

# Close the writer
writer.close()

In [15]:
# Model's parameters after training (this cell follows the training loop)
print(model.state_dict())

OrderedDict([('linear.weight', tensor([[1.9951]])), ('linear.bias', tensor([1.0443]))])


In [16]:
# Load the TensorBoard notebook extension (provides the %tensorboard magic used next)
%load_ext tensorboard 

In [17]:
# Run TensorBoard inside the notebook, reading logs from runs/simple_linear_regression
%tensorboard --logdir runs/simple_linear_regression

In [20]:
# Check model's parameters
#print(model.state_dict())

In [14]:
# Show the model's parameters as a list of Parameter objects
# NOTE(review): this cell's execution count is out of order relative to its
# neighbours, so the stored output may come from a different run — re-run
# the notebook top to bottom to confirm
print(list(model.parameters()))

[Parameter containing:
tensor([[2.0054]], requires_grad=True), Parameter containing:
tensor([1.0554], requires_grad=True)]


In [21]:
# Print every validation loss collected by the training loop (one per epoch)
# NOTE(review): this dumps the whole list; consider showing only a slice
print(val_losses)

[0.013594414107501507, 0.00614015688188374, 0.0029982510022819042, 0.0017715154099278152, 0.0013672834902536124, 0.0014056369254831225, 0.001380442758090794, 0.0013766773045063019, 0.0009196145401801914, 0.001146456750575453, 0.0010036606690846384, 0.0012659892381634563, 0.0009606099338270724, 0.0011863455292768776, 0.0010928059346042573, 0.0009636757604312152, 0.0009554814605508, 0.001381834561470896, 0.0011494726059027016, 0.0008877780055627227, 0.0016226691077463329, 0.0015697689959779382, 0.0008746515086386353, 0.0013831548858433962, 0.0011265744105912745, 0.001196929399156943, 0.001670920115429908, 0.0009932741522789001, 0.0015094078262336552, 0.001701157249044627, 0.0010661108826752752, 0.0010211825428996235, 0.0010415800788905472, 0.001434624195098877, 0.0014004451804794371, 0.0013494285522028804, 0.0011520017578732222, 0.0017580457497388124, 0.0011886984575539827, 0.0015203332877717912, 0.0010357198189012706, 0.0010937144106719643, 0.0013684626901522279, 0.001031870546285063, 0

In [2]:
# Contruct a class and define the constructor 
class StepbyStep(object):
    """Bundle a model, a loss function and an optimizer with a training loop.

    The instance keeps the data loaders, an optional TensorBoard writer,
    the per-epoch training/validation losses and the total number of
    epochs trained so far, and offers checkpointing, prediction and
    plotting helpers.
    """

    def __init__(self, model, loss_fn, optimizer):
        """Store the training components and send the model to the device.

        Arguments
        ---------
            model: the module to train
            loss_fn: callable computing the loss from (predictions, labels)
            optimizer: optimizer that updates the model's parameters
        """
        # Define some attributes to use them later
        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optimizer

        # Automatically decide the device to use and send the model there
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model.to(self.device)

        # These attributes are not available at creation time, so they
        # stay None until the corresponding setter is called
        self.train_loader = None
        self.val_loader = None
        self.writer = None

        # These attributes are computed internally
        self.losses = []
        self.val_losses = []
        self.total_epochs = 0

        # Step functions for this model / loss function / optimizer; the
        # builders take no arguments and use the class attributes directly
        self.train_step = self._make_train_step()
        self.val_step = self._make_val_step()

    def to(self, device):
        """Let the user specify the device; the model is moved to it."""
        self.device = device
        self.model.to(self.device)

    def set_loaders(self, train_loader, val_loader=None):
        """Let the user set the training loader and, optionally, the validation loader."""
        self.train_loader = train_loader
        self.val_loader = val_loader

    def set_tensorboard(self, name, folder='runs'):
        """Create a SummaryWriter logging to `<folder>/<name>_<timestamp>`."""
        # The timestamp suffix gives every call its own log directory
        suffix = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
        self.writer = SummaryWriter(
            '{}/{}_{}'.format(folder, name, suffix)
        )

    def _make_train_step(self):
        """Build the function that performs one step of the training loop."""
        def perform_train_step(x, y):
            # Set the model to TRAIN mode
            self.model.train()

            # Step 1: compute the model's prediction - forward pass
            yhat = self.model(x)

            # Step 2: compute the loss
            loss = self.loss_fn(yhat, y)

            # Step 3: compute the gradients
            loss.backward()

            # Step 4: update the parameters, then reset the gradients
            self.optimizer.step()
            self.optimizer.zero_grad()

            # Return the loss as a Python number
            return loss.item()

        # Return the function that will be called inside the train loop
        return perform_train_step

    def _make_val_step(self):
        """Build the function that performs one step of the validation loop."""
        def perform_val_step(x, y):
            # Set the model to evaluation mode
            self.model.eval()

            # Step 1: compute the model's prediction - forward pass
            yhat = self.model(x)

            # Step 2: compute the loss
            loss = self.loss_fn(yhat, y)

            # No gradients or parameter updates here: parameters are not
            # updated during evaluation
            return loss.item()

        return perform_val_step

    def _mini_batch(self, validation=False):
        """Run one pass over a loader and return the average batch loss.

        The `validation` argument selects which loader and step function
        are used. Returns None when the selected loader has not been set.
        """
        if validation:
            dataloader = self.val_loader
            step = self.val_step
        else:
            dataloader = self.train_loader
            step = self.train_step

        if dataloader is None:
            return None

        mini_batch_losses = []
        for x_batch, y_batch in dataloader:
            # Send the mini-batch to the device
            x_batch = x_batch.to(self.device)
            y_batch = y_batch.to(self.device)

            # Perform the step
            mini_batch_losses.append(step(x_batch, y_batch))

        # Average loss over all mini-batches, stored as a plain Python float
        # so the loss histories written by save_checkpoint contain only
        # built-in types
        return float(np.mean(mini_batch_losses))

    def set_seed(self, seed=42):
        """Let the user set the seed for reproducibility."""
        # https://pytorch.org/docs/stable/notes/randomness.html
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

        # Set the seed for generating random numbers
        torch.manual_seed(seed)
        np.random.seed(seed)

    def train(self, n_epochs, seed=42):
        """Run the training loop for `n_epochs` epochs.

        Each epoch runs one pass over the training loader and one pass,
        without gradients, over the validation loader. Losses are appended
        to `self.losses` / `self.val_losses` and, when a writer has been
        set, logged to TensorBoard.
        """
        # Set the seed for reproducibility
        self.set_seed(seed)

        for epoch in range(n_epochs):
            # Keep track of the number of epochs
            self.total_epochs += 1

            # Training pass
            loss = self._mini_batch(validation=False)
            self.losses.append(loss)

            # Validation pass; `torch.no_grad()` disables any gradient
            # computation inside the block
            with torch.no_grad():
                val_loss = self._mini_batch(validation=True)
                self.val_losses.append(val_loss)

            # If a SummaryWriter has been set...
            if self.writer:
                scalars = {'training': loss}
                if val_loss is not None:
                    scalars.update({'validation': val_loss})

                # Add scalars
                self.writer.add_scalars(
                    main_tag='loss',
                    tag_scalar_dict=scalars,
                    global_step=epoch)

        if self.writer:
            # Flush the writer
            self.writer.flush()

    def save_checkpoint(self, filename):
        """Save a dictionary with all elements needed for resuming training."""
        checkpoint = {
            'epoch': self.total_epochs,
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'loss': self.losses,
            'val_loss': self.val_losses
        }
        torch.save(checkpoint, filename)

    def load_checkpoint(self, filename):
        """Load a checkpoint written by `save_checkpoint` and restore the state."""
        # Load the checkpoint file into a dictionary, remapping any stored
        # tensors onto the device currently in use
        checkpoint = torch.load(filename, map_location=self.device)

        # Restore the saved state
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.total_epochs = checkpoint['epoch']
        self.losses = checkpoint['loss']
        self.val_losses = checkpoint['val_loss']

        # Checkpoints are meant for resuming training, so use TRAIN mode
        self.model.train()

    def predict(self, x):
        """Return the model's predictions for `x` as a NumPy array.

        `x` is converted with `torch.as_tensor(...).float()`, so it can be
        a NumPy array or anything else `torch.as_tensor` accepts.
        """
        # Set the model to eval mode for prediction
        self.model.eval()

        # Take the input and make it a float tensor
        x_tensor = torch.as_tensor(x).float()

        # Send the input to the device and use the model for prediction
        y_hat_tensor = self.model(x_tensor.to(self.device))

        # Set the model back to train mode
        self.model.train()

        # Detach it, bring it to the CPU and convert it back to NumPy
        return y_hat_tensor.detach().cpu().numpy()

    def plot_losses(self):
        """Plot the training (and, if available, validation) losses on a log scale.

        Returns the created figure.
        """
        # NOTE(review): `plt` is not imported in the visible notebook;
        # `import matplotlib.pyplot as plt` must have been run before this call
        fig = plt.figure(figsize=(10, 4))

        plt.plot(self.losses, label="Training Loss", c='b')

        if self.val_loader:
            plt.plot(self.val_losses, label="Validation Loss", c='r')
        plt.yscale('log')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()
        plt.tight_layout()
        return fig

    # Backward-compatible alias for the original (misspelled) method name
    plot_lossed = plot_losses

    def add_graph(self):
        """Add the model graph to TensorBoard using one training mini-batch.

        Does nothing unless both the training loader and the writer are set.
        """
        if self.train_loader and self.writer:
            x_dummy, y_dummy = next(iter(self.train_loader))
            self.writer.add_graph(self.model, x_dummy.to(self.device))
            