### Batches, Batch Normalization and Dropout

In this workbook you can experiment with what you learnt: how to make batches out of your data, and how to apply batch normalization and dropout.

In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

Load the data from data/batches_norm_drop.csv, then take a look at it.

In [5]:
# Read the CSV without treating the first row as a header, so the columns
# are addressed by integer position (0, 1, 2) in the cells below.
data=pd.read_csv('./data/batches_norm_drop.csv', header=None)

# Preview the first rows of the loaded frame.
data.head()


Unnamed: 0,0,1,2
0,0.35014,4.248592,0
1,0.950728,3.528855,0
2,1.371517,3.149416,0
3,0.268221,4.337209,0
4,1.881996,1.515387,0


In [16]:
# Exploratory check: how many rows a 70% split would keep (result is only displayed, not stored).
int(0.7* data.shape[0])

525

In [13]:
# Features are every column except column 2; column 2 is used as the target.
x, y=data.drop(columns=2), data[2]

Now you are ready to code your own function to create batches. If needed, rewatch the video we provided in Eduflow.

**Extra challenge:**    Are you able to split between train and test _**without**_ using sklearn?

In [26]:
from typing import Any, Tuple
def split_train_test(x:Any, y:Any, train_size: float =0.8, random_state: int =None) ->Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    """Shuffle ``x`` and ``y`` together and split them into train/test tensors.

    Args:
        x: Feature table (pandas DataFrame or anything ``np.asarray`` accepts),
            one sample per row.
        y: Targets aligned row-for-row with ``x``.
        train_size: Fraction of the rows placed in the training split.
        random_state: Optional seed for NumPy's global random generator.
            ``None`` leaves the generator untouched; any integer, including
            ``0``, makes the shuffle reproducible.

    Returns:
        ``(x_train, x_test, y_train, y_test)`` as ``float32`` tensors.

    Raises:
        ValueError: If ``x`` and ``y`` do not have the same number of rows.
    """
    # Compare against None so that a seed of 0 is honoured, and seed with
    # np.random.seed (np.random.set_state expects a full state tuple, not an int).
    if random_state is not None:
        np.random.seed(random_state)

    features = np.asarray(x)
    targets = np.asarray(y)
    if features.shape[0] != targets.shape[0]:
        raise ValueError(
            f'x and y must have the same number of rows, '
            f'got {features.shape[0]} and {targets.shape[0]}'
        )

    num_of_rows = int(features.shape[0] * train_size)

    # One shared permutation keeps every feature row paired with its target.
    # Fancy indexing returns copies, so the caller's data is never reordered.
    order = np.random.permutation(features.shape[0])
    features = features[order]
    targets = targets[order]

    x_train, x_test = features[:num_of_rows], features[num_of_rows:]
    y_train, y_test = targets[:num_of_rows], targets[num_of_rows:]

    x_train = torch.tensor(x_train.astype(np.float32))
    x_test = torch.tensor(x_test.astype(np.float32))

    y_train = torch.tensor(y_train.astype(np.float32))
    y_test = torch.tensor(y_test.astype(np.float32))

    return x_train, x_test, y_train, y_test

In [27]:
# Split with the default train_size and print each tensor's shape as a sanity check.
x_train,x_test, y_train,y_test=split_train_test(x, y, random_state=0)

print(f'x train shape: {x_train.shape}')
print(f'x test shape: {x_test.shape}')
print(f'y train shape: {y_train.shape}')
print(f'y test shape: {y_test.shape}')

x train shape: torch.Size([600, 2])
x test shape: torch.Size([150, 2])
y train shape: torch.Size([600])
y test shape: torch.Size([150])


In [28]:

def batchify(x: Any,y: Any,batch_size:int, rand_state: int =None):
    """Split the data into train/test sets and group each set into equal batches.

    The split is delegated to ``split_train_test``. Each set is then shuffled
    again, trimmed to a whole number of batches (leftover samples are
    dropped) and reshaped.

    Args:
        x: Features, forwarded to ``split_train_test``.
        y: Targets, forwarded to ``split_train_test``.
        batch_size: Number of samples per batch.
        rand_state: Forwarded to ``split_train_test`` as ``random_state``.

    Returns:
        ``(x_train, x_test, y_train, y_test)`` where the feature tensors have
        shape ``(n_batches, batch_size, n_features)`` and the target tensors
        have shape ``(n_batches, batch_size, 1)``.
    """
    train_x, test_x, train_y, test_y = split_train_test(x, y, random_state=rand_state)

    # Whole batches only, e.g. 11 samples with batch_size 3 give 3 batches.
    train_batches = train_x.shape[0] // batch_size
    test_batches = test_x.shape[0] // batch_size

    # Draw the train permutation first, then the test one.
    train_order = np.random.permutation(train_x.shape[0])
    test_order = np.random.permutation(test_x.shape[0])

    def _group(features, targets, order, count):
        # Shuffle, drop the remainder, then fold the rows into batches.
        kept = batch_size * count
        features = features[order]
        targets = targets[order]
        batched_features = features[:kept].reshape(count, batch_size, features.shape[1])
        batched_targets = targets[:kept].reshape(count, batch_size, 1)
        return batched_features, batched_targets

    train_x, train_y = _group(train_x, train_y, train_order, train_batches)
    test_x, test_y = _group(test_x, test_y, test_order, test_batches)

    return train_x, test_x, train_y, test_y

In [29]:
import torch.nn as nn

It's time to create your model! Remember to include the new tricks you learnt (batch normalization and dropout)!

In [None]:
class Network(nn.Module):
    """Feed-forward network using batch normalization and dropout.

    Layer stack: Linear -> BatchNorm -> ReLU -> Linear -> BatchNorm -> ReLU
    -> Dropout -> Linear -> BatchNorm -> ReLU -> Linear.

    Args:
        in_features: Size of each input sample (default 200).
        out_features: Size of each output sample (default 5).
        dropout_p: Probability passed to ``nn.Dropout`` (default 0.25).
    """

    def __init__(self, in_features: int = 200, out_features: int = 5, dropout_p: float = 0.25):
        super().__init__()

        self.linear = nn.Linear(in_features, 100)
        self.linear_bn = nn.BatchNorm1d(100)

        self.linear1 = nn.Linear(100, 50)
        # Attribute name kept as originally defined so parameter names
        # (state_dict keys) do not change; forward() now reads this name.
        self.linear2_bn = nn.BatchNorm1d(50)

        self.dropout = nn.Dropout(dropout_p)

        self.fc = nn.Linear(50, 25)
        self.fc_bn = nn.BatchNorm1d(25)

        self.fc1 = nn.Linear(25, out_features)

    def forward(self, y):
        """Run a batch of shape ``(batch, in_features)`` through the network.

        Returns:
            Tensor of shape ``(batch, out_features)``; no activation is
            applied after the last linear layer.
        """
        y = self.linear(y)
        y = torch.relu(self.linear_bn(y))

        y = self.linear1(y)
        y = torch.relu(self.linear2_bn(y))

        y = self.dropout(y)

        y = self.fc(y)
        y = torch.relu(self.fc_bn(y))

        output = self.fc1(y)

        return output

Train your model and evaluate it. **Extra challenge**: try to figure out how you can tell if batch norm and dropout are effective

In [None]:
# YOUR CODE HERE