## Implementing the network

In [2]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import numpy as np
import torch
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch import nn
from torch import optim
import torch.nn.functional as F


### Define genaric model

In [3]:
class Network(nn.Module):
    def __init__(self, input_size, LSTM_hidden_size, LTSM_num_layers, hidden_layers,
                 LSTM_dropout, batch_first=True, output_size=1, drop_p=0.5):
        
        super().__init__()
        self.lstm = nn.LSTM(input_size = input_size,
                            hidden_size = LSTM_hidden_size,
                            num_layers = LTSM_num_layers,
                            batch_first = batch_first,
                            dropout = LSTM_dropout,
                            bidirectional=True)
        
        
        # Add the first layer, input to a hidden layer
        self.hidden_layers = nn.ModuleList([nn.Linear(LSTM_hidden_size, hidden_layers[0])])
        
        # Add a variable number of more hidden layers
        layer_sizes = zip(hidden_layers[:-1], hidden_layers[1:])
        self.hidden_layers.extend([nn.Linear(h1, h2) for h1, h2 in layer_sizes])
        
        self.output = nn.Linear(hidden_layers[-1], output_size)
        
        self.dropout = nn.Dropout(p=drop_p)
        
    def forward(self, x):
        ''' Forward pass through the network, returns the output logits '''
        # 
        x = F.tanh(self.lstm(x))

        # Forward through each layer in `hidden_layers`, with ReLU activation and dropout
        for linear in self.hidden_layers:
            x = F.relu(linear(x))
            x = self.dropout(x)
        
        x = self.output(x)
        
        return F.log_softmax(x, dim=1)
    

### Try different model

In [6]:
'''
input_size, LSTM_hidden_size, LTSM_num_layers, hidden_layers,
LSTM_dropout, batch_first=True, output_size=1, drop_p=0.5)
'''
model_1 = Network(256, 256, 2, [256,150,50],
               0.1, True, 1, 0.2)

model_2 = Network(256, 256, 1, [256,150,50],
               0.1, True, 1, 0.2)

model_3 = Network(256, 256, 1, [256,50],
               0.1, True, 1, 0.2)

model_4 = Network(256, 100, 1, [100,50,25],
               0.1, True, 1, 0.2)

model_5 = Network(256, 100, 2, [100,50,25],
               0.1, True, 1, 0.2)

model_6 = Network(256, 200, 2, [100,50,25],
               0.1, True, 1, 0.2)

model_7 = Network(256, 200, 2, [100,50,25],
               0.9, True, 1, 0.9)

model_8 = Network(256, 256, 3, [100,50,25],
               0.9, True, 1, 0.9)

model_9 = Network(256, 256, 4, [100,50,25],
               0.9, True, 1, 0.9)

model_10 = Network(256, 256, 5, [100,50,25],
               0.9, True, 1, 0.9)

print("Trainable parameters for model_1 is ", sum(p.numel() for p in model_1.parameters() if p.requires_grad))
print("Trainable parameters for model_2 is ", sum(p.numel() for p in model_2.parameters() if p.requires_grad))
print("Trainable parameters for model_3 is ", sum(p.numel() for p in model_3.parameters() if p.requires_grad))
print("Trainable parameters for model_4 is ", sum(p.numel() for p in model_4.parameters() if p.requires_grad))
print("Trainable parameters for model_5 is ", sum(p.numel() for p in model_5.parameters() if p.requires_grad))
print("Trainable parameters for model_6 is ", sum(p.numel() for p in model_6.parameters() if p.requires_grad))
print("Trainable parameters for model_7 is ", sum(p.numel() for p in model_7.parameters() if p.requires_grad))
print("Trainable parameters for model_8 is ", sum(p.numel() for p in model_8.parameters() if p.requires_grad))
print("Trainable parameters for model_9 is ", sum(p.numel() for p in model_9.parameters() if p.requires_grad))
print("Trainable parameters for model_10 is ", sum(p.numel() for p in model_10.parameters() if p.requires_grad))

  "num_layers={}".format(dropout, num_layers))


Trainable parameters for model_1 is  2741575
Trainable parameters for model_2 is  1164615
Trainable parameters for model_3 is  1131365
Trainable parameters for model_4 is  302851
Trainable parameters for model_5 is  544451
Trainable parameters for model_6 is  1722451
Trainable parameters for model_7 is  1722451
Trainable parameters for model_8 is  4238643
Trainable parameters for model_9 is  5815603
Trainable parameters for model_10 is  7392563


### Hyperparameter

In [7]:
learning_rate=0.1
input_size= 256
LSTM_hidden_size= 256
LTSM_num_layers= 2
hidden_layers= [256,150,50]
LSTM_dropout= 0.1
batch_first=True
output_size=1
drop_p=0.5

### Instantiate Model

In [8]:
model = Network(input_size, LSTM_hidden_size, LTSM_num_layers, hidden_layers, LSTM_dropout,
                batch_first, output_size, drop_p)

### Load data

### Try forward 

### Optimizer and criterion

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

### Train function 

In [None]:
def train(model, trainloader, testloader, criterion, optimizer, epochs=5, print_every=40):
    
    steps = 0
    running_loss = 0
    for e in range(epochs):
        # Model in training mode, dropout is on
        model.train()
        for tweet, result in trainloader:
            steps += 1
            
    
            optimizer.zero_grad()
            
            output = model.forward(tweet)
            loss = criterion(output, results)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()

            if steps % print_every == 0:
                # Model in inference mode, dropout is off
                model.eval()
                
                # Turn off gradients for validation, will speed up inference
                with torch.no_grad():
                    test_loss, accuracy = validation(model, testloader, criterion)
                
                print("Epoch: {}/{}.. ".format(e+1, epochs),
                      "Training Loss: {:.3f}.. ".format(running_loss/print_every),
                      "Test Loss: {:.3f}.. ".format(test_loss/len(testloader)),
                      "Test Accuracy: {:.3f}".format(accuracy/len(testloader)))
                
                running_loss = 0
                
                # Make sure dropout and grads are on for training
                model.train()

### save model

In [None]:
torch.save(model.state_dict(), 'checkpoint.pth')