# Recurrent Neural Networks & Other Sequence Models

## Recurrent Neural Networks

In [1]:
import torch

model = torch.nn.RNN(300, 512)

### RNNs in PyTorch from Scracth

In [1]:
import fastai
fastai.__version__

'2.7.17'

In [2]:
from fastai.text.all import *

In [4]:
class RNNCell(nn.Module):    

    def __init__(self, input_size, hidden_size):
        super(RNNCell, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.ih = nn.Linear(input_size, hidden_size)
        self.hh = nn.Linear(hidden_size, hidden_size)

    def forward(self, x, h = None):
        if h is None:
            h = torch.zeros(x.size(0), self.hidden_size)
        h = torch.tanh(self.ih(x) + self.hh(h))
        return h

In [5]:
#hide
cell = RNNCell(100, 300)
cell(torch.randn(1, 100)).shape

torch.Size([1, 300])

In [6]:
class RNN(nn.Module):    

    def __init__(self, input_size, hidden_size):
        super(RNN, self).__init__()
        self.cell = RNNCell(input_size, hidden_size)

    def forward(self, x, h = None):
        
        print(x.shape)
        for i in range(x.shape[1]):
            h = self.cell(x[:,i], h)
            
        return h

In [7]:
#hide
rnn = RNN(100, 300)
rnn(torch.randn(256, 10, 100)).shape

torch.Size([256, 10, 100])


torch.Size([256, 300])

In [8]:
class TextClassifier(nn.Module):
    
    def __init__(self, vocab_size, hidden_size):
        super(TextClassifier, self).__init__()
        self.emb = nn.Embedding(vocab_size, hidden_size)
        self.rnn = RNN(hidden_size, hidden_size)
        self.fc1 = nn.Linear(hidden_size, 10)
        self.fc2 = nn.Linear(10, 1)
        
    def forward(self, x):
        
        x = self.emb(x)
        x = self.rnn(x)
        x = self.fc1(x)
        out = self.fc2(x)
        
        return out

In [3]:
device = default_device()
print(device)

mps


In [4]:
path = untar_data(URLs.IMDB)
dls = TextDataLoaders.from_folder(path, valid='test', bs=256, device=device)

In [11]:
dls.show_batch(max_n=5)

Unnamed: 0,text,category
0,"xxbos xxmaj match 1 : xxmaj tag xxmaj team xxmaj table xxmaj match xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley vs xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley started things off with a xxmaj tag xxmaj team xxmaj table xxmaj match against xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit . xxmaj according to the rules of the match , both opponents have to go through tables in order to get the win . xxmaj benoit and xxmaj guerrero heated up early on by taking turns hammering first xxmaj spike and then xxmaj bubba xxmaj ray . a xxmaj german xxunk by xxmaj benoit to xxmaj bubba took the wind out of the xxmaj dudley brother . xxmaj spike tried to help his brother , but the referee restrained him while xxmaj benoit and xxmaj guerrero",pos
1,"xxbos * * attention xxmaj spoilers * * \n\n xxmaj first of all , let me say that xxmaj rob xxmaj roy is one of the best films of the 90 's . xxmaj it was an amazing achievement for all those involved , especially the acting of xxmaj liam xxmaj neeson , xxmaj jessica xxmaj lange , xxmaj john xxmaj hurt , xxmaj brian xxmaj cox , and xxmaj tim xxmaj roth . xxmaj michael xxmaj canton xxmaj jones painted a wonderful portrait of the honor and dishonor that men can represent in themselves . xxmaj but alas … \n\n it constantly , and unfairly gets compared to "" braveheart "" . xxmaj these are two entirely different films , probably only similar in the fact that they are both about xxmaj scots in historical xxmaj scotland . xxmaj yet , this comparison frequently bothers me because it seems",pos
2,"xxbos xxmaj some have praised _ xxunk _ as a xxmaj disney adventure for adults . i do n't think so -- at least not for thinking adults . \n\n xxmaj this script suggests a beginning as a live - action movie , that struck someone as the type of crap you can not sell to adults anymore . xxmaj the "" crack staff "" of many older adventure movies has been done well before , ( think _ the xxmaj dirty xxmaj dozen _ ) but _ atlantis _ represents one of the worse films in that motif . xxmaj the characters are weak . xxmaj even the background that each member trots out seems stock and awkward at best . xxmaj an xxup md / xxmaj medicine xxmaj man , a tomboy mechanic whose father always wanted sons , if we have not at least seen these before",neg
3,"xxbos xxmaj some have praised xxunk xxmaj lost xxmaj xxunk as a xxmaj disney adventure for adults . i do n't think so -- at least not for thinking adults . \n\n xxmaj this script suggests a beginning as a live - action movie , that struck someone as the type of crap you can not sell to adults anymore . xxmaj the "" crack staff "" of many older adventure movies has been done well before , ( think xxmaj the xxmaj dirty xxmaj dozen ) but xxunk represents one of the worse films in that motif . xxmaj the characters are weak . xxmaj even the background that each member trots out seems stock and awkward at best . xxmaj an xxup md / xxmaj medicine xxmaj man , a tomboy mechanic whose father always wanted sons , if we have not at least seen these before ,",neg
4,"xxbos xxmaj warning : xxmaj does contain spoilers . \n\n xxmaj open xxmaj your xxmaj eyes \n\n xxmaj if you have not seen this film and plan on doing so , just stop reading here and take my word for it . xxmaj you have to see this film . i have seen it four times so far and i still have n't made up my mind as to what exactly happened in the film . xxmaj that is all i am going to say because if you have not seen this film , then stop reading right now . \n\n xxmaj if you are still reading then i am going to pose some questions to you and maybe if anyone has any answers you can email me and let me know what you think . \n\n i remember my xxmaj grade 11 xxmaj english teacher quite well . xxmaj",pos


In [12]:
learn = Learner(dls, TextClassifier(len(dls.vocab[0]), 100),
                loss_func=CrossEntropyLossFlat(), 
                metrics=accuracy)

In [13]:
learn.fit(10)

epoch,train_loss,valid_loss,accuracy,time


torch.Size([256, 3345, 100])


RuntimeError: Placeholder storage has not been allocated on MPS device!

에러 발생 원인: 기본 device가 mps인데 커스텀 셀은 fastai가 to_device로 자동으로 처리하지 못함.
nn.RNN 사용 시 문제 해결.

In [None]:
import torch
??torch.nn.RNN

In [14]:
class TextClassifier(nn.Module):
    
    def __init__(self, vocab_size, hidden_size):
        super(TextClassifier, self).__init__()
        self.emb = nn.Embedding(vocab_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 10)
        self.fc2 = nn.Linear(10, 2)
        
    def forward(self, x):
        
        x = self.emb(x)
        _, x = self.rnn(x)
        x = self.fc1(x)
        out = self.fc2(x)

        return out

In [15]:
learn = Learner(dls, TextClassifier(len(dls.vocab[0]), 100),
                loss_func=CrossEntropyLossFlat(), 
                metrics=accuracy)
learn.fit(10)

epoch,train_loss,valid_loss,accuracy,time
0,0.694249,0.695882,0.48016,00:37
1,0.692689,0.682223,0.56564,00:35
2,0.687442,0.68542,0.55256,00:33
3,0.689139,0.693504,0.5042,00:32
4,0.689101,0.678905,0.57396,00:32
5,0.677768,0.673047,0.59848,00:32
6,0.65823,0.655778,0.62596,00:33
7,0.685508,0.68868,0.53908,00:33
8,0.692532,0.690735,0.502,00:32
9,0.693399,0.691917,0.5192,00:32


### Bidirectional RNN

In [16]:
class TextClassifier(nn.Module):
    
    def __init__(self, vocab_size, hidden_size):
        super(TextClassifier, self).__init__()
        self.emb = nn.Embedding(vocab_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size,
                          bidirectional=True, batch_first=True)
        self.fc1 = nn.Linear(hidden_size * 2, 10)
        self.fc2 = nn.Linear(10, 2)
        
    def forward(self, x):
        
        x = self.emb(x)
        _, x = self.rnn(x)
        x = torch.cat((x[0], x[1]), dim=-1)
        x = self.fc1(x)
        out = self.fc2(x)

        return out

In [17]:
learn = Learner(dls, TextClassifier(len(dls.vocab[0]), 100),
                loss_func=CrossEntropyLossFlat(), 
                metrics=accuracy)
learn.fit(10)

epoch,train_loss,valid_loss,accuracy,time
0,0.690899,0.682683,0.55808,00:47
1,0.676884,0.671271,0.5908,00:48
2,0.664795,0.683342,0.5306,00:47
3,0.669165,0.667307,0.59556,00:47
4,0.649393,0.657185,0.62436,00:47
5,0.634403,0.646956,0.6362,00:48
6,0.625843,0.671031,0.59768,00:48
7,0.60739,0.662667,0.60684,00:48
8,0.586323,0.60984,0.68112,00:47
9,0.568564,0.613985,0.6882,00:48


### Sequence to Sequence Using RNNs

## Long Short Term Memory (LSTM)

In [6]:
class TextClassifier(nn.Module):
    
    def __init__(self, vocab_size, hidden_size):
        super(TextClassifier, self).__init__()
        self.emb = nn.Embedding(vocab_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 10)
        self.fc2 = nn.Linear(10, 2)
        
    def forward(self, x):
        
        x = self.emb(x)
        x, _ = self.rnn(x)[1]
        x = self.fc1(x)
        out = self.fc2(x)
        
        return out

In [7]:
learn = Learner(dls, TextClassifier(len(dls.vocab[0]), 100),
                loss_func=CrossEntropyLossFlat(), 
                metrics=accuracy)
learn.fit(10)

epoch,train_loss,valid_loss,accuracy,time
0,0.689288,0.670477,0.5854,00:25
1,0.638322,0.623733,0.65808,00:24
2,0.573043,0.538287,0.74448,00:23
3,0.491922,0.466627,0.79168,00:24
4,0.490327,0.460932,0.7952,00:23
5,0.418083,0.408355,0.8216,00:23
6,0.368535,0.381206,0.84288,00:23
7,0.395935,0.521197,0.73304,00:23
8,0.377113,0.446771,0.8202,00:24
9,0.361235,0.44548,0.81864,00:24


연속으로 3개째의 주피터 노트북 모델 학습을 실행할 경우 28gb m4 맥북에서 oom 에러 발생.
메모리 관리를 위해선 모델을 끊어서 실행할 필요가 있다.
사실 주피터 노트북 자체의 문제인 거 같긴 하다. .py로 모델 학습을 진행하면 학습 종료 후 메모리 release가 될 테니까.

## Gated Recurrent Units (GRU)

In [5]:
class TextClassifier(nn.Module):
    
    def __init__(self, vocab_size, hidden_size):
        super(TextClassifier, self).__init__()
        self.emb = nn.Embedding(vocab_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 10)
        self.fc2 = nn.Linear(10, 2)
        
    def forward(self, x):
        
        x = self.emb(x)
        _, x = self.rnn(x)
        x = self.fc1(x)
        out = self.fc2(x)
        
        return out

In [6]:
learn = Learner(dls, TextClassifier(len(dls.vocab[0]), 100),
                loss_func=CrossEntropyLossFlat(), 
                metrics=accuracy)
learn.fit(10)

epoch,train_loss,valid_loss,accuracy,time
0,0.690114,0.679439,0.57752,00:51
1,0.601322,0.550634,0.72568,00:48
2,0.516466,0.544121,0.74572,00:53
3,0.432937,0.437708,0.81208,00:54
4,0.368316,0.455195,0.8118,00:54
5,0.322226,0.365492,0.85092,00:54
6,0.289722,0.340253,0.85936,00:54
7,0.23629,0.335776,0.85952,00:54
8,0.218792,0.324971,0.86556,00:54
9,0.210426,0.339435,0.864,00:55


## Conclusion: The Future of RNNs