<a href="https://colab.research.google.com/github/jery5237hent/Deep-Learning/blob/main/HW2_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch   
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchtext.legacy import data   
from torchtext.vocab import GloVe
import random
import matplotlib.pyplot as plt
# Seed PyTorch's random number generators so weight initialisation is
# repeatable from run to run.
torch.manual_seed(2021)
torch.cuda.manual_seed(2021)
# Ask cuDNN to pick deterministic algorithms.
torch.backends.cudnn.deterministic = True  
# Global compute device: CUDA when available, otherwise CPU. Read by name in
# RNN.forward and prepare_data.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
def accuracy(y_pred, y_test):
    """Return the fraction of rows whose highest-scoring class matches the label.

    Args:
        y_pred: Score tensor with the class scores along dimension 1.
        y_test: Tensor of target class indices, one per row of ``y_pred``.

    Returns:
        A 0-d tensor holding ``correct / len(y_test)``.
    """
    predicted_class = y_pred.argmax(dim=1)
    n_correct = predicted_class.eq(y_test).sum()
    return n_correct / len(y_test)

In [None]:
def train(data, optimizer, criterion):
    """Run one training epoch of the global ``model`` over ``data``.

    For every batch: clear gradients, run the forward pass, compute the loss,
    back-propagate, clip the gradient norm to 0.01 and take an optimizer step.

    Args:
        data: Iterable of batches exposing ``title`` (a ``(tokens, lengths)``
            pair) and ``category``; must support ``len()``.
        optimizer: Optimizer bound to the parameters of ``model``.
        criterion: Loss callable invoked as ``criterion(prediction, target)``.

    Returns:
        ``(mean_loss, mean_accuracy)``, each averaged over the number of
        batches (every batch weighs the same regardless of its size).
    """
    model.train()

    loss_total = 0
    acc_total = 0
    for batch in data:
        optimizer.zero_grad()

        tokens, lengths = batch.title
        # presumably category arrives as (batch, 1); squeeze to (batch,) — TODO confirm
        targets = batch.category.squeeze(1)

        scores = model(tokens, lengths)
        loss = criterion(scores, targets)
        acc = accuracy(scores, targets)

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 0.01)
        optimizer.step()

        loss_total += loss.item()
        acc_total += acc.item()

    n_batches = len(data)
    return loss_total / n_batches, acc_total / n_batches

In [None]:
def predict(data, model):
    """Predict a category label for every example in ``data``.

    Puts ``model`` in evaluation mode, runs it over all batches without
    gradient tracking, takes the arg-max class of each row and maps the class
    index back to its label string through the global ``category`` vocabulary.

    Args:
        data: Iterable of batches whose ``title`` attribute is a
            ``(tokens, lengths)`` pair.
        model: Callable as ``model(tokens, lengths)`` returning per-class
            scores with the classes along dimension 1.

    Returns:
        A ``pandas.DataFrame`` with a single ``'Category'`` column, one row
        per example in iteration order.
    """
    model.eval()
    labels = []

    with torch.no_grad():
        for batch in data:
            text, text_len = batch.title
            scores = model(text, text_len).squeeze(1)
            # Convert once to plain ints rather than indexing the label list
            # with 0-d tensors element by element.
            class_ids = torch.argmax(scores, dim=1).tolist()
            labels.extend(category.vocab.itos[idx] for idx in class_ids)

    return pd.DataFrame(labels, columns=['Category'])

In [None]:
def evaluate(data, optimizer, criterion):
    """Compute mean loss and accuracy of the global ``model`` over ``data``.

    The model is switched to evaluation mode and no gradients are tracked.

    Args:
        data: Iterable of batches exposing ``title`` (a ``(tokens, lengths)``
            pair) and ``category``; must support ``len()``.
        optimizer: Not used; kept so the signature mirrors ``train``.
        criterion: Loss callable invoked as ``criterion(prediction, target)``.

    Returns:
        ``(mean_loss, mean_accuracy)``, each averaged over the number of
        batches.
    """
    model.eval()

    loss_total = 0
    acc_total = 0
    with torch.no_grad():
        for batch in data:
            tokens, lengths = batch.title
            targets = batch.category.squeeze(1)

            scores = model(tokens, lengths).squeeze(1)

            loss_total += criterion(scores, targets).item()
            acc_total += accuracy(scores, targets).item()

    n_batches = len(data)
    return loss_total / n_batches, acc_total / n_batches

In [None]:
class RNN(nn.Module):
    """Bidirectional-GRU text classifier with attention pooling.

    Token ids are looked up in the pretrained vectors of the module-level
    ``vocab``, encoded by ``gru1``, reduced over the sequence axis with
    learned attention weights, fed through ``gru2`` as a length-1 sequence
    and projected to ``n_class`` scores.

    NOTE: the attribute names ``gru1``, ``w_omega``, ``u_omega``, ``gru2`` and
    ``fc`` become the keys of the module's state dict, so renaming them makes
    previously saved checkpoints unloadable.
    """

    def __init__(self, vocab_size, n_class):
        """Create the layers.

        Args:
            vocab_size: Not used; the embedding table and its width are taken
                from the global ``vocab.vectors`` instead.
            n_class: Number of output classes (width of the final linear layer).
        """
        super(RNN, self).__init__()

        hidden_size = 128

        # Plain tensor attribute: it is not registered as a parameter or
        # buffer, so it is not trained, not stored in the state dict and not
        # moved by ``.to(device)``.
        self.embedding = vocab.vectors
#         self.lstm1 = nn.LSTM(vocab.vectors.shape[1], hidden_size, 2, bidirectional = True, batch_first=True)
        
        # Bidirectional with hidden_size/2 units per direction, so the
        # concatenated output feature size equals hidden_size.
        self.gru1 = nn.GRU(vocab.vectors.shape[1], int(hidden_size/2), bidirectional=True, batch_first=True)
        
        # Attention parameters: w_omega projects each time step,
        # u_omega turns the projection into one scalar score per time step.
        self.w_omega = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
        nn.init.uniform_(self.w_omega, -0.1, 0.1)
        self.u_omega = nn.Parameter(torch.Tensor(hidden_size, 1))
        nn.init.uniform_(self.u_omega, -0.1, 0.1)

#         self.lstm2 = nn.LSTM(hidden_size * 2, hidden_size, 2, bidirectional = True, batch_first=True)
        self.gru2 = nn.GRU(hidden_size, int(hidden_size/2), bidirectional=True, batch_first=True)
        
        self.fc = nn.Linear(hidden_size, n_class)
        # NOTE(review): dropout is created here but never applied in forward().
        self.dropout = nn.Dropout(0.1)
#         self.conv = nn.Conv1d(in_channels=vocab.vectors.shape[1], out_channels=hidden_size,kernel_size=3, stride = 2)
        
    def forward(self, text, text_len):
        """Return per-class scores for a batch of token-id sequences.

        Args:
            text: Integer tensor of token ids used to index the embedding
                table; presumably (batch_size, seq_len) since the GRUs are
                batch-first — confirm against the iterator.
            text_len: Sequence lengths. Not used, so padded positions take
                part in the attention softmax like any other position.

        Returns:
            Tensor of shape (batch_size, n_class), passed through SiLU.
        """
        # Embedding lookup: (batch_size, seq_len, embedding_dim).
        # NOTE(review): the lookup runs on whatever device ``vocab.vectors``
        # lives on; the result is moved to ``device`` two lines below.
        outputs = self.embedding[text]
#         outputs = self.ln1(outputs)
#         outputs, _ = self.lstm1(outputs)
        outputs = outputs.to(device)
        outputs, _ = self.gru1(outputs)  # batch_size, seq_len, hidden_size
        # Attention: score every time step, normalise over the sequence axis.
        u = torch.tanh(torch.matmul(outputs, self.w_omega)) # batch_size, seq_len, hidden_size
        att = torch.matmul(u, self.u_omega) # batch_size, seq_len, 1
        att_score = F.softmax(att, dim=1) # batch_size, seq_len, 1 (sums to 1 over seq_len)
        scored_x = outputs * att_score # batch_size, seq_len, hidden_size
        # Attention-weighted sum over time steps.
        outputs = torch.sum(scored_x, dim=1) # batch_size, hidden_size
        # Re-insert a sequence axis of length 1 so gru2 can consume the pooled vector.
        outputs = torch.unsqueeze(outputs, 1) # batch_size, 1, hidden_size
        outputs, _ = self.gru2(outputs) # batch_size, 1, hidden_size
        # Mean over the single time step just drops that axis again.
        outputs = torch.mean(outputs, dim = 1) # batch_size, hidden_size
        outputs = self.fc(outputs) # batch_size, n_class
        # NOTE(review): SiLU is applied to the class scores before they are
        # handed to the loss — unusual for logits; confirm this is intended.
        outputs = F.silu(outputs)
#         outputs= torch.tanh(outputs) # batch_size * 5

        return outputs

In [None]:
def build_dict():
    """Build the label and title vocabularies and blank the special-token vectors.

    Works on the global fields ``category`` and ``title`` and the global
    datasets ``train_data`` and ``test_data``. The title vocabulary is built
    from both datasets with the default ``GloVe()`` vectors attached; the
    rows for the unknown and padding tokens are then overwritten with zeros.

    Returns:
        ``(vocab_size, vocab, n_class)``: size of the title vocabulary, the
        title vocabulary object, and the number of category labels.
    """
    category.build_vocab(train_data)
    title.build_vocab(train_data, test_data, vectors=GloVe())

    vocab = title.vocab
    pad_index = vocab.stoi[title.pad_token]
    unk_index = vocab.stoi[title.unk_token]

    # Zero out <unk> first, then <pad>, using a fresh zero vector each time.
    embedding_dim = vocab.vectors.shape[1]
    for special_index in (unk_index, pad_index):
        vocab.vectors[special_index] = torch.zeros(embedding_dim)

    return len(vocab), vocab, len(category.vocab)

In [None]:
def prepare_data(train, test):
    """Wrap two datasets in ``BucketIterator``s with a batch size of 64.

    Args:
        train: Dataset for training; batched sorted by title length, both
            across the dataset and within each batch.
        test: Dataset for evaluation/prediction; sorting and shuffling are
            all switched off.

    Returns:
        ``(train_iterator, test_iterator)``, both placing tensors on the
        global ``device``.
    """
    batch_size = 64

    train_iterator = data.BucketIterator(
        train,
        batch_size=batch_size,
        device=device,
        sort=True,
        sort_within_batch=True,
        sort_key=lambda example: len(example.title),
    )
    test_iterator = data.BucketIterator(
        test,
        batch_size=batch_size,
        device=device,
        sort=False,
        sort_within_batch=False,
        shuffle=False,
    )

    return train_iterator, test_iterator

In [None]:
# !pip install spacy-transformers
# !python -m spacy download en_core_web_trf

In [None]:
import spacy
# Load the spaCy pipeline only to obtain its default stop-word set.
# NOTE(review): ``stop_words`` is referenced solely from commented-out Field
# arguments in this notebook, so this load may be unnecessary — confirm.
nlp = spacy.load('en_core_web_trf')
stop_words = nlp.Defaults.stop_words

In [None]:
# Label field: no padding or unknown token, so its vocabulary is built from
# the label strings alone.
category = data.Field(batch_first=True, pad_token=None, unk_token=None)
# Title field: spaCy tokenisation, lower-cased, wrapped in <sos>/<eos> and
# fixed to length 10. ``include_lengths=True`` makes ``batch.title`` a
# ``(tokens, lengths)`` pair, which train/evaluate/predict unpack.
title = data.Field(tokenize='spacy', tokenizer_language='en_core_web_trf', fix_length=10, #stop_words=stop_words,
                   batch_first=True, lower= True, include_lengths=True, init_token='<sos>', eos_token='<eos>')
# title = data.Field(fix_length=10, stop_words=stop_words,
#                    batch_first=True, lower= True, include_lengths=True, init_token='<sos>', eos_token='<eos>')

# Training CSV: the first column is ignored, followed by category and title.
train_data = data.TabularDataset(
   path = 'news_data/train.csv',
   format = 'csv',
   fields = [(None, None), ('category', category), ('title', title)],
   skip_header = True
)

# Test CSV: the first column is ignored, followed by title (no labels).
test_data = data.TabularDataset(
   path = 'news_data/test.csv',
   format = 'csv',
   fields = [(None, None), ('title', title)],
   skip_header = True
)

# # check an example
# print(vars(test_data[0]))

In [None]:
# best_loss = float('inf')
# train_loss = train_acc = 0
# criterion = nn.CrossEntropyLoss()

# Build the vocabularies once. ``vocab`` must stay a module-level name
# because the RNN class reads ``vocab.vectors`` as a global.
vocab_size, vocab, n_class = build_dict()
# n_class

.vector_cache/glove.840B.300d.zip: 2.18GB [07:30, 4.83MB/s]                            
100%|█████████▉| 2195724/2196017 [04:00<00:00, 10063.94it/s]

In [None]:
# train_set, valid_set = train_data.split(split_ratio=0.8, random_state=random.getstate())

In [None]:
# N_EPOCHS = 15
# model = RNN(vocab_size, n_class).to(device)
# # optimizer = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)
# # optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-3)

# optimizer = optim.RMSprop(model.parameters(), lr = 1e-4, weight_decay=1e-4)
# training_data, testing_data = prepare_data(train_set, valid_set)
# # scheduler = optim.lr_scheduler.ExponentialLR(optimizer, 0.1)
    
# l_train, acc_train = [], []
# l_valid, acc_valid = [], []

# for epoch in range(N_EPOCHS):

#     train_loss, train_acc = train(training_data, optimizer, criterion)
#     valid_loss, valid_acc = evaluate(testing_data, optimizer, criterion)
#     # scheduler.step()
#     if valid_loss < best_loss:
#         best_loss = valid_loss
#         best_model = model
    
#     acc_train.append(train_acc)
#     l_train.append(train_loss)

#     acc_valid.append(valid_acc)
#     l_valid.append(valid_loss)

#     print(f'Epoch: {epoch+1}')
#     print('learning rate: ', optimizer.param_groups[0]['lr'])
#     print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
#     print(f'\tValid Loss: {valid_loss:.3f} | Valid Acc: {valid_acc*100:.2f}%')

In [None]:
# plt.plot(l_train)
# plt.plot(l_valid)

In [None]:
# plt.plot(acc_train)
# plt.plot(acc_valid)

In [None]:
# Build the iterators over the full training set and the unlabeled test set.
training_data, testing_data = prepare_data(train_data, test_data)
# Re-create the network and restore the saved weights, remapping the stored
# tensors onto the current device. ``model`` must be a module-level name:
# train() and evaluate() read it as a global.
model = RNN(vocab_size, n_class).to(device)
model.load_state_dict(torch.load('RNN_9189.pt',  map_location=torch.device(device)))
# train_loss, train_acc = train(training_data, optimizer, criterion)

# Predict a label per test row, prepend the row index as the "Id" column and
# write the submission file without the DataFrame index.
ans = predict(testing_data, model)
ans.insert(0, column="Id", value = ans.index.values)
ans.to_csv('0852629_submission_RNN.csv', index = False)

In [None]:
# torch.save(model.state_dict(), 'model_weight_9189.pt')