In [8]:
import torch

import numpy as np
import torch
import os
import argparse

from torch.utils.data import DataLoader, Dataset, Subset

from utils import seed_torch, seed_worker
from train import create_parser
from models import *
from modules import *

In [9]:
# Build the project's argument parser once; every configuration below starts
# from its default values.
parser = create_parser()

# NOTE: always call parser.parse_args([]) here, never parser.parse_args().
# Without an explicit list argparse falls back to sys.argv, which inside a
# Jupyter kernel holds the kernel's own launch arguments, so parsing fails.
# Details: https://stackoverflow.com/questions/50360012/python-argparse-error-error-argument-count-invalid-int-value
#
# One separate namespace per model configuration (args_1 .. args_4) plus one
# each for the stand-alone encoder and decoder modules.
args_1, args_2, args_3, args_4, args_enc, args_dec = (
    parser.parse_args([]) for _ in range(6)
)

# Point every configuration at the CPU device.
for _namespace in (args_1, args_2, args_3, args_4, args_enc, args_dec):
    _namespace.device = torch.device("cpu")

### 1. Module Parameters

In [42]:
# Set the hidden size to 448 on the configurations used for the stand-alone
# encoder and decoder modules, replacing the value produced by the parser.
# NOTE(review): this cell's saved execution count (42) is later than that of
# the RNN module cell (5), so the saved RNN counts may have been produced with
# a different dim_hidden -- confirm with Restart Kernel -> Run All.
args_enc.dim_hidden = 448
args_dec.dim_hidden = 448

In [5]:
# Instantiate the RNN encoder and two attention decoders. Both decoders are
# built from the same args_dec namespace.
enc_rnn = EncoderRNN(args_enc)
dec_rnn_LR = AttnDecoderRNN(args_dec)
dec_rnn_noLR = AttnDecoderRNN(args_dec)

_rnn_modules = (enc_rnn, dec_rnn_LR, dec_rnn_noLR)

# Total number of parameter elements per module.
num_total_enc_rnn, num_total_dec_rnn_LR, num_total_dec_rnn_noLR = (
    sum(p.numel() for p in module.parameters()) for module in _rnn_modules
)

# Same count restricted to parameters with requires_grad set (trainable).
num_trainable_enc_rnn, num_trainable_dec_rnn_LR, num_trainable_dec_rnn_noLR = (
    sum(p.numel() for p in module.parameters() if p.requires_grad)
    for module in _rnn_modules
)

In [6]:
# Show the total parameter counts: RNN encoder, then the two attention decoders.
num_total_enc_rnn, num_total_dec_rnn_LR, num_total_dec_rnn_noLR

(3906048, 3249375, 3249375)

In [7]:
# Show the trainable (requires_grad) parameter counts for the same three RNN modules.
num_trainable_enc_rnn, num_trainable_dec_rnn_LR, num_trainable_dec_rnn_noLR

(3906048, 3249375, 3249375)

In [43]:
# Instantiate the Transformer encoder and two Transformer decoders. Both
# decoders are built from the same args_dec namespace.
enc_trans = EncoderTrans(args_enc)
dec_trans_LR = DecoderTrans(args_dec)
dec_trans_noLR = DecoderTrans(args_dec)

_trans_modules = (enc_trans, dec_trans_LR, dec_trans_noLR)

# Total number of parameter elements per module.
num_total_enc_trans, num_total_dec_trans_LR, num_total_dec_trans_noLR = (
    sum(p.numel() for p in module.parameters()) for module in _trans_modules
)

# Same count restricted to parameters with requires_grad set (trainable).
num_trainable_enc_trans, num_trainable_dec_trans_LR, num_trainable_dec_trans_noLR = (
    sum(p.numel() for p in module.parameters() if p.requires_grad)
    for module in _trans_modules
)

In [44]:
# Show the total parameter counts: Transformer encoder, then the two decoders.
num_total_enc_trans, num_total_dec_trans_LR, num_total_dec_trans_noLR

(8591616, 10779455, 10779455)

### 2. Model Parameters

In [3]:
# Inspect the hidden size currently stored on each of the four model configurations.
args_1.dim_hidden, args_2.dim_hidden, args_3.dim_hidden, args_4.dim_hidden

(256, 256, 256, 256)

In [16]:
# Set the hidden size to 384 on args_1 only; args_1 is the configuration
# passed to Seq2SeqNoFact and TransNoFact.
# NOTE(review): this cell's saved execution count (16) is later than those of
# the model cells (10-15), so the saved no-fact counts may have been produced
# with a different dim_hidden -- confirm with Restart Kernel -> Run All.
args_1.dim_hidden = 384

In [10]:
# Instantiate the four seq2seq variants, each from its own argument namespace.
# NOTE(review): the following cell constructs these same four models again
# with identical statements, so this cell looks redundant -- confirm and
# consider removing it.
seq2seq_no_fact = Seq2SeqNoFact(args_1)
seq2seq_fact = Seq2SeqFact(args_2)
seq2seq_naive_2enc = Seq2SeqFactNaive_2enc(args_3)
seq2seq_naive = Seq2SeqFactNaive(args_4)

In [11]:
# Instantiate the four seq2seq variants, each from its own argument namespace.
seq2seq_no_fact = Seq2SeqNoFact(args_1)
seq2seq_fact = Seq2SeqFact(args_2)
seq2seq_naive_2enc = Seq2SeqFactNaive_2enc(args_3)
seq2seq_naive = Seq2SeqFactNaive(args_4)

_seq2seq_models = (seq2seq_no_fact, seq2seq_fact, seq2seq_naive_2enc, seq2seq_naive)

# Total number of parameter elements in each model.
(
    num_total_seq2seq_no_fact,
    num_total_seq2seq_fact,
    num_total_seq2seq_naive_2enc,
    num_total_seq2seq_naive,
) = (sum(p.numel() for p in model.parameters()) for model in _seq2seq_models)

# Number of trainable parameter elements (requires_grad set) in each model.
(
    num_trainable_seq2seq_no_fact,
    num_trainable_seq2seq_fact,
    num_trainable_seq2seq_naive_2enc,
    num_trainable_seq2seq_naive,
) = (
    sum(p.numel() for p in model.parameters() if p.requires_grad)
    for model in _seq2seq_models
)

In [12]:
# Show the total parameter counts of the four seq2seq models.
num_total_seq2seq_no_fact, num_total_seq2seq_fact, num_total_seq2seq_naive_2enc, num_total_seq2seq_naive

(2179807, 4206237, 5372829, 6539421)

In [14]:
# Show the trainable (requires_grad) parameter counts of the four seq2seq models.
num_trainable_seq2seq_no_fact, num_trainable_seq2seq_fact, num_trainable_seq2seq_naive_2enc, num_trainable_seq2seq_naive

(2179807, 4206237, 5372829, 6539421)

In [13]:
# Instantiate the three Transformer variants, each from its own argument namespace.
trans_no_fact = TransNoFact(args_1)
trans_fact = TransFact(args_2)
trans_naive = TransFactNaive(args_3)

_trans_models = (trans_no_fact, trans_fact, trans_naive)

# Total number of parameter elements in each model.
num_total_trans_no_fact, num_total_trans_fact, num_total_trans_naive = (
    sum(p.numel() for p in model.parameters()) for model in _trans_models
)

# Number of trainable parameter elements (requires_grad set) in each model.
num_trainable_trans_no_fact, num_trainable_trans_fact, num_trainable_trans_naive = (
    sum(p.numel() for p in model.parameters() if p.requires_grad)
    for model in _trans_models
)

In [15]:
# Show the total parameter counts of the three Transformer models.
num_total_trans_no_fact, num_total_trans_fact, num_total_trans_naive

(9274367, 19178493, 27823101)