In [8]:
%load_ext autoreload
%autoreload 2


## classic pydata stack
import os 
import numpy as np
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf

%matplotlib inline 

# Plot defaults for the whole notebook: ggplot look, wide 15x7 figures.
plt.style.use('ggplot')
plt.rc('figure', figsize=(15, 7))



## torch
import torch
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

## SEEDING
# Fix the global RNG seeds up front so that steps drawing from Python's, NumPy's or
# PyTorch's global generators give the same results on every "Restart & Run All".
# NOTE(review): project helpers that create their own generators are not covered by this.
import random

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Not read by any cell visible in this notebook; presumably consumed by the data
# pipeline to force rebuilding cached data — TODO confirm.
REBUILD_DATA = True

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
import sys

# Imported before the path tweak below, so on a fresh kernel this module must already
# be importable (presumably it sits next to the notebook — TODO confirm). `NN` is only
# referenced in the run notes at the end of the notebook, not in any visible code cell.
import vanilla_NN as NN

# Add the parent directory to the module search path; presumably that is where
# `pipeline` and `models` live — verify against the repository layout.
sys.path.append('..')
# NOTE(review): names used by later cells (AA0066_Pipeline, PolymerDataset, VanillaLSTM,
# train_test_split, train, test) are not defined in any visible cell, so they presumably
# come from these wildcard imports. Explicit imports would make their origin clear.
from pipeline import *
from models import *


In [3]:
# Number of blocks the preprocessing pipeline is configured with.
num_blocks = 3
# Pass the variable instead of repeating the literal, so changing `num_blocks` above
# actually changes the pipeline configuration.
pipe = AA0066_Pipeline(num_blocks=num_blocks)
# Build the dataset from the two .npy files, each processed through `pipe`
# (the constructor reports per-file progress while processing).
dataset = PolymerDataset(data_paths=["AA00400AA.npy", "AA66466AA.npy"], pipeline=pipe)

Processing AA00400AA.npy: 100%|██████████| 14162/14162 [00:41<00:00, 344.04it/s]
Processing AA66466AA.npy: 100%|██████████| 14185/14185 [01:37<00:00, 145.21it/s]


In [4]:
# Split the dataset into the two subsets used for training and evaluation below.
# NOTE(review): `train_test_split` is not defined in any visible cell (presumably a
# project helper from the wildcard imports); split ratio and shuffling are not visible here.
train_data, test_data = train_test_split(dataset)


In [5]:
# Instantiate VanillaLSTM with its input and output dimensions taken from the
# dataset's `num_features` and `num_classes` attributes.
model = VanillaLSTM(input_dim=dataset.num_features, output_dim=dataset.num_classes)

In [6]:
# Train on the training split for 20 epochs (batch size 512, learning rate 0.01);
# the value returned by train() is rebound to `model`.
# NOTE(review): re-running only this cell passes the already-trained model back into
# train(), presumably continuing training rather than restarting — re-run the
# model-construction cell first when a fresh run is intended.
model = train(train_data, model, batch_size=512, num_epochs=20, lr_rate=0.01)

epoch=0/20, loss=0.2741706689198812, accuracy=88.25733947753906
epoch=1/20, loss=0.15626064240932463, accuracy=93.5179443359375
epoch=2/20, loss=0.1443950868315167, accuracy=93.99858856201172
epoch=3/20, loss=0.12777058515283796, accuracy=94.78790283203125
epoch=4/20, loss=0.12934217817253538, accuracy=94.6556167602539
epoch=5/20, loss=0.12374301999807358, accuracy=94.8364028930664
epoch=6/20, loss=0.12511229945553673, accuracy=94.86727142333984
epoch=7/20, loss=0.12219851927624809, accuracy=94.88050079345703
epoch=8/20, loss=0.12046502182881037, accuracy=94.93782806396484
epoch=9/20, loss=0.1193733486864302, accuracy=95.11420440673828
epoch=10/20, loss=0.11511428455511728, accuracy=95.23767852783203
epoch=11/20, loss=0.12255211687750286, accuracy=94.93341827392578
epoch=12/20, loss=0.11374554948674308, accuracy=95.29940795898438
epoch=13/20, loss=0.11606910427411397, accuracy=95.18917083740234
epoch=14/20, loss=0.11330001817809211, accuracy=95.28617858886719
epoch=15/20, loss=0.113808

In [10]:
# Evaluate the trained model on the test split; prints accuracy, F1 score,
# precision and recall.
test(test_data,model=model)

Accuracy: 94.99%
F1 Score: 94.93%
Precision: 94.63%
Recall: 95.24%


List of runs:

With SGD:

model = NN.LSTM.train(dataset=train_data, num_features=num_features, num_blocks=num_blocks, hidden_dim=4, num_epochs=100, batch_size=64, lr=0.05, verbose='vv') 

92.62%

model = NN.LSTM.train(dataset=train_data, num_features=num_features, num_blocks=num_blocks, hidden_dim=6, num_epochs=100, batch_size=64, lr=0.05, verbose='vv') 

93.318% (increasing hidden_dim from 4 to 6)


With Adam:

model = NN.LSTM.train(dataset=train_data, num_features=num_features, num_blocks=num_blocks, hidden_dim=4, num_epochs=100, batch_size=64, lr=0.05, verbose='vv') 

- Accuracy: 91.81%
- F1 Score: 91.82%
- Precision: 91.30%
- Recall: 92.34%

model = NN.LSTM.train(dataset=train_data, num_features=num_features, num_blocks=num_blocks, hidden_dim=9, num_epochs=100, batch_size=64, lr=0.001, verbose='vv')

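(changed hidden_dim from 4 to 6 and lr from 0.05 to 0.001 — NOTE: the command above shows hidden_dim=9, which does not match this note; confirm which value was actually used)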


- Accuracy: 92.23%
- F1 Score: 92.35%
- Precision: 90.53%
- Recall: 94.25%


model = NN.LSTM.train(dataset=train_data, num_features=num_features, num_blocks=num_blocks, hidden_dim=6, num_epochs=100, batch_size=64, lr=0.001, verbose='vv')

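(changed hidden_dim from 4 to 9 — NOTE: the command above shows hidden_dim=6, which does not match this note; confirm which value was actually used)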


- Accuracy: 92.48%
- F1 Score: 92.56%
- Precision: 91.22%
- Recall: 93.93%


model = NN.LSTM.train(dataset=train_data, num_features=num_features, num_blocks=num_blocks, hidden_dim=12, num_epochs=300, batch_size=64, lr=0.001, verbose='vv')

(changed hidden_dim from 4 to 12 and num_epochs from 100 to 300)

- Accuracy: 92.16%
- F1 Score: 92.21%
- Precision: 91.25%
- Recall: 93.19%


