# LSTM models Notebook

In this notebook, we train and evaluate LSTM classifiers that distinguish between two polymer backbones (`AA00400AA` and `AA66466AA`).

In [15]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2


## classic pydata stack
import os 
import numpy as np
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf

%matplotlib inline 

plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (15,7)



## torch
import torch
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

## SEEDING
# Seed every RNG this notebook can reach so that a Restart & Run All gives
# repeatable results. NOTE(review): this presumably covers the project's
# train_test_split and model initialisation -- confirm which RNG they draw from.
import random

SEED = 42
random.seed(SEED)        # Python stdlib RNG
np.random.seed(SEED)     # NumPy legacy global RNG
torch.manual_seed(SEED)  # PyTorch default generator



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
import sys
# Add '..' to the module search path; presumably the project-local `pipeline`
# and `models` modules live in the parent directory -- TODO confirm.
sys.path.append('..')
# NOTE(review): star imports hide the origin of the names used below
# (AA0066_Pipeline, PolymerDataset, train_test_split, train, LeakyReluLSTM,
# VanillaLSTM). Prefer explicit imports once the defining module of each
# name has been confirmed.
from pipeline import *
from models import *


In [7]:
# Preprocessing pipeline configured with three blocks.
pipe3 = AA0066_Pipeline(
    num_blocks=3,
)
# Load both .npy recordings through that pipeline into a single dataset.
dataset3 = PolymerDataset(
    data_paths=[
        "AA00400AA.npy",
        "AA66466AA.npy",
    ],
    pipeline=pipe3,
)

Processing AA00400AA.npy: 100%|██████████| 14162/14162 [00:45<00:00, 308.34it/s]
Processing AA66466AA.npy: 100%|██████████| 14185/14185 [01:41<00:00, 139.52it/s]


In [8]:
# Split the dataset into train and test partitions using train_test_split's defaults.
# NOTE(review): no seed or split ratio is passed here -- confirm the defaults and
# whether the split is reproducible across kernel restarts.
train_data3, test_data3 = train_test_split(dataset3)

### Leaky ReLU LSTM

In [9]:
# Instantiate the LeakyReluLSTM, sized from the dataset's feature and class counts.
model = LeakyReluLSTM(
    input_dim=dataset3.num_features,
    output_dim=dataset3.num_classes,
    hidden_dim=64,
)

# train() returns a pair: the model and a metrics object, bound here to test_metrics.
model, test_metrics = train(
    train_dataset=train_data3,
    test_dataset=test_data3,
    model=model,
    batch_size=512,
    num_epochs=100,
    lr_rate=0.01,
    verbose=2,
    log=False,
)

epoch=0/100, loss=0.27301207813951706, accuracy=88.34994506835938
epoch=1/100, loss=0.15561555690235562, accuracy=93.59291076660156
epoch=2/100, loss=0.13936342315541372, accuracy=94.28520965576172
epoch=3/100, loss=0.12737100041574903, accuracy=94.68647766113281
epoch=4/100, loss=0.1310902660091718, accuracy=94.69088745117188
epoch=5/100, loss=0.12467977404594421, accuracy=94.88050079345703
epoch=6/100, loss=0.12331853922870424, accuracy=94.986328125
epoch=7/100, loss=0.12011118630568186, accuracy=94.9422378540039
epoch=8/100, loss=0.11666261106729507, accuracy=95.22003936767578
epoch=9/100, loss=0.11618759665224287, accuracy=95.23326873779297
epoch=10/100, loss=0.11773521569040087, accuracy=95.05247497558594
epoch=11/100, loss=0.11831288420491748, accuracy=95.1362533569336
epoch=12/100, loss=0.11721382174226973, accuracy=95.16271209716797
epoch=13/100, loss=0.11611947235133913, accuracy=95.10538482666016
epoch=14/100, loss=0.11539284918043348, accuracy=95.24649810791016
epoch=15/100,

In [10]:
# Display the metrics returned by train() for the LeakyReluLSTM run above.
test_metrics

{'accuracy': 0.9532545422473099,
 'f1_score': 0.952053555274109,
 'precision': 0.9623262618873446,
 'recall': 0.9419978517722879,
 'confusion_matrix': array([[0.964, 0.036],
        [0.058, 0.942]])}

### Vanilla LSTM

In [17]:
# Instantiate the VanillaLSTM, sized from the dataset's feature and class counts.
model2 = VanillaLSTM(
    input_dim=dataset3.num_features,
    output_dim=dataset3.num_classes,
    hidden_dim=64,
)

# Same training call as for the LeakyReluLSTM; train() returns the model and
# a metrics object, bound here to test_metrics2.
model2, test_metrics2 = train(
    train_dataset=train_data3,
    test_dataset=test_data3,
    model=model2,
    batch_size=512,
    num_epochs=100,
    lr_rate=0.01,
    verbose=2,
    log=False,
)

epoch=0/100, loss=0.105826, accuracy=95.5993
epoch=0/100, test_accuracy=95.0609
epoch=1/100, loss=0.104416, accuracy=95.5419
epoch=1/100, test_accuracy=95.4313
epoch=2/100, loss=0.102956, accuracy=95.7668
epoch=2/100, test_accuracy=95.7135
epoch=3/100, loss=0.104752, accuracy=95.6874
epoch=3/100, test_accuracy=95.1491
epoch=4/100, loss=0.107160, accuracy=95.5904
epoch=4/100, test_accuracy=95.3960
epoch=5/100, loss=0.099907, accuracy=95.7492
epoch=5/100, test_accuracy=95.5195
epoch=6/100, loss=0.106348, accuracy=95.6257
epoch=6/100, test_accuracy=95.4489
epoch=7/100, loss=0.102913, accuracy=95.7580
epoch=7/100, test_accuracy=95.3607
epoch=8/100, loss=0.103333, accuracy=95.6213
epoch=8/100, test_accuracy=95.4666
epoch=9/100, loss=0.102769, accuracy=95.6301
epoch=9/100, test_accuracy=95.5195
epoch=10/100, loss=0.102964, accuracy=95.5948
epoch=10/100, test_accuracy=95.5019
epoch=11/100, loss=0.102976, accuracy=95.7404
epoch=11/100, test_accuracy=95.4842
epoch=12/100, loss=0.102631, accurac