# Polymer Properties with CNN

In [25]:
import sys, os

# Make the parent directory importable so the project-local `model` and `utils`
# packages imported in the following cell can be resolved.
# NOTE(review): "../" is resolved against the kernel's working directory, so
# this assumes the notebook is launched from its own folder -- confirm.
sys.path.append("../")

In [26]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import torch
from torch import nn
from model.models import PolymerCNN
from model.training import train, configure
from model.data import PolymerDataset
from utils.loss import MADLoss, weightedMADLoss
from utils.visualize import visualize_loss

import pandas as pd

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Load the dataset

In [27]:
# Use the first CUDA device when PyTorch reports one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# The training CSV is wrapped by the project's dataset class, which is also
# handed the target device; the test CSV is kept as a plain DataFrame.
data = PolymerDataset("../data/train.csv", device)
test_set = pd.read_csv("../data/test.csv")



### Create the model

In [28]:
# One network per target property, all built from the same positional
# arguments, which are forwarded to `PolymerCNN` unchanged.
POLYMER_CNN_ARGS = (1, (32, 64, 64, 64, 128, 128), ((4, 8), (8, 16, 16, 32)), 2, (2, 4), 512)


def build_polymer_cnn():
    """Return a newly constructed ``PolymerCNN`` built from ``POLYMER_CNN_ARGS``."""
    return PolymerCNN(*POLYMER_CNN_ARGS)


# Each call creates a separate instance; construction order is
# tg, ffv, tc, density, rg.
tg_model = build_polymer_cnn()
ffv_model = build_polymer_cnn()
tc_model = build_polymer_cnn()
density_model = build_polymer_cnn()
rg_model = build_polymer_cnn()

In [32]:
# Configure the models
# Every model is configured identically: same device, `MADLoss`, same learning rate.
LEARNING_RATE = 0.0001


def _configure_with_mad(model):
    """Call ``configure`` for ``model`` with the shared settings.

    Returns whatever ``configure`` returns; the callers below unpack it as a
    ``(criterion, optimizer)`` pair.
    """
    return configure(model, device, MADLoss, lr=LEARNING_RATE)


tg_criterion, tg_optimizer = _configure_with_mad(tg_model)
ffv_criterion, ffv_optimizer = _configure_with_mad(ffv_model)
tc_criterion, tc_optimizer = _configure_with_mad(tc_model)
density_criterion, density_optimizer = _configure_with_mad(density_model)
rg_criterion, rg_optimizer = _configure_with_mad(rg_model)

In [29]:
from time import perf_counter

In [None]:
# Per-property lookup tables. All three dicts share the same keys, inserted in
# the same order: tg, tc, ffv, density, rg.
models = {
    "tg": tg_model,
    "tc": tc_model,
    "ffv": ffv_model,
    "density": density_model,
    "rg": rg_model,
}
optimizers = {
    "tg": tg_optimizer,
    "tc": tc_optimizer,
    "ffv": ffv_optimizer,
    "density": density_optimizer,
    "rg": rg_optimizer,
}
criterions = {
    "tg": tg_criterion,
    "tc": tc_criterion,
    "ffv": ffv_criterion,
    "density": density_criterion,
    "rg": rg_criterion,
}

In [None]:
# Train the models
# NOTE(review): `train` is called with no arguments, although the `models`,
# `optimizers` and `criterions` dicts (and `data` / `device`) are defined in
# earlier cells and not used anywhere else in view. Presumably some of them are
# meant to be passed here -- confirm against the signature of
# `model.training.train`.
# The two `perf_counter` readings bracket the call, so `end - start` is the
# elapsed wall-clock time in seconds; this cell does not compute or display it.
start = perf_counter()
train_history, test_history = train()
end = perf_counter()

### Evaluate models

In [None]:
# Hand both histories returned by `train()` to the project's `visualize_loss`
# helper (presumably it plots them as loss curves -- see `utils.visualize`).
visualize_loss(train_history, test_history)

### Predictions for the test set

In [30]:
# Vectorize the text
from utils.vectorize import mol_vectorize, EMBED_DIM
import numpy as np

In [33]:
# Turn the SMILES column into a NumPy feature array, cast it to float32 and
# wrap it as a tensor.
smiles_features = mol_vectorize(test_set["SMILES"], embed_dim=EMBED_DIM)
X_test = torch.from_numpy(smiles_features.astype(np.float32))

# Reshape to (n_samples, 1, features): a singleton axis is inserted after the
# batch axis and all remaining axes are flattened into the last one.
n_samples = X_test.size(0)
X_test = X_test.view(n_samples, 1, -1)



In [34]:
# Predict
def _predict(model, inputs):
    """Run ``model`` on ``inputs`` for inference and return its raw output.

    Assumes ``model`` is a ``torch.nn.Module``. The module is put in eval mode
    so layers such as dropout or batch norm behave deterministically, and the
    forward pass runs under ``torch.no_grad()`` so no autograd graph is built.
    The batch is moved to the device of the model's first parameter, which
    keeps the call valid whether the model lives on the CPU or on a GPU.
    """
    model.eval()
    first_param = next(model.parameters(), None)
    if first_param is not None:
        inputs = inputs.to(first_param.device)
    with torch.no_grad():
        return model(inputs)


# Tg and Rg outputs go through the dataset's `convert_tg` / `convert_rg`
# (presumably mapping model outputs back to the original target scale --
# confirm in `model.data`). The other three are used as-is, moved to the CPU.
tg_outs = data.convert_tg(_predict(tg_model, X_test))
ffv_outs = _predict(ffv_model, X_test).cpu()
tc_outs = _predict(tc_model, X_test).cpu()
density_outs = _predict(density_model, X_test).cpu()
rg_outs = data.convert_rg(_predict(rg_model, X_test))

In [38]:
# Write the result to the csv file
preds_file = "../data/test_out.csv"


def _as_column(outputs):
    """Convert a prediction tensor into a 1-D Series aligned with ``test_set``.

    The tensor is detached, copied to the CPU and converted to NumPy explicitly
    instead of relying on pandas to coerce a ``torch.Tensor``; that coercion
    fails for tensors living on a GPU. ``reshape(-1)`` flattens regardless of
    memory layout. Passing ``test_set.index`` makes a row-count mismatch raise
    instead of being silently padded with NaN during column assignment.
    """
    flat = outputs.detach().cpu().numpy().reshape(-1)
    return pd.Series(flat, index=test_set.index)


# Columns are added to `test_set` in place, in this order.
test_set["Tg"] = _as_column(tg_outs)
test_set["Tc"] = _as_column(tc_outs)
test_set["Rg"] = _as_column(rg_outs)
test_set["Density"] = _as_column(density_outs)
test_set["FFV"] = _as_column(ffv_outs)

# NOTE(review): the DataFrame index is written as an extra unnamed first
# column; pass `index=False` if the consumer of this file does not expect it.
test_set.to_csv(preds_file)