# Comparison of original CNN and new PyTorch CNN
  The PyTorch CNN is trained with a cosine-annealing-with-warm-restarts learning-rate scheduler.
  The experiment scheme:
  <pre>
                                   ┌> PT CNN ─Training─> Trained PT CNN ─────┬────> Stats 
                                   |                                |
  DATASET (70 seqs of 10 families) |            TEST DATASET (67 seqs of 10 families)                 
                                   |                                |
                                   └> Ch CNN ─Training─> Trained Ch CNN ─────┴────> Stats
  </pre>

In [1]:
import sys
import os
# Make the project sources importable: "<parent of cwd>/src" and
# "<parent of cwd>/src/original_cnn" are appended to sys.path, in that order.
_parent_parts = os.getcwd().split(os.sep)[:-1]
for _tail in (["src"], ["src", "original_cnn"]):
    sys.path.append(os.sep.join(_parent_parts + _tail))

In [2]:
import RunNN  # our cnn
from DataProcessing import AlignmentFilePrepare  # dataset class

from torch.utils.data import DataLoader

import json
from subprocess import Popen, PIPE


# IMPORTANT CONSTANTS
# Shared by every training/evaluation cell in this notebook.
# Passed as `batchsize` to RunNN.main and as `batch_size` to the test DataLoader.
BATCH_SIZE= 128
# Passed as `epoch` to RunNN.main.
EPOCH = 25
# Passed as `cpu` to RunNN.main (presumably disables GPU use when True -- confirm in RunNN).
CPU_ONLY = False

## Prepare datasets
Please stand by.

In [None]:
# Run the original CNN's prepareData.py on the training FASTA file and then
# on the test FASTA file, one child process at a time, printing each
# process's return code.
for fasta_path, out_dir in (
    ("../data/train.fasta", "../data/train"),  # training ds
    ("../data/test.fasta", "../data/test"),  # test ds
):
    cmd = ["python", "../src/original_cnn/prepareData.py"]
    cmd += ["-i", fasta_path]
    cmd += ["-o", out_dir]
    cmd += ["-t", "3"]  # adjust threads
    with Popen(cmd) as proc:
        exit_status = proc.wait()
        print("Return code is {}".format(exit_status))
    

## Train and test RunNN
### DAFS with alignments

In [None]:
# Train the PyTorch CNN on the training split with structure=False, evaluate
# it on the test split, and keep both the contents of stats.json and the
# evaluation result for later comparison.
# train model
model = RunNN.main(
    dataset="../data/train/ncRNApair_data.npy",
    label="../data/train/ncRNApair_labe.npy",
    genelabel="../data/train/genelabel.txt",
    batchsize=BATCH_SIZE,
    epoch=EPOCH,
    vpart=0.1,
    cpu=CPU_ONLY,
    structure=False,
    predictor=""  # empty, if you want to train model
)
# evaluate model
dataset = AlignmentFilePrepare(
    "../data/test/ncRNApair_data.npy",
    "../data/test/ncRNApair_labe.npy",
    "../data/test/genelabel.txt",
)
dl = DataLoader(dataset, num_workers=3, shuffle=True, batch_size=BATCH_SIZE)
# Evaluate exactly once and keep the result: a second pass over the whole
# test set only repeats the same work.
dafs_pt_cnn_results = RunNN.predict(model, dl)
# load the stats file (presumably written by RunNN -- confirm which call
# produces it); read after predict, as before
with open("stats.json") as fin:
    dafs_pt_cnn_history = json.load(fin)
# clean space
del model

### DAFS structure-only

In [None]:
# Train the PyTorch CNN on the training split with structure=True, evaluate
# it on the test split (dataset also built with structure=True), and keep
# both the contents of stats.json and the evaluation result.
# train model
model = RunNN.main(
    dataset="../data/train/ncRNApair_data.npy",
    label="../data/train/ncRNApair_labe.npy",
    genelabel="../data/train/genelabel.txt",
    batchsize=BATCH_SIZE,
    epoch=EPOCH,
    vpart=0.1,
    cpu=CPU_ONLY,
    structure=True,
    predictor=""  # empty, if you want to train model
)
# evaluate model
dataset = AlignmentFilePrepare(
    "../data/test/ncRNApair_data.npy",
    "../data/test/ncRNApair_labe.npy",
    "../data/test/genelabel.txt",
    structure=True,
)
dl = DataLoader(dataset, num_workers=3, shuffle=True, batch_size=BATCH_SIZE)
# Evaluate exactly once and keep the result: a second pass over the whole
# test set only repeats the same work.
struct_pt_cnn_results = RunNN.predict(model, dl)
# Backward-compatible alias for the earlier misspelled name, in case other
# cells still refer to it.
strcut_pt_cnn_results = struct_pt_cnn_results
# load the stats file (presumably written by RunNN -- confirm which call
# produces it); read after predict, as before
with open("stats.json") as fin:
    struct_pt_cnn_history = json.load(fin)
