# Neural-based classification rule learning for sequential data

## Install and import the required packages

In [None]:
# Install the dependencies listed in requirements.txt.
# NOTE(review): a bare `pip ...` line is not Python; it presumably relies on
# IPython automagic resolving it to `%pip` — confirm automagic is enabled.
pip install -r requirements.txt

In [None]:
from src.run import train, test, test_rule

In [None]:
# File name handed to train() as `path_checkpoint` by the model-run cells.
path_checkpoint = 'peptides_checkpoint.pt'

## Parameters

### Data

In [None]:
# Install `requests`, which the data-download cell imports.
# NOTE(review): presumably relies on IPython automagic (`%pip`) — confirm.
pip install requests

In [None]:
import pandas as pd
import requests
from io import BytesIO
from zipfile import ZipFile
import os

# Load the breast-cancer anticancer-peptide table into `df`.
# On first use the UCI archive is downloaded and the CSV it contains is cached
# in the working directory; later runs read the cached copy instead.
csv_name = "ACPs_Breast_cancer.csv"
if not os.path.exists(csv_name):
    url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00589/Anticancer_Peptides.zip'
    # Bound the wait and fail on HTTP errors, so that an error page is not
    # handed to ZipFile and reported as a confusing BadZipFile.
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    # Context managers close the archive and the member once parsing is done.
    with ZipFile(BytesIO(r.content)) as z:
        with z.open(csv_name) as member:
            df = pd.read_csv(member)
    # index=False keeps the cached file column-for-column identical to the
    # downloaded one; writing the index would add an "Unnamed: 0" column on
    # reload, making `df` differ between the first run and later runs.
    # NOTE: a cache written before this fix still carries that extra column;
    # delete the file once to refresh it.
    df.to_csv(csv_name, index=False)
else:
    df = pd.read_csv(csv_name)
df

In [None]:
# Values forwarded to construct_data() as batch_size, val_size and test_size.
batch_size = 100
val_size = 0.2
test_size = 0.2

In [None]:
from src.data.datasets import UCIAnticancerDataset
from src.data.util import construct_data

# Wrap the peptide table in the project dataset class, then build the
# train / validation / test loaders from it with a fixed seed.
dataset = UCIAnticancerDataset(df)
train_loader, val_loader, test_loader = construct_data(
    dataset,
    batch_size=batch_size,
    val_size=val_size,
    test_size=test_size,
    seed=42,
)

### Model

In [None]:
# Model dimensions passed positionally to LocalModel / GlobalModel.
input_size = len(dataset.features_name)  # one input per feature name
window_size = 3
base_model_hidden_size = 2 * window_size
base_or_output_size = 1
output_size = 1

pad_border = True

# Largest `max_length` reported by the batch samplers of the three loaders.
max_sequence_length = max(
    loader.batch_sampler.max_length
    for loader in (train_loader, val_loader, test_loader)
)

# Dimension of the convOR input: the sequence length minus (window_size - 1),
# plus 2 * (window_size - 1) when the borders are padded, which nets out to
# the sequence length plus (window_size - 1).
if pad_border:
    conv_dim_out = max_sequence_length + (window_size - 1)
else:
    conv_dim_out = max_sequence_length - (window_size - 1)

### Training

In [None]:
from src.sparsify import Pruning

# Candidate values for `pruning_strategy`, which is passed to train() as
# `pruning=`.
# Pruning built with the class defaults.
pruning = Pruning()
# Pruning built with start=30.
# NOTE(review): presumably delays pruning until epoch 30 — confirm in src.sparsify.
pruning30 = Pruning(start=30)
# Plain False instead of a Pruning instance.
# NOTE(review): presumably train() treats a falsy value as "do not prune" — verify.
no_pruning = False

In [None]:
# Choose one of: pruning, pruning30, no_pruning.
pruning_strategy = pruning
epochs = 200
# All remaining training parameters are fixed directly inside train().

## Run Local model

In [None]:
from src.models import LocalModel

# Ten runs: each builds a fresh LocalModel, trains it, extracts its rule, and
# evaluates both the underlying network and the extracted rule on the test
# loader.
for run_idx in range(10):
    local_model = LocalModel(
        input_size,
        window_size,
        pad_border,
        max_sequence_length,
        base_model_hidden_size,
        conv_dim_out,
        base_or_output_size,
        output_size,
    )
    train(
        local_model,
        train_loader,
        val_loader,
        pruning=pruning_strategy,
        epochs=epochs,
        path_checkpoint=path_checkpoint,
        verbose=True,
    )
    rule = local_model.extract_rule(
        features_names=dataset.features_name,
        verbose=False,
    )
    test(local_model.model, test_loader)
    test_rule(rule, test_loader)
    # Separator between the output of consecutive runs.
    print('-----')

## Run Global Model

In [None]:
from src.models import GlobalModel

# Ten runs: each builds a fresh GlobalModel, trains it, extracts its rule, and
# evaluates both the underlying network and the extracted rule on the test
# loader.
for run_idx in range(10):
    global_model = GlobalModel(
        input_size,
        window_size,
        pad_border,
        max_sequence_length,
        base_model_hidden_size,
        conv_dim_out,
        base_or_output_size,
        output_size,
    )
    train(
        global_model,
        train_loader,
        val_loader,
        pruning=pruning_strategy,
        epochs=epochs,
        path_checkpoint=path_checkpoint,
        verbose=True,
    )
    rule = global_model.extract_rule(
        features_names=dataset.features_name,
        verbose=False,
    )
    test(global_model.model, test_loader)
    test_rule(rule, test_loader)
    # Separator between the output of consecutive runs.
    print('-----')