In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

In [7]:
canonical_file="../dataset/canonical_trainset.csv"
canonical_smiles_df=pd.read_csv(canonical_file)

In [8]:
smiles_1d = canonical_smiles_df['SMILES'][:50].tolist()
labels_1d = canonical_smiles_df['Label'][:50].apply(lambda x: 1 if x == 'Positive' else 0).tolist()

In [9]:
import os
import sys
import nbimporter

project_root = os.path.join(os.getcwd(), '..')
sys.path.append(project_root)

from Feature_extract.Feature_extract_1D import *
from datapreparation import Data_Processing_2D as nb2
from Feature_extract.Feature_extract_2D import *
from datapreparation import Data_Processing_3D as nb3
from Feature_extract.Feature_extract_3D import *
from Feature_extract.Feature_extract_2D import get_torch_graph_data_list
from torch_geometric.loader import DataLoader as GeometricDataLoader
from torch_geometric.data import Data

In [10]:
model_1d = RNNModel(21, 64, 256, 1)
model_2d = GCN(12)
model_3d = Simple3DCNN()

model_1d.load_state_dict(torch.load('../Feature_extract/best1_model.pth',map_location=torch.device('cpu')))
model_2d.load_state_dict(torch.load('../Feature_extract/best2_model.pth',map_location=torch.device('cpu')))
model_3d.load_state_dict(torch.load('../Feature_extract/best3_model.pth',map_location=torch.device('cpu')))


<All keys matched successfully>

In [11]:
# 1D
tokenizer_1d = create_vocab(smiles_1d)
all_dataset_1d = SMILESDataset(smiles_1d, labels_1d, tokenizer_1d)
all_data_loader_1d = DataLoader(all_dataset_1d, batch_size=4, shuffle=False, collate_fn=collate_fn)
all_features_1d, all_labels_1d = extract_features_1d(model_1d, all_data_loader_1d)

In [12]:
# 2D
graph_data_2d = nb2.preprocess_smiles_with_labels(canonical_smiles_df["SMILES"][:50].values, canonical_smiles_df["Label"][:50].values)
torch_graph_data_list = get_torch_graph_data_list(graph_data_2d)

all_loader_2d = GeometricDataLoader(torch_graph_data_list, batch_size=4, shuffle=False)
all_features_2d, all_labels_2d = extract_features_2d(model_2d, all_loader_2d)

In [13]:
# 3D
graph_data_3d = nb3.preprocess_smiles_with_labels_3d(canonical_smiles_df["SMILES"][:50].values, canonical_smiles_df["Label"][:50].values)
voxels_3d = np.array([item['voxels'] for item in graph_data_3d])  
labels_3d = np.array([item['label'] for item in graph_data_3d])  
voxels_3d = torch.tensor(voxels_3d, dtype=torch.float).unsqueeze(1)
labels_3d = torch.tensor(labels_3d, dtype=torch.long)
all_loader_3d = DataLoader(TensorDataset(voxels_3d, labels_3d), batch_size=4, shuffle=False)
all_features_3d, all_labels_3d = extract_features_3d(model_3d, all_loader_3d)

In [14]:
print(all_features_1d.shape, all_labels_1d.shape)
print(all_features_2d.shape, all_labels_2d.shape)
print(all_features_3d.shape, all_labels_3d.shape)

(50, 256) (50, 1)
(50, 32) (50,)
(50, 128) (50,)


In [23]:
fused_features = np.concatenate([all_features_1d, all_features_2d, all_features_3d], axis=1)
labels = all_labels_1d.ravel()

scaler = StandardScaler()
fused_features = scaler.fit_transform(fused_features)

features_train, features_test, labels_train, labels_test = train_test_split(fused_features, labels, test_size=0.2, random_state=42)

features_train = torch.tensor(features_train, dtype=torch.float32)
features_test = torch.tensor(features_test, dtype=torch.float32)
labels_train = torch.tensor(labels_train, dtype=torch.long)
labels_test = torch.tensor(labels_test, dtype=torch.long)

train_dataset = TensorDataset(features_train, labels_train)
test_dataset = TensorDataset(features_test, labels_test)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [24]:
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [25]:
fused_dim = fused_features.shape[1]
model = MLP(input_size=fused_dim, hidden_size=100, num_classes=2)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()


def train(model, train_loader, optimizer, criterion):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    for data, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    
    avg_loss = total_loss / len(train_loader)
    accuracy = correct / total
    return avg_loss, accuracy

def evaluate(model, test_loader, criterion):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for data, labels in test_loader:
            outputs = model(data)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    avg_loss = total_loss / len(test_loader)
    accuracy = correct / total
    return avg_loss, accuracy

num_epochs = 10  
for epoch in range(num_epochs):
    train_loss, train_accuracy = train(model, train_loader, optimizer, criterion)
    val_loss, val_accuracy = evaluate(model, test_loader, criterion)
    print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}, Validation Loss: {val_loss:.4f}, Validation Acc: {val_accuracy:.4f}')


Epoch 1: Train Loss: 0.7612, Train Acc: 0.1750, Validation Loss: 0.5686, Validation Acc: 1.0000
Epoch 2: Train Loss: 0.5861, Train Acc: 0.9000, Validation Loss: 0.4354, Validation Acc: 1.0000
Epoch 3: Train Loss: 0.4658, Train Acc: 0.9250, Validation Loss: 0.3423, Validation Acc: 1.0000
Epoch 4: Train Loss: 0.3817, Train Acc: 1.0000, Validation Loss: 0.2678, Validation Acc: 1.0000
Epoch 5: Train Loss: 0.3156, Train Acc: 1.0000, Validation Loss: 0.2060, Validation Acc: 1.0000
Epoch 6: Train Loss: 0.2620, Train Acc: 0.9750, Validation Loss: 0.1558, Validation Acc: 1.0000
Epoch 7: Train Loss: 0.2189, Train Acc: 0.9500, Validation Loss: 0.1167, Validation Acc: 1.0000
Epoch 8: Train Loss: 0.1854, Train Acc: 0.9500, Validation Loss: 0.0870, Validation Acc: 1.0000
Epoch 9: Train Loss: 0.1598, Train Acc: 0.9500, Validation Loss: 0.0654, Validation Acc: 1.0000
Epoch 10: Train Loss: 0.1398, Train Acc: 0.9500, Validation Loss: 0.0499, Validation Acc: 1.0000
