# Model

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx

from math import sqrt

from rdkit import Chem
from rdkit.Chem import AllChem, Draw, PandasTools, Descriptors
from rdkit.Chem.rdmolops import GetAdjacencyMatrix
from rdkit.Chem.Draw import IPythonConsole

from tqdm import tqdm

import torch
import torch.nn.functional as F
from torch.utils.data import random_split

from torch_geometric.data import Data, InMemoryDataset, download_url, extract_gz
from torch_geometric.loader import DataLoader
from torch_geometric.utils import to_networkx, from_smiles
from torch_geometric.nn import AttentiveFP

from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

import os
import random
from collections import Counter

from in_memory_dataset import MyOwnDataset

In [2]:
def global_seed(seed=2):
    """Seed the Python, PyTorch and NumPy random number generators.

    Also exports ``seed`` (as a string) in the ``PYTHONHASHSEED``
    environment variable.

    Args:
        seed: Value handed to every seeding call (default 2).
    """
    # Seeding calls that share the same one-argument signature, applied in order:
    # Python's `random`, then torch, then torch.cuda.
    for apply_seed in (random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)

    hash_seed = str(seed)  # environment values must be strings
    os.environ['PYTHONHASHSEED'] = hash_seed

    np.random.seed(seed)

### Loading Data

In [3]:
# Read the lipophilicity table from the CSV file under Data/.
df_final = pd.read_csv('Data/Lipophilicity.csv')

# Wrap the table in the project-local MyOwnDataset, using 'Data' as its root directory.
# NOTE(review): presumably this turns each row into a graph sample — confirm in in_memory_dataset.py.
lipo_ds = MyOwnDataset(root='Data', dataset=df_final)
lipo_ds  # bare last expression so the notebook displays the dataset repr

MyOwnDataset(4192)

In [7]:
# Train/test split (80 % / 20 %).
# Seed every RNG first: random_split draws its permutation from torch's global
# generator (and the shuffling DataLoader below uses it too), so without this
# call the split changes on every kernel restart and results are not comparable
# between runs.
global_seed()

train_size = int(len(lipo_ds) * 0.8)
test_size = len(lipo_ds) - train_size  # remainder, so both sizes always sum to len(lipo_ds)
lengths = [train_size, test_size]
train_dataset, test_dataset = random_split(lipo_ds, lengths)

# Data loaders: only the training loader is shuffled.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

### Model

**AttentiveFP model**
- PyTorch Geometric: https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.nn.models.AttentiveFP.html
- Research paper: https://pubs.acs.org/doi/10.1021/acs.jmedchem.9b00959

In [None]:
# AttentiveFP has no default architecture: in_channels, hidden_channels,
# out_channels, edge_dim, num_layers and num_timesteps are all required, so a
# bare AttentiveFP() raises TypeError.
# NOTE(review): assumes MyOwnDataset is a PyG Dataset exposing
# num_node_features / num_edge_features — TODO confirm in in_memory_dataset.py.
# The hyperparameter values below are starting points, not tuned values.
model = AttentiveFP(
    in_channels=lipo_ds.num_node_features,  # node feature width, taken from the dataset
    hidden_channels=200,
    out_channels=1,  # presumably one regression target per molecule — verify
    edge_dim=lipo_ds.num_edge_features,  # edge feature width, taken from the dataset
    num_layers=2,
    num_timesteps=2,
    dropout=0.2,
)