In [13]:
# Imports

# Standard library
import os

# Chemistry and plotting
import rdkit
import openbabel
import matplotlib.pyplot as plt
import matplotlib

# PyTorch Lightning
import pytorch_lightning as pl
import seaborn as sns

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data

from tqdm.notebook import tqdm
from IPython.display import set_matplotlib_formats
from pytorch_lightning.callbacks import ModelCheckpoint

# Import GPU-related things
if (torch.cuda.is_available()):
    import cupy as np
    import cudf as pd
    
    # Ensure that all operations are deterministic on GPU (if used) for reproducibility
    torch.backends.cudnn.determinstic = True
    torch.backends.cudnn.benchmark = False
else:
    import numpy as np
    import pandas as pd
    
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Plotting
plt.set_cmap("cividis")
%matplotlib inline
set_matplotlib_formats("svg", "pdf")  # For export
matplotlib.rcParams["lines.linewidth"] = 2.0
sns.reset_orig()

# Path to the folder where the datasets are/should be downloaded (e.g. CIFAR10)
DATASET_PATH = os.environ.get("PATH_DATASETS", "data/")
# Path to the folder where the pretrained models are saved
CHECKPOINT_PATH = os.environ.get("PATH_CHECKPOINT", "saved_models/")

# Setting the seed
pl.seed_everything(42)

print('CUDA:', torch.cuda.is_available())
print("Device:", device)

  set_matplotlib_formats("svg", "pdf")  # For export
Global seed set to 42


CUDA: False
Device: cpu


In [None]:
# SDF files the notebook expects: edrug3d.sdf plus qm9-1.sdf ... qm9-8.sdf.
files = ['edrug3d.sdf'] + [f'qm9-{part}.sdf' for part in range(1, 9)]


def check_missing_files(file_names=None, data_dir='./data/'):
    """Checks for missing files. Returns true, if all files are present.

    Args:
        file_names: Iterable of file names to look for. When omitted, the
            notebook-level ``files`` list is used.
        data_dir: Directory expected to contain the files.

    Returns:
        bool: ``True`` when every name exists inside ``data_dir`` (also for an
        empty collection), ``False`` as soon as one is missing.
    """
    if file_names is None:
        # Resolved at call time so edits to the notebook-level list are picked up.
        file_names = files

    return all(
        os.path.exists(os.path.join(data_dir, name))
        for name in file_names
    )

# Fetch and unpack the data archive only when at least one expected file is absent.
if not check_missing_files():
    # -nc: skip the download if data.zip already exists; -O: save under that name.
    !wget -nc -O data.zip "https://hochschulebonnrheinsieg-my.sharepoint.com/:u:/g/personal/nico_piel_365h-brs_de1/ESuGOTn_IflEk7I5HkOFpbwBZKeOk9Qf2nL5JEcq2om6_Q?e=sHYsTk&download=1"
    # -u: only extract entries that are new or newer than what is on disk.
    # NOTE(review): presumably the archive unpacks into ./data/ - confirm.
    !unzip -u data.zip
    # Remove the archive; this runs regardless of whether the unzip step succeeded.
    !rm data.zip

In [16]:
class CustomDataset(data.Dataset):
    """Map-style dataset backed by a CSV file that is loaded fully into memory.

    Each sample is the pair formed by the first two columns of one CSV row.
    """

    def __init__(self, path):
        """Read the CSV at ``path`` into a DataFrame.

        Args:
            path: Location of the CSV file, passed straight to ``pd.read_csv``.
        """
        super().__init__()
        self.data = pd.read_csv(path)

    def __len__(self):
        """Return the number of rows in the loaded table."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return ``(first_column, second_column)`` of the row at position ``idx``."""
        row = self.data.iloc[idx]
        # The row Series is indexed by column labels, so select by position
        # explicitly; integer keys passed to ``row[...]`` are label lookups and
        # the positional fallback for them is deprecated in pandas.
        return row.iloc[0], row.iloc[1]

In [21]:
# Stand-alone dataset and loader over the CSV: shuffled mini-batches of 64 rows.
dataset = CustomDataset(path='./data/edrug3d.csv')
dataloader = data.DataLoader(
    dataset=dataset,
    batch_size=64,
    shuffle=True,
)

In [23]:
class DataModule(pl.LightningDataModule):
    """Lightning data module that serves the ``./data/edrug3d.csv`` dataset.

    Every split (train, validation, test, predict) currently reads the same
    CSV file, so no held-out data is involved.
    """

    def __init__(self):
        """Initialise the Lightning base class and load one dataset per split."""
        # The base initialiser must run so the attributes it defines exist on
        # this instance.
        super().__init__()
        self.train_dataset = CustomDataset('./data/edrug3d.csv')
        self.val_dataset_1 = CustomDataset('./data/edrug3d.csv')
        # Not returned by any dataloader method of this class.
        self.val_dataset_2 = CustomDataset('./data/edrug3d.csv')
        self.test_dataset = CustomDataset('./data/edrug3d.csv')
        self.predict_dataset = CustomDataset('./data/edrug3d.csv')

    def train_dataloader(self):
        """Return a shuffled loader over the training dataset, batch size 64."""
        return data.DataLoader(self.train_dataset, batch_size=64, shuffle=True)

    def val_dataloader(self):
        """Return a loader over ``val_dataset_1`` with default DataLoader settings."""
        return data.DataLoader(self.val_dataset_1)

    def test_dataloader(self):
        """Return a loader over the test dataset with default DataLoader settings."""
        return data.DataLoader(self.test_dataset)

    def predict_dataloader(self):
        """Return a loader over the predict dataset with default DataLoader settings."""
        return data.DataLoader(self.predict_dataset)