Merge pull request #50 from ecrl/dev

Update to dependencies, readthedocs dependencies
ecrl · Apr 7, 2023 · 59dc3dc · 59dc3dc
2 parents dd33d4d + 5005c76
commit 59dc3dc
Show file tree

Hide file tree

Showing 6 changed files with 66 additions and 15 deletions.
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
@@ -0,0 +1,20 @@
+# .readthedocs.yaml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Set the version of Python and other tools you might need
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.11"
+
+mkdocs:
+  configuration: mkdocs.yml
+
+# Optionally declare the Python requirements required to build your docs
+python:
+   install:
+   - requirements: docs/requirements.txt
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -12,7 +12,7 @@ pool:
 steps:
 - task: UsePythonVersion@0
   inputs:
-    versionSpec: '3.8'
+    versionSpec: '3.11'
     architecture: 'x64'
 
 - script: |
@@ -23,4 +23,4 @@ steps:
 - script: |
     cd tests
     python test_all.py
-  displayName: 'unittest'
+  displayName: 'Unit testing'
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1,2 +1,2 @@
 mkdocs-material
-mkdocstrings
+mkdocstrings-python
diff --git a/ecnet/__init__.py b/ecnet/__init__.py
@@ -1,2 +1,2 @@
 from .model import ECNet
-__version__ = '4.1.0'
+__version__ = '4.1.1'
diff --git a/ecnet/datasets/structs.py b/ecnet/datasets/structs.py
@@ -2,6 +2,7 @@
 from typing import List, Tuple, Iterable
 import torch
 from torch.utils.data import Dataset
+from sklearn.decomposition import PCA
 
 from .utils import _qspr_from_padel, _qspr_from_alvadesc,\
     _qspr_from_alvadesc_smifile
@@ -21,9 +22,9 @@ def __init__(self, smiles: List[str], target_vals: Iterable[Iterable[float]],
         """
 
         self.smiles = smiles
-        self.target_vals = torch.as_tensor(target_vals)
+        self.target_vals = torch.as_tensor(target_vals).type(torch.float32)
         self.desc_vals, self.desc_names = self.smi_to_qspr(smiles, backend)
-        self.desc_vals = torch.as_tensor(self.desc_vals)
+        self.desc_vals = torch.as_tensor(self.desc_vals).type(torch.float32)
 
     @staticmethod
     def smi_to_qspr(smiles: List[str], backend: str) -> Tuple[List[List[float]], List[str]]:
@@ -110,17 +111,17 @@ def __init__(self, smiles_fn: str, target_vals: Iterable[Iterable[float]],
         """
 
         self.smiles = self._open_smiles_file(smiles_fn)
-        self.target_vals = torch.as_tensor(target_vals)
+        self.target_vals = torch.as_tensor(target_vals).type(torch.float32)
         if backend == 'padel':
             self.desc_vals, self.desc_names = self.smi_to_qspr(
                 self.smiles, backend
             )
-            self.desc_vals = torch.as_tensor(self.desc_vals)
+            self.desc_vals = torch.as_tensor(self.desc_vals).type(torch.float32)
         elif backend == 'alvadesc':
             self.desc_vals, self.desc_names = _qspr_from_alvadesc_smifile(
                 smiles_fn
             )
-            self.desc_vals = torch.as_tensor(self.desc_vals)
+            self.desc_vals = torch.as_tensor(self.desc_vals).type(torch.float32)
 
     @staticmethod
     def _open_smiles_file(smiles_fn: str) -> List[str]:
@@ -156,5 +157,35 @@ def __init__(self, desc_vals: Iterable[Iterable[float]],
 
         self.smiles = ['' for _ in range(len(target_vals))]
         self.desc_names = ['' for _ in range(len(desc_vals[0]))]
-        self.desc_vals = torch.as_tensor(desc_vals)
-        self.target_vals = torch.as_tensor(target_vals)
+        self.desc_vals = torch.as_tensor(desc_vals).type(torch.float32)
+        self.target_vals = torch.as_tensor(target_vals).type(torch.float32)
+
+
+class PCADataset(QSPRDataset):
+
+    def __init__(self, smiles: List[str], target_vals: Iterable[Iterable[float]],
+                 backend: str = 'padel', existing_pca_dataset: 'PCADataset' = None):
+        """
+        PCADataset: creates a torch.utils.data.Dataset given supplied SMILES strings, supplied
+        target values; first generates QSPR descriptors, then transforms them via PCA; an existing
+        PCADataset can be supplied to perform PCA transformation
+
+        Args:
+            smiles (list[str]): SMILES strings
+            target_vals (Iterable[Iterable[float]]): target values of shape (n_samples, n_targets)
+            backend (str, optional): backend for QSPR generation, ['padel', 'alvadesc']
+            existing_pca_dataset (PCADataset, optional): if PCA already trained (e.g. trained
+                using training set, want to use for testing set), the pre-trained PCA can be used
+                to perform PCA for this data
+        """
+
+        self.smiles = smiles
+        self.target_vals = torch.as_tensor(target_vals).type(torch.float32)
+        self.desc_names = None  # descriptor names from smi_to_qspr are discarded below
+        desc_vals, _ = self.smi_to_qspr(smiles, backend)
+        if existing_pca_dataset is None:
+            self.pca = PCA(n_components=min(len(desc_vals), len(desc_vals[0])))  # lists lack .shape
+            self.pca.fit(desc_vals)
+        else:
+            self.pca = existing_pca_dataset.pca  # reuse the already-fitted PCA without refitting
+        self.desc_vals = torch.as_tensor(self.pca.transform(desc_vals)).type(torch.float32)
diff --git a/setup.py b/setup.py
@@ -2,17 +2,17 @@
 
 setup(
     name='ecnet',
-    version='4.1.0',
+    version='4.1.1',
     description='Fuel property prediction using QSPR descriptors',
     url='https://github.com/ecrl/ecnet',
     author='Travis Kessler',
     author_email='Travis_Kessler@student.uml.edu',
     license='MIT',
     packages=find_packages(),
     install_requires=[
-        'torch==1.8.0',
-        'sklearn',
-        'padelpy==0.1.9',
+        'torch==2.0.0',
+        'scikit-learn==1.2.2',
+        'padelpy==0.1.13',
         'alvadescpy==0.1.2',
         'ecabc==3.0.0'
     ],