In [1]:
from molmap import dataset
from molmap import loadmap
import molmap

import matplotlib.pyplot as plt
from joblib import dump, load
from tqdm import tqdm
import pandas as pd
import numpy as np
# Hook tqdm into pandas (enables the progress_* variants of apply) and draw the
# bar with plain ASCII characters.
tqdm.pandas(ascii=True)

# List of the available fingerprint types

In [2]:
# Instantiate the fingerprint extractor and pull out its per-bit metadata
# table (one row per fingerprint bit: ID, subtype, and plotting colour).
fp_extractor = molmap.feature.fingerprint.Extraction()
bitsinfo = fp_extractor.bitsinfo
bitsinfo

Unnamed: 0,IDs,Subtypes,colors
0,AtomPairFP0,AtomPairFP,#ff8800
1,AtomPairFP1,AtomPairFP,#ff8800
2,AtomPairFP2,AtomPairFP,#ff8800
3,AtomPairFP3,AtomPairFP,#ff8800
4,AtomPairFP4,AtomPairFP,#ff8800
...,...,...,...
12103,TorsionFP2043,TorsionFP,#ff00a0
12104,TorsionFP2044,TorsionFP,#ff00a0
12105,TorsionFP2045,TorsionFP,#ff00a0
12106,TorsionFP2046,TorsionFP,#ff00a0


In [3]:
# Count how many bits each fingerprint subtype contributes.
bits_by_subtype = bitsinfo.groupby('Subtypes')
bits_by_subtype.size()

Subtypes
AtomPairFP       2048
AvalonFP         2048
DaylightFP       2048
ECFP             2048
EstateFP           79
MACCSFP           167
PharmacoErGFP     441
PharmacoPFP       300
PubChemFP         881
TorsionFP        2048
dtype: int64

## Generate your own fingerprint MolMap

In [4]:
# Keep only the bit IDs that belong to the three chosen fingerprint subtypes.
selected_subtypes = ['MACCSFP', 'PharmacoErGFP', 'PubChemFP']
subtype_mask = bitsinfo['Subtypes'].isin(selected_subtypes)
flist = bitsinfo.loc[subtype_mask, 'IDs'].tolist()

In [5]:
# Build a grid-type fingerprint MolMap restricted to the selected bits,
# fit it with the UMAP method, and save the fitted object to `mp_name`.
mp_name = './fingerprint_part.mp'

mp = molmap.MolMap(
    ftype='fingerprint',
    fmap_type='grid',
    flist=flist,
)
mp.fit(method='umap')
mp.save(mp_name)

UMAP(a=None, angular_rp_forest=False, b=None, init='spectral',
   learning_rate=1.0, local_connectivity=1.0, metric='precomputed',
   metric_kwds=None, min_dist=0.1, n_components=2, n_epochs=None,
   n_neighbors=50, negative_sample_rate=5, random_state=32,
   repulsion_strength=1.0, set_op_mix_ratio=1.0, spread=1.0,
   target_metric='categorical', target_metric_kwds=None,
   target_n_neighbors=-1, target_weight=0.5, transform_queue_size=4.0,
   transform_seed=42, verbose=2)
Construct fuzzy simplicial set
Thu Nov  7 23:22:52 2019 Finding Nearest Neighbors
Thu Nov  7 23:22:52 2019 Finished Nearest Neighbor Search
Thu Nov  7 23:22:53 2019 Construct embedding
	completed  0  /  500 epochs
	completed  50  /  500 epochs
	completed  100  /  500 epochs
	completed  150  /  500 epochs
	completed  200  /  500 epochs
	completed  250  /  500 epochs
	completed  300  /  500 epochs
	completed  350  /  500 epochs
	completed  400  /  500 epochs
	completed  450  /  500 epochs
Thu Nov  7 23:22:58 2019 Fini

['./fingerprint_part.mp']

## Visualization of your MolMap

In [6]:
# Plot the fitted map in its grid layout; per the log output below, the plot is
# written to an HTML file in the working directory.
mp.plot_grid()

2019-11-07 23:23:20,916 - INFO - [bidd-molmap] - generate file: ./fingerprint_1303_cosine_umap_molmap
2019-11-07 23:23:20,975 - INFO - [bidd-molmap] - save html file to ./fingerprint_1303_cosine_umap_molmap


In [7]:
# Plot the fitted map as a scatter plot; per the log output below, the plot is
# written to an HTML file in the working directory.
mp.plot_scatter()

2019-11-07 23:23:34,676 - INFO - [bidd-molmap] - generate file: ./fingerprint_1303_cosine_umap_scatter
2019-11-07 23:23:34,709 - INFO - [bidd-molmap] - save html file to ./fingerprint_1303_cosine_umap_scatter


In [8]:
# IPython shell shortcut (not Python): list the working directory to confirm
# that the saved .mp file and the generated HTML plots exist.
ls -lh

total 2.7G
-rw-rw-r-- 1 shenwanxiang shenwanxiang 2.3M Nov  7 23:06 01_fingerprint_map_all.ipynb
-rw-rw-r-- 1 shenwanxiang shenwanxiang 1.3M Nov  7 23:03 02_fingerprint_map_part.ipynb
-rw-rw-r-- 1 shenwanxiang shenwanxiang 1.3M Nov  7 23:21 03_fingerprint_map_part_grid.ipynb
-rw-rw-r-- 1 shenwanxiang shenwanxiang 1.4M Nov  7 23:05 fingerprint_11750_cosine_umap_scatter.html
-rw-rw-r-- 1 shenwanxiang shenwanxiang 117K Nov  7 23:23 fingerprint_1303_cosine_umap_molmap.html
-rw-rw-r-- 1 shenwanxiang shenwanxiang 158K Nov  7 23:23 fingerprint_1303_cosine_umap_scatter.html
-rw-rw-r-- 1 shenwanxiang shenwanxiang 758K Nov  7 23:01 fingerprint_6554_cosine_umap_scatter.html
-rw-rw-r-- 1 shenwanxiang shenwanxiang 2.7G Nov  7 23:05 fingerprint_all.mp
-rw-rw-r-- 1 shenwanxiang shenwanxiang  36M Nov  7 23:23 fingerprint_part.mp
