In [1]:
import pandas as pd 
import numpy as np 
from math import log10

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, r2_score
from sklearn.neighbors import NearestNeighbors

from sklearn.neural_network import MLPClassifier
import xgboost
import shap

from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = "Menlo"
plt.rcParams['font.family'] = "sans-serif"
plt.rcParams.update({'font.size': 10})

import tmap as tm
from faerun import Faerun

Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)


### Import data

Load the liposome dataset from the Excel file into a DataFrame.

In [2]:
# Read the liposome measurements from the Excel workbook, using openpyxl
# as the parsing engine.
df = pd.read_excel(
    io='data/20240517_Liposome_data.xlsx',
    engine='openpyxl',
)

Log-transform the size data (base 10) and store it in a new `log_size` column

In [3]:
# Store log10 of every 'Size' value in a new 'log_size' column.
# math.log10 is applied element by element, so a zero or negative size
# raises ValueError instead of silently producing -inf/NaN.
df['log_size'] = df['Size'].map(log10)

Convert CHIP to a categorical column and store its integer category codes in a new `CHIPID` column

In [4]:
# Cast CHIP to pandas' categorical dtype, then expose the integer code of
# each category as a separate numeric column.
chip_as_category = df['CHIP'].astype('category')
df['CHIP'] = chip_as_category
df['CHIPID'] = chip_as_category.cat.codes

#### Featurize

Generate a feature vector for each datapoint

In [9]:
def feature_vector(row):
    """Collect the descriptor values of one datapoint into a 1-D array.

    Parameters
    ----------
    row : mapping
        Any object indexable by column name (for example a DataFrame row)
        that provides every column listed in ``columns`` below.

    Returns
    -------
    numpy.ndarray
        The eleven values, in the fixed order given by ``columns``.
    """
    # Column names are reproduced exactly as they are looked up on the row
    # (including the 'Unsatturation' spelling).
    columns = (
        'CHIPID',
        'Lipid n%',
        'CHOL %',
        'DSPE-PEG %',
        'TFR',
        'PDI',
        'Chain length 1',
        'Unsatturation chain 1',
        'Chain length 2',
        'Unsatturation chain 2',
        'FRR',
    )
    return np.array([row[name] for name in columns])

Compute the feature vector of every row and stack them into a single NumPy array

In [10]:
# Build one feature vector per row, keep the vectors in the frame, and also
# stack them into one array with a row per datapoint.
per_row_vectors = df.apply(feature_vector, axis=1)
df['feature_vector'] = per_row_vectors
features_np = np.array(per_row_vectors.tolist())

#### TMAP

Calculate TMAP layout

In [11]:
# Build a k-nearest-neighbour graph over the feature vectors and lay it out
# with TMAP.
# NOTE(review): distances are Euclidean on the raw feature columns; if the
# columns have very different numeric ranges the largest ones will dominate
# the neighbourhood structure -- confirm this is intended.

# Never request more neighbours than there are datapoints: kneighbors()
# raises a ValueError when n_neighbors exceeds the number of fitted samples.
knn = min(500, len(features_np))
knn_search = NearestNeighbors(n_neighbors=knn, metric='euclidean')
knn_search.fit(features_np)

# Query every datapoint in a single call instead of one call per row.
# Both returned arrays have one row per datapoint and `knn` columns.
dists, idxs = knn_search.kneighbors(features_np)

# One weighted edge [source, target, distance] per (datapoint, neighbour) pair,
# in the same order as a row-by-row, neighbour-by-neighbour loop.
edge_list = [
    [i, idxs[i, j], dists[i, j]]
    for i in range(len(features_np))
    for j in range(knn)
]

# Layout parameters passed to TMAP.
cfg = tm.LayoutConfiguration()
cfg.node_size = 1 / 15
cfg.mmm_repeats = 2
cfg.sl_extra_scaling_steps = 5
cfg.k = 15
cfg.sl_scaling_type = tm.RelativeToAvgLength

# x_/y_ are the point coordinates and s/t the endpoints of the tree edges
# (they are later fed to Faerun as scatter coordinates and tree from/to).
x_, y_, s, t, gp = tm.layout_from_edge_list(len(features_np), edge_list, cfg)
tm_layout_mxfp = {'x': list(x_), 'y': list(y_), 's': list(s), 't': list(t)}

In [12]:
# Let Faerun encode the categorical columns; each call returns the legend
# labels together with the per-point values.
pop_labels, pop_data = Faerun.create_categories(df['Population'])
frr_labels, frr_data = Faerun.create_categories(df['FRR'])

# Two-colour palette (blue to red) shared by the discrete and the continuous
# colormap. Each colormap receives its own copy of the list.
palette = ['#0481FA', '#FC0453']
cmap_discrete = ListedColormap(list(palette))
cmap_continuous = LinearSegmentedColormap.from_list('custom', list(palette), N=256)

In [13]:
# Assemble one label string per datapoint: the SMILES string followed by
# alternating grey caption / value fields, all separated by '__'.
# The caption text of every field is the column name itself.
captioned_columns = ['DSPE-PEG %', 'TFR', 'FRR', 'CHIP', 'CHOL %']

labels = []
for row_index, row in df.iterrows():
    fields = [row['SMILES ']]  # note the trailing space in the column name
    for column in captioned_columns:
        fields.append(f'<small style="color:grey;">{column}</small>')
        fields.append(f'{row[column]}')
    labels.append('__'.join(fields))

In [14]:
# Create the Faerun figure: front view, coords disabled, empty title and a
# white clear colour.
f = Faerun(view="front", coords=False, title="", clear_color="#FFFFFF")

# Per-point data: TMAP coordinates, three colour series (population,
# log10 size, FRR) and the label strings.
scatter_data = {
    "x": tm.VectorFloat(tm_layout_mxfp['x']),
    "y": tm.VectorFloat(tm_layout_mxfp['y']),
    "c": [
        pop_data,
        df['log_size'].values.tolist(),
        df['FRR'].values.tolist(),
    ],
    "labels": labels,
}

# Only the first series is categorical, so only it gets legend labels and
# the discrete colormap; the other two use the continuous colormap.
f.add_scatter(
    "Liposomes_TMAP",
    scatter_data,
    shader="smoothCircle",
    point_scale=10,
    max_point_size=20,
    legend_labels=[pop_labels, None, None],
    categorical=[True, False, False],
    colormap=[cmap_discrete, cmap_continuous, cmap_continuous],
    series_title=['Populations', 'log10 Size', 'FRR'],
    has_legend=True,
)

f.thumbnail_width = 250

# Tree edges from the TMAP layout, attached to the scatter layer above.
tree_edges = {
    "from": tm.VectorUint(tm_layout_mxfp['s']),
    "to": tm.VectorUint(tm_layout_mxfp['t']),
}
f.add_tree("Liposomes_TMAP_tree", tree_edges, point_helper="Liposomes_TMAP")

# Render the figure under the name 'liposomes_tmap' with the SMILES template.
f.plot('liposomes_tmap', template='smiles')