In [1]:
# Raw input table; it is read into `cereal_df` by the following cell.
DATA_FILE = "./data/data_for_artathon2021.csv"
# NOTE(review): INPUT_FILE is reassigned to "./data/data_scaled.csv" in a later cell
# before it is first read, so this initial value appears to be unused.
INPUT_FILE = "./data/data_for_visualization.csv"

In [2]:
import pandas as pd

# Load the raw table and keep the original tissue labels before the column
# is replaced by integer codes below.
cereal_df = pd.read_csv(DATA_FILE)
tissues = cereal_df["tissue"].unique()

# Swap every label column for its integer category code so the values can be
# used numerically further down (e.g. as a Z coordinate).
for label_column in ("patient", "disease_stage", "tissue"):
    cereal_df[label_column] = pd.Categorical(cereal_df[label_column]).codes


In [3]:
def min_max_scale(values):
    """Linearly rescale a numeric Series onto the [0, 1] interval.

    Parameters
    ----------
    values : pandas.Series
        Numeric column to rescale.

    Returns
    -------
    pandas.Series
        ``(values - min) / (max - min)``, indexed like ``values``. When every
        entry has the same value the range is zero, so a Series of zeros is
        returned instead of dividing by zero.
    """
    low, high = values.min(), values.max()
    if high == low:
        # Constant column: there is no spread to scale, pin everything to 0.
        return pd.Series(0, index=values.index)
    return (values - low) / (high - low)


# Numeric features that are scaled and later fed to the manifold embeddings.
SCALED_COLUMNS = [
    'mutation_position',
    'mutation_diversity',
    '%top_10_clones',
    '%top_100_clones',
    'nonfunctional',
]

# One scaled column per feature, in the order listed above.
df_min_max_scaled = pd.DataFrame(
    {column: min_max_scale(cereal_df[column]) for column in SCALED_COLUMNS}
)

In [4]:
# Persist the scaled features. INPUT_FILE is rebound here (replacing the value
# set in the first cell); the clustering cell reads the scaled data back from this path.
INPUT_FILE = "./data/data_scaled.csv"
# index=False: write only the feature columns, without the row index.
df_min_max_scaled.to_csv(INPUT_FILE, index=False)

**Clustering**

In [5]:
# Author: Jake Vanderplas -- <vanderplas@astro.washington.edu>
#
# Computes several 2-D manifold embeddings of the scaled feature table and
# stores each result in `models`, keyed by method name.

from collections import OrderedDict
from functools import partial
from time import time

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import NullFormatter

from sklearn import manifold, datasets

# Next line to silence pyflakes. This import is needed.
Axes3D

# Feature columns used as input to every embedding.
FEATURE_COLUMNS = [
    'mutation_position',
    'mutation_diversity',
    '%top_10_clones',
    '%top_100_clones',
    'nonfunctional',
]

# Read the scaled features back from disk and force a float matrix.
# Each feature is taken from its own column; previously 'nonfunctional' was
# filled from '%top_100_clones', which duplicated one feature and dropped another.
data = pd.read_csv(INPUT_FILE)
X = data[FEATURE_COLUMNS].astype(float)
print("shape(X) " + str(X.shape))

n_neighbors = 10
n_components = 2

# Set-up manifold methods.
# Hyper-parameters are passed by keyword: recent scikit-learn releases make
# them keyword-only for LocallyLinearEmbedding and Isomap.
LLE = partial(
    manifold.LocallyLinearEmbedding,
    n_neighbors=n_neighbors,
    n_components=n_components,
    eigen_solver='auto',
)

methods = OrderedDict()
methods['LLE'] = LLE(method='standard')
methods['LTSA'] = LLE(method='ltsa')
methods['Hessian LLE'] = LLE(method='hessian')
methods['Modified LLE'] = LLE(method='modified')
methods['Isomap'] = manifold.Isomap(n_neighbors=n_neighbors, n_components=n_components)
methods['MDS'] = manifold.MDS(n_components=n_components, max_iter=100, n_init=1)
methods['SE'] = manifold.SpectralEmbedding(n_components=n_components, n_neighbors=n_neighbors)
# NOTE(review): t-SNE runs with its default perplexity. The saved output shows
# only 13 rows; newer scikit-learn versions require perplexity < n_samples, so
# confirm against the installed version and set `perplexity` explicitly if needed.
methods['t-SNE'] = manifold.TSNE(n_components=n_components, init='pca', random_state=0)

# Fit every method on the same matrix; each entry of `models` is the embedded
# coordinates returned by fit_transform for that method.
models = {}
for label, method in methods.items():
    models[label] = method.fit_transform(X)
    

Automatically created module for IPython interactive environment
shape(X) (13, 5)




**Fill positional information**<br>
Columns holding each point's position on the X, Y and Z axes are added to the data file.<br>
The column names follow the format `position_3d_<identifier>` (for example `position_3d_disease_stage`).<br>

In [6]:
import random

# NOTE(review): the seed and the draw below look like leftovers from an earlier
# version that placed points randomly; nothing in this cell consumes them.
random.seed(13)
random.uniform(0, 1)


def _scale_to_unit(values):
    """Min-max scale a Series onto [0, 1]; a constant Series maps to all zeros."""
    low, high = values.min(), values.max()
    if high == low:
        # Zero range: avoid 0/0 (NaN coordinates) and keep dtype and index.
        return values * 0
    return (values - low) / (high - low)


def _scaled_codes(codes):
    """Map integer category codes onto [0, 1) as (c - min) / (max - min + 1)."""
    # Work in float so the "+ 1" cannot overflow a small integer code dtype.
    as_float = codes.astype(float)
    return (as_float - as_float.min()) / (as_float.max() - as_float.min() + 1)


# X/Y come from the 2-D t-SNE embedding, normalised per axis.
models_df = pd.DataFrame(models['t-SNE'])
models_df[0] = _scale_to_unit(models_df[0])
models_df[1] = _scale_to_unit(models_df[1])

# Embedding rows are matched to table rows by position, so the lengths must agree.
if len(models_df) != len(cereal_df):
    raise ValueError(
        "embedding has %d rows but the data table has %d"
        % (len(models_df), len(cereal_df))
    )

x_text = [str(value) for value in models_df[0].to_numpy()]
y_text = [str(value) for value in models_df[1].to_numpy()]

# One "x,y,z" string per row and per view; Z is the scaled category code of the
# view's column. Each column is created directly from a list of strings rather
# than initialised with integer 0 and overwritten cell by cell with text.
for source_column in ("disease_stage", "patient"):
    z_text = [str(value) for value in _scaled_codes(cereal_df[source_column]).to_numpy()]
    cereal_df["position_3d_" + source_column] = [
        ",".join(parts) for parts in zip(x_text, y_text, z_text)
    ]


**Save to the visualization input file.**

In [7]:
# Output path for the table that now carries the position_3d_* columns.
INPUT_VIS = "./data/data_visualization.csv"
# index=False: write only the data columns, without the row index.
cereal_df.to_csv(INPUT_VIS, index=False)


# Comparison of Manifold Learning methods

An illustration of dimensionality reduction on the S-curve dataset
with various manifold learning methods.

For a discussion and comparison of these algorithms, see the
`manifold module page <manifold>`

For a similar example, where the methods are applied to a
sphere dataset, see `sphx_glr_auto_examples_manifold_plot_manifold_sphere.py`

Note that the purpose of MDS is to find a low-dimensional
representation of the data (here 2D) in which the distances respect well
the distances in the original high-dimensional space; unlike other
manifold-learning algorithms, it does not seek an isotropic
representation of the data in the low-dimensional space.

In [8]:
# Author: Jake Vanderplas -- <vanderplas@astro.washington.edu>
# Everything below is wrapped in a triple-quoted string, so none of it executes;
# the cell only evaluates the string, which the notebook echoes as its output.
# The text duplicates the clustering cell earlier in this notebook.
# NOTE(review): inside the disabled text, 'nonfunctional' is filled from
# '%top_100_clones' -- correct that if this code is ever re-enabled.
"""
print(__doc__)

from collections import OrderedDict
from functools import partial
from time import time

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import NullFormatter

from sklearn import manifold, datasets

# Next line to silence pyflakes. This import is needed.
Axes3D

n_points = 1000
#X, color = datasets.make_s_curve(n_points, random_state=0)
data = pd.read_csv(INPUT_FILE)
X = pd.DataFrame()
X['mutation_position'] = data['mutation_position'].astype(float)
X['mutation_diversity'] = data['mutation_diversity'].astype(float)
X['%top_10_clones'] = data['%top_10_clones'].astype(float)
X['%top_100_clones'] = data['%top_100_clones'].astype(float)
X['nonfunctional'] = data['%top_100_clones'].astype(float)
print("shape(X) " + str(X.shape))
print(X)



#print("Colors" + str(color))
n_neighbors = 10
n_components = 2

# Set-up manifold methods
LLE = partial(manifold.LocallyLinearEmbedding, n_neighbors, n_components, eigen_solver='auto')

methods = OrderedDict()
methods['LLE'] = LLE(method='standard')
methods['LTSA'] = LLE(method='ltsa')
methods['Hessian LLE'] = LLE(method='hessian')
methods['Modified LLE'] = LLE(method='modified')
methods['Isomap'] = manifold.Isomap(n_neighbors, n_components)
methods['MDS'] = manifold.MDS(n_components, max_iter=100, n_init=1)
methods['SE'] = manifold.SpectralEmbedding(n_components=n_components, n_neighbors=n_neighbors)
methods['t-SNE'] = manifold.TSNE(n_components=n_components, init='pca', random_state=0)


# Plot results
models = {}
for i, (label, method) in enumerate(methods.items()):
    t0 = time()
    Y = method.fit_transform(X)
    models[label] = Y
    print(label + " =" +str(models[label]))
"""

'\nprint(__doc__)\n\nfrom collections import OrderedDict\nfrom functools import partial\nfrom time import time\n\nimport matplotlib.pyplot as plt\nfrom mpl_toolkits.mplot3d import Axes3D\nfrom matplotlib.ticker import NullFormatter\n\nfrom sklearn import manifold, datasets\n\n# Next line to silence pyflakes. This import is needed.\nAxes3D\n\nn_points = 1000\n#X, color = datasets.make_s_curve(n_points, random_state=0)\ndata = pd.read_csv(INPUT_FILE)\nX = pd.DataFrame()\nX[\'mutation_position\'] = data[\'mutation_position\'].astype(float)\nX[\'mutation_diversity\'] = data[\'mutation_diversity\'].astype(float)\nX[\'%top_10_clones\'] = data[\'%top_10_clones\'].astype(float)\nX[\'%top_100_clones\'] = data[\'%top_100_clones\'].astype(float)\nX[\'nonfunctional\'] = data[\'%top_100_clones\'].astype(float)\nprint("shape(X) " + str(X.shape))\nprint(X)\n\n\n\n#print("Colors" + str(color))\nn_neighbors = 10\nn_components = 2\n\n# Set-up manifold methods\nLLE = partial(manifold.LocallyLinearEmbeddi

# Multi-dimensional data view

The multi-dimensional data view is not a dimensionality reduction of the data!<br>
The view enables navigation through the data. Imagine a person living in a two-dimensional space: that person can never see three-dimensional figures, but if we pull them out into the 3rd dimension, they will see the two-dimensional space change as they travel along the 3rd dimension.<br>
**This view enables navigation through the multi-dimensional data.**




## Radar view.
<p> Visualizing multi-dimensional data using a radar graph.<br>
The radar graph visualizes a data vector, where each line represents one vector cell.<br>
The length of each line represents the value of that vector cell.<br>
</p>
<img src="images/Radar.png">


## Data navigation view.
<p>This view visualizes multi-dimensional data using a radar graph for each point.
The radar graph displays a point in a normalized vector space.
The length of each line represents the value of the vector cell.</p>
<img src="images/data_view.png">


## Data navigation control
<p>The data navigation control panel provides the similarity calculation and the data view selection.
</p>
<p>The similarity calculation enables selection of the distance between the radar views.
</p>
<p>The data view selection enables selection of the feature used for the three-dimensional positioning.
</p>
<img src="images/control_vuew.png">



