In [None]:
import os
import sys

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

sys.path.insert(0, '..')
import ccal

%matplotlib inline
# %config InlineBackend.figure_formats = {'svg',}

# Test with real hematopoietic data

## Define states

In [None]:
# Read the H matrix from its GCT file.
h = ccal.support.read_gct('/home/cyborg/hematopoietic_cancer/result/ccle_hema_r/k9/ccle_hema_k_9_H.gct')

# Candidate numbers of states: every integer from 2 through 20.
ks = [*range(2, 21)]
n_clusterings = 50
state_labels_filepath_prefix = 'output/ccle_hema_k9_H'

# define_states returns two objects: the state labels for every k in `ks`
# and the cophenetic correlations (per the names they are bound to here).
state_labels_all, state_cophenetic_correlations = ccal.analyze.define_states(
    h,
    ks,
    n_clusterings=n_clusterings,
    filepath_prefix=state_labels_filepath_prefix,
)

## Plot state labels

In [None]:
# Sort the labels within each row so that equal labels form contiguous colour
# bands in the heatmap.
# np.sort returns a sorted *copy*. The previous in-place `a.sort()` operated on
# whatever np.asarray returned, which can be a view of the DataFrame's own
# buffer (when it is already float) and would then silently reorder
# `state_labels_all` for every later cell.
a = np.sort(np.asarray(state_labels_all, dtype=float), axis=-1)

# Explicit figure/axes so the heatmap is drawn on a known axes object.
figure, ax = plt.subplots(figsize=(16, 10))
sns.heatmap(pd.DataFrame(a, index=state_labels_all.index), cmap=mpl.cm.Paired, xticklabels=False, ax=ax);

## Make Onco-GPS

In [None]:
n_state = 12
# `.ix` was deprecated in pandas 0.20 and removed in 1.0; `.loc` selects the
# row whose index label equals `n_state`.
# NOTE(review): assumes the index of state_labels_all holds the k values that
# were passed to define_states — confirm.
state_labels = state_labels_all.loc[n_state, :]
ccal.onco_gps.make_map(h, state_labels)

# Test with random data

## Make Onco-GPS

In [None]:
ccal.support.VERBOSE = True

# Seed NumPy's global RNG so the random states and annotations drawn below
# (and therefore the resulting maps) are identical on every run.
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

h = ccal.support.read_gct('/home/cyborg/hematopoietic_cancer/result/ccle_hema_r/k9/ccle_hema_k_9_H.gct')
ks = list(range(2, 21))

# Explicit palette handed to make_map through its `colors=` argument.
STATE_COLORS = [
    '#E52339', '#4682B4', '#BAFF51', '#614C82', '#FF8C00', '#FFFF00',
    '#DB7093', '#B0E0E6', '#09DCFF', '#5F9EA0', '#008000', '#A479E7',
    '#FFC0CB', '#FFFFCC', '#EF97EA', '#DFCBFF', '#FF00FF', '#FB5200',
    '#FFD700', '#FF0000', '#FF7F50', '#8B0000', '#6B8E23', '#5F51FF',
    '#B27E3C', '#644824', '#A0522D',
]


def make_random_map(h, n_components, n_state, **kwargs):
    """Call make_map on the first rows of `h` with freshly drawn random states.

    h: H matrix (DataFrame); one state is drawn per column.
    n_components: number of leading rows of `h` passed to make_map.
    n_state: states are drawn uniformly from 1..n_state (inclusive).
    **kwargs: forwarded unchanged to ccal.onco_gps.make_map.
    """
    random_states = np.random.choice(list(range(1, n_state + 1)), h.shape[1])
    return ccal.onco_gps.make_map(h.iloc[:n_components, :], random_states, **kwargs)


def draw_random_annotations(annotation_type, n_state, n_samples):
    """Return `n_samples` random annotation values of the requested type.

    'continuous'  -> floats from np.random.random_sample
    'categorical' -> integers in 0..n_state-1
    'binary'      -> integers in {0, 1}

    Raises ValueError for any other annotation_type.
    """
    if annotation_type == 'continuous':
        return np.random.random_sample(n_samples)
    if annotation_type == 'categorical':
        return np.random.choice(range(n_state), n_samples)
    if annotation_type == 'binary':
        return np.random.choice(range(2), n_samples)
    raise ValueError('Error in annotation_type.')


for n_components in [3, 4, 6, 9]:
    print('n_components:', n_components)
    # Every 5th candidate from ks.
    for n_state in ks[::5]:
        print('n_state:', n_state)
        # Default call, then the n_pullratio_components and colors options.
        make_random_map(h, n_components, n_state)
        make_random_map(h, n_components, n_state, n_pullratio_components=2)
        make_random_map(h, n_components, n_state, colors=STATE_COLORS)
        # One annotated map per supported annotation type.
        for annotation_type in ['continuous', 'categorical', 'binary']:
            print('annotation_type:', annotation_type)
            annotations = draw_random_annotations(annotation_type, n_state, h.shape[1])
            make_random_map(h, n_components, n_state,
                            annotations=annotations,
                            annotation_type=annotation_type)

# Project samples

In [None]:
state = 12
# `labels` is never defined in this notebook, so the original line raised
# NameError on a fresh kernel. The per-k label table produced by
# define_states is `state_labels_all`.
# NOTE(review): confirm that `state_labels_all` is the table `labels` referred to.
# `.ix` was removed in pandas 1.0; `.loc` selects the row labelled `state`.
states = state_labels_all.loc[state, :]

In [None]:
# Baseline map: `h` with the selected `states`, no test samples.
ccal.onco_gps.make_map(h, states)

In [None]:
# Pass the same data as both the training input and the test input
# (h_test=h, states_test=states).
# NOTE(review): presumably the test samples should then coincide with the
# training samples on the map — confirm visually.
ccal.onco_gps.make_map(h, states, h_test=h, states_test=states)

In [None]:
n = 10
# Selects the first n items; identical to writing `[:n]`.
first_n = slice(n)

# 1) h_test_normalization left at make_map's default.
ccal.onco_gps.make_map(
    h, states,
    h_test=h.iloc[:, first_n],
    states_test=states[first_n],
)
# 2) h_test_normalization set to 'clip_and_0-1'.
ccal.onco_gps.make_map(
    h, states,
    h_test=h.iloc[:, first_n],
    states_test=states[first_n],
    h_test_normalization='clip_and_0-1',
)
# 3) h_test_normalization explicitly set to None.
ccal.onco_gps.make_map(
    h, states,
    h_test=h.iloc[:, first_n],
    states_test=states[first_n],
    h_test_normalization=None,
)

# Test with real KRAS data

In [None]:
# H matrix read from the KRAS GCT file.
H = ccal.support.read_gct('/home/cyborg/Downloads/KRAS_Matrix.gct')

# Tab-separated membership table; the first column becomes the index.
MEMBERSHIPS = pd.read_csv('/home/cyborg/Downloads/KRAS_membership.gct', sep='\t', index_col=0)
# `.ix` was removed in pandas 1.0; `.loc` selects the 'k_17' column by label.
K17_MEMBERSHIPS = MEMBERSHIPS.loc[:, 'k_17']
# Each membership is a string: drop its first character and parse the rest as
# an int (presumably a one-letter prefix such as 'C3' -> 3 — confirm against the file).
K17_MEMBERSHIPS_INT = K17_MEMBERSHIPS.apply(lambda s: int(s[1:])).tolist()

In [None]:
# Map of H with the k_17 memberships as states; both background marker sizes
# are set to 0.
# NOTE(review): presumably this hides the background markers — confirm against make_map.
ccal.onco_gps.make_map(H, K17_MEMBERSHIPS_INT, background_markersize=0, background_mask_markersize=0)

In [None]:
training_h = H
X = 3   # number of largest values per column that form the numerator
Y = 10  # factor applied to the min-max-scaled ratios before clipping

# For each column of training_h:
#   (sum of the X largest values / sum of the remaining values) * column total.
ratios = np.empty(training_h.shape[1])
# DataFrame.iteritems() was removed in pandas 2.0; items() yields the same
# (column label, column Series) pairs.
for i, (_, c) in enumerate(training_h.items()):
    c_sorted = c.sort_values(ascending=False)
    # .iloc makes the positional split explicit regardless of the index type.
    ratios[i] = float(c_sorted.iloc[:X].sum() / c_sorted.iloc[X:].sum()) * c.sum()

# Min-max scale to [0, 1], stretch by Y, then cap at 1.
normalized_ratios = (ratios - ratios.min()) / (ratios.max() - ratios.min())
normalized_ratios *= Y
# ndarray.clip returns a new array; the original discarded it, leaving
# `normalized_ratios` unclipped. Store the result, then display it.
normalized_ratios = normalized_ratios.clip(0, 1)
normalized_ratios

In [None]:
# Same call, with the same arguments, as the earlier KRAS map cell:
# H with the k_17 memberships and both background marker sizes set to 0.
ccal.onco_gps.make_map(H, K17_MEMBERSHIPS_INT, background_markersize=0, background_mask_markersize=0)

In [None]:
RPKM = ccal.support.read_gct('/home/cyborg/data/ccle/ccle_rpkm.gct')

# `.ix` was removed in pandas 1.0; `.loc` selects the 'CD274' row by label.
CD274_RPKM = RPKM.loc['CD274', :]
# Samples present in both the RPKM row and H. `Index & Index` as a set
# intersection was deprecated and later changed meaning in pandas, so call
# .intersection() explicitly.
CD274_SAMPLES_IN_H = CD274_RPKM.index.intersection(H.columns)
H_CD274 = H.loc[:, CD274_SAMPLES_IN_H]
# Memberships of the shared samples, parsed the same way as K17_MEMBERSHIPS_INT
# (first character dropped, remainder converted to int).
K17_MEMNERSHIPS_CD274 = K17_MEMBERSHIPS.loc[CD274_SAMPLES_IN_H].apply(lambda s: int(s[1:])).tolist()

In [None]:
# Annotate every column of H with its CD274 value.
# `.ix` was removed in pandas 1.0. reindex() keeps its behaviour of yielding
# NaN for H columns that are absent from CD274_RPKM, where `.loc` would raise
# KeyError.
ccal.onco_gps.make_map(H, K17_MEMBERSHIPS_INT, annotations=CD274_RPKM.reindex(H.columns))

In [None]:
# Project the samples shared with the RPKM data and annotate them with their
# CD274 values. H_CD274's columns were taken from CD274_RPKM's index, so every
# label exists and `.loc` (replacing the removed `.ix`) cannot miss.
ccal.onco_gps.make_map(
    H,
    K17_MEMBERSHIPS_INT,
    h_test=H_CD274,
    states_test=K17_MEMNERSHIPS_CD274,
    annotations=CD274_RPKM.loc[H_CD274.columns],
)