From 760d1f7bd96c3eb6cc863ae65df88f6083bb2ab8 Mon Sep 17 00:00:00 2001
From: Lee Cooper <cooperle@gmail.com>
Date: Fri, 19 Aug 2016 00:51:11 -0400
Subject: [PATCH 1/5] Converted to matplotlib. Added KM and ScatterPair
 functions.

---
 survivalnet/analysis/Visualize.py | 516 +++++++++++++++++++-----------
 1 file changed, 330 insertions(+), 186 deletions(-)

diff --git a/survivalnet/analysis/Visualize.py b/survivalnet/analysis/Visualize.py
index c3a2603..54a0d44 100644
--- a/survivalnet/analysis/Visualize.py
+++ b/survivalnet/analysis/Visualize.py
@@ -1,15 +1,41 @@
+from lifelines import KaplanMeierFitter
+import matplotlib.pyplot as plt
 import numpy as np
-import plotly as py
-import plotly.graph_objs as go
+from statsmodels.nonparametric.smoothers_lowess import lowess
+from textwrap import wrap
 
 from . import RiskCohort
+from . import RiskCluster
 
 # define colors for positive risk (red) and negative risk (blue)
-Red = 'rgba(222,45,38,0.8)'
-Blue = 'rgb(49,130,189)'
-
-
-def Visualize(Model, Features, Symbols, N=30):
+REDFACE = '#DE2D26'
+BLUEFACE = '#3182BD'
+REDEDGE = '#DE2D26'
+BLUEEDGE = '#3182BD'
+MEDIAN = '#000000'
+WHISKER = '#AAAAAA'
+POINTS = '#000000'
+GRID = '#BBBBBB'
+
+# layout constants general
+WRAP = 20  # number of characters for text wrapping
+SPACING = 0.2  # margin
+
+# layout constants for boxplot
+BOX_FH = 4  # boxplot figure height
+BOX_FW = 8  # boxplot figure width
+JITTER = 0.08
+
+# layout constants for pairwise feature plot
+PAIR_FW = 10
+
+# layout constants for survival plot
+SURV_FW = 6
+SURV_FH = 6
+
+
+def Visualize(Model, Normalized, Raw, Symbols, Survival, Censored,
+              GeneSet=False, N=30, Tau=0.05, Path=None):
     """
     Generate visualizations of risk profiles. Backpropagation is used to
 
@@ -46,26 +72,39 @@ def Visualize(Model, Features, Symbols, N=30):
             Corrected[i] = Prefix[i] + Suffix[i]
 
     # generate risk derivative profiles for cohort
-    Gradients = RiskCohort(Model, Features)
-
-    # generate ranked bar chart
-    RankedBar(Gradients, Symbols, N)
+    Gradients = RiskCohort(Model, Normalized)
 
     # generate ranked box plot series
-    RankedBox(Gradients, Symbols, N)
+    RBFig = RankedBox(Gradients, Symbols, N)
 
     # generate paired scatter plot
-    PairScatter(Gradients, Symbols, N)
+    PSFig = PairScatter(Gradients, Symbols, N)
+
+    # generate cluster plot
+    CFig = RiskCluster(Gradients, Raw, Symbols, N, Tau)
+
+    # generate Kaplan-Meier plots for individual features
+    KMFigs, KMNames = KMPlots(Raw, Symbols, Survival, Censored, N)
+
+    # save figures
+    if Path is not None:
+
+        # save standard figures
+        RBFig.savefig(Path + 'RankedBox.pdf')
+        PSFig.savefig(Path + 'PairedScatter.pdf')
+        CFig.savefig(Path + 'Heatmap.pdf')
+        for i, Figure in enumerate(KMFigs):
+            Figure.savefig(Path + 'KM.' + KMNames[i] + '.pdf')
 
 
-def RankedBar(Gradients, Symbols, N=30):
+def RankedBox(Gradients, Symbols, N=30):
     """
-    Generates bar chart of feature gradients ranked by absolute magnitude.
+    Generates boxplot series of feature gradients ranked by absolute magnitude.
 
     Parameters:
     ----------
 
-    Risk_Gradients: numpy matrix
+    Gradients: numpy matrix
     a matrix containing feature weights.
 
     Symbols: numpy nd array
@@ -74,44 +113,74 @@ def RankedBar(Gradients, Symbols, N=30):
     N: integer value
     number of featurs to display in barchart.
 
+    Returns
+    -------
+    Figure : figure handle
+        Handle to figure used for saving image to disk i.e.
+        Figure.savefig('heatmap.pdf')
     """
 
-    # calculate means, standard deviations if multiple sample provided
-    if(Gradients.shape[0] > 1):
-        Mean = np.asarray(np.mean(Gradients, axis=0))
-        Std = np.asarray(np.std(Gradients, axis=0))
-        data = zip(Symbols, Mean, Std)
-    else:
-        data = zip(Symbols, np.asarray(Gradients)[0])
-
-    # sort by mean gradient for cohorts, gradient for individual samples
-    data = sorted(data, key=lambda x: np.abs(x[1]), reverse=True)
-
-    # generate variables for visualization
-    if(Gradients.shape[1] > 1):
-        Means = [X[1] for X in data[0:N]]
-        Stdevs = [X[2] for X in data[0:N]]
-        Colors = [Red if X[1] > 0 else Blue for X in data[0:N]]
-        Labels = [X[0] for X in data[0:N]]
-    else:
-        Values = [X[1] for X in data[0:N]]
-        Colors = [Red if X[1] > 0 else Blue for X in data[0:N]]
-        Labels = [X[0] for X in data[0:N]]
-
-    # generate plot
-    if(Gradients.shape[1] > 1):
-        trace = [go.Bar(x=Labels, y=Means, type='bar',
-                 error_y=dict(type='data', array=Stdevs, visible=True),
-                 name='Risk Gradient',
-                 marker=dict(color=Colors))]
-    else:
-        trace = [go.Bar(x=Labels, y=Values, type='bar',
-                 name='Risk Gradient',
-                 marker=dict(color=Colors))]
-    py.offline.plot(trace, filename='RankedBar')
+    # generate mean values
+    Means = np.asarray(np.mean(Gradients, axis=0))
 
+    # sort features by mean absolute gradient
+    Order = np.argsort(-np.abs(Means))
+
+    # generate figure and add axes
+    Figure = plt.figure(figsize=(BOX_FW, BOX_FH), facecolor='white')
+    Axes = Figure.add_axes([SPACING, SPACING, 1-2*SPACING, 1-2*SPACING],
+                           frame_on=False)
+    Axes.set_axis_bgcolor('white')
+
+    # generate boxplots
+    Box = Axes.boxplot(Gradients[:, Order[0:N]],
+                       patch_artist=True,
+                       showfliers=False)
+
+    # set global properties
+    plt.setp(Box['medians'], color=MEDIAN, linewidth=1)
+    plt.setp(Box['whiskers'], color=WHISKER, linewidth=1, linestyle='-')
+    plt.setp(Box['caps'], color=WHISKER, linewidth=1)
+
+    # modify box styling
+    for i, box in enumerate(Box['boxes']):
+        if Means[Order[i]] <= 0:
+            box.set(color=BLUEEDGE, linewidth=2)
+            box.set(facecolor=BLUEFACE)
+        else:
+            box.set(color=REDEDGE, linewidth=2)
+            box.set(facecolor=REDFACE)
 
-def RankedBox(Gradients, Symbols, N=30):
+    # add jittered data overlays
+    for i in np.arange(N):
+        plt.scatter(np.random.normal(i+1, JITTER, size=Gradients.shape[0]),
+                    Gradients[:, Order[i]], color=POINTS, alpha=0.2,
+                    marker='o', s=2,
+                    zorder=100)
+
+    # set limits
+    Axes.set_ylim(1.05 * Gradients.min(), 1.05 * Gradients.max())
+
+    # format x axis
+    plt.xlabel('Model Features')
+    Fixed = _FixSymbols(Symbols)
+    Names = plt.setp(Axes, xticklabels=[Fixed[Order[i]] for i in np.arange(N)])
+    plt.setp(Names, rotation=90, fontsize=10)
+    Axes.set_xticks(np.linspace(1.5, N-0.5, N-1), minor=True)
+    Axes.xaxis.set_ticks_position('bottom')
+
+    # format y axis
+    plt.ylabel('Risk Gradient')
+    Axes.yaxis.set_ticks_position('left')
+
+    # add grid lines and zero line
+    Axes.xaxis.grid(True, color=GRID, linestyle='-', which='minor')
+    plt.axhline(0, color='black')
+
+    return Figure
+
+
+def PairScatter(Gradients, Symbols, N=30):
     """
     Generates boxplot series of feature gradients ranked by absolute magnitude.
 
@@ -129,41 +198,68 @@ def RankedBox(Gradients, Symbols, N=30):
 
     """
 
-    # generate mean values
+    # calculate means, standard deviations
     Means = np.asarray(np.mean(Gradients, axis=0))
+    Std = np.asarray(np.std(Gradients, axis=0))
 
-    # generate colors
-    Colors = [Red if mean > 0 else Blue for mean in Means]
-
-    # zip data
-    data = zip(Symbols, Means, Colors, list(np.array(Gradients).transpose()))
-
-    # sort by mean gradient for cohorts, gradient for individual samples
-    data = sorted(data, key=lambda x: np.abs(x[1]), reverse=True)
+    # sort features by mean absolute gradient
+    Order = np.argsort(-np.abs(Means))
+
+    # generate subplots
+    Figure, Axes = plt.subplots(nrows=N, ncols=N,
+                                figsize=(PAIR_FW, PAIR_FW),
+                                facecolor='white')
+    Figure.subplots_adjust(hspace=SPACING, wspace=SPACING, bottom=SPACING)
+
+    # remove axes and ticks
+    for ax in Axes.flat:
+        ax.xaxis.set_visible(False)
+        ax.yaxis.set_visible(False)
+
+    # generate scatter plots in upper triangular portion
+    for i, j in zip(*np.triu_indices_from(Axes, k=1)):
+        Axes[i, j].scatter((Gradients[:, Order[j]]-Means[Order[j]]) /
+                           Std[Order[j]],
+                           (Gradients[:, Order[i]]-Means[Order[i]]) /
+                           Std[Order[i]],
+                           color=POINTS, alpha=0.2, marker='o', s=2)
+        Smooth = lowess((Gradients[:, Order[j]]-Means[Order[j]]) /
+                        Std[Order[j]],
+                        (Gradients[:, Order[i]]-Means[Order[i]]) /
+                        Std[Order[i]])
+        Axes[i, j].plot(Smooth[:, 1], Smooth[:, 0], color='red')
+
+    # generate histograms on diagonal
+    Fixed = _FixSymbols(Symbols, WRAP)
+    for i in np.arange(N):
+        if Means[Order[i]] <= 0:
+            Axes[i, i].hist(Gradients[:, Order[i]],
+                            facecolor=BLUEFACE,
+                            alpha=0.8)
+        else:
+            Axes[i, i].hist(Gradients[:, Order[i]],
+                            facecolor=REDFACE,
+                            alpha=0.8)
+        Axes[i, i].annotate(Fixed[Order[i]], (0, 0),
+                            xycoords='axes fraction',
+                            ha='right', va='top',
+                            rotation=45)
 
-    # generate boxplot traces
-    Traces = []
-    for Symbol, Mean, Color, Points in data[0:N]:
-        Traces.append(go.Box(y=Points,
-                             name=Symbol,
-                             jitter=0.5,
-                             whiskerwidth=0.2,
-                             boxpoints='all',
-                             fillcolor=Color,
-                             marker=dict(size=1, color=Color),
-                             line=dict(width=1),))
+    # delete unused axes
+    for i, j in zip(*np.tril_indices_from(Axes, k=-1)):
+        Figure.delaxes(Axes[i, j])
 
-    py.offline.plot(Traces, filename='RankedBox')
+    return Figure
 
 
-def PairScatter(Gradients, Symbols, N=30):
+def KMPlots(Raw, Symbols, Survival, Censored, N=30):
     """
-    Generates boxplot series of feature gradients ranked by absolute magnitude.
+    Generates KM plots for individual features ranked by absolute magnitude.
 
     Parameters:
     ----------
 
-    Risk_Gradients: numpy matrix
+    Gradients: numpy matrix
     a matrix containing feature weights.
 
     Symbols: numpy nd array
@@ -172,128 +268,176 @@ def PairScatter(Gradients, Symbols, N=30):
     N: integer value
     number of featurs to display in barchart.
 
+    Returns
+    -------
+    Figures : figure handle
+    List containing handles to figures.
+
+    Names : array_like
+    List of feature names for figures in 'Figures'
+
+    Notes
+    -----
+    Note this uses feature values as opposed to back-propagated risk gradients.
     """
 
-    # calculate means, standard deviations
-    Means = np.asarray(np.mean(Gradients, axis=0))
-    Std = np.asarray(np.std(Gradients, axis=0))
+    # initialize list of figures and names
+    Figures = []
+    Names = []
 
-    # zip data
-    data = zip(Symbols, Means, Std, list(np.array(Gradients).transpose()))
+    # generate mean values
+    Means = np.asarray(np.mean(Raw, axis=0))
 
-    # sort by mean gradient for cohorts, gradient for individual samples
-    data = sorted(data, key=lambda x: np.abs(x[1]), reverse=True)
+    # sort features by mean absolute gradient
+    Order = np.argsort(-np.abs(Means))
 
-    # generate subplot titles
-    Titles = [data[0][0]]
-    for i in np.arange(1, N):
-        for j in np.arange(N):
-            Titles.append("")
-        Titles.append(data[i][0])
-    Titles = tuple(Titles)
+    # generate Kaplan Meier fitter
+    kmf = KaplanMeierFitter()
 
-    # generate subplot matrix
-    Figure = py.tools.make_subplots(rows=N, cols=N, subplot_titles=Titles)
+    # generate KM plot for each feature
+    for count, i in enumerate(Order[0:N]):
 
-    # generate individual subplots
-    for i in np.arange(N):
+        # generate figure and axes
+        Figures.append(plt.figure(figsize=(SURV_FW, SURV_FH),
+                                  facecolor='white'))
+        Axes = Figures[count].add_axes([SPACING, SPACING,
+                                        1-2*SPACING, 1-2*SPACING])
 
-        # append scatter plot for each variable pair
-        for j in np.arange(i):
-            Figure.append_trace(go.Scatter(x=data[i][3] / data[i][2],
-                                           y=data[j][3] / data[j][2],
-                                           text=str(1.0),
-                                           mode='markers',
-                                           marker=dict(color='grey',
-                                                       size=1)),
-                                i+1, j+1)
-
-        # add histograms on diagonal
-        Figure.append_trace(go.Histogram(x=np.array(Gradients[:, i] /
-                                                    Std[i]).squeeze(),
-                                         marker=dict(color='red')),
-                            i+1, i+1)
+        # generate names
+        Names.append(Symbols[i])
 
-    for i in np.arange(N):
+        # extract suffix to classify feature
+        Suffix = Symbols[i][str.rfind(str(Symbols[i]), '_'):].strip()
 
-        # append scatter plot for each variable pair
-        for j in np.arange(i+1, N):
-            rho = np.sum(((data[i][3] - data[i][1]) / data[i][2]) * \
-                        ((data[j][3] - data[j][1]) / data[j][2]))
-
-            Figure.append_trace(go.Scatter(x=[],
-                                           y=[],
-                                           text=str(rho),
-                                           mode='markers',
-                                           marker=dict(color='grey',
-                                                       size=1)),
-                                i+1, j+1)
-
-        # add histograms on diagonal
-        Figure.append_trace(go.Histogram(x=np.array(Gradients[:, i] /
-                                                    Std[i]).squeeze(),
-                                         marker=dict(color='red')),
-                            i+1, i+1)
-
-    # perform layouts for individual subtypes
-    for i in np.arange(N):
-        for j in np.arange(N):
-
-            Index = i*N + j + 1
-
-            if (j < i):
-
-                # calculate index of lower triangular plot
-                Index = i*N + j + 1
-
-                # update x,y axis layout for scatter plots
-                Figure['layout']['xaxis'+str(Index)].update(autorange=True,
-                                                            showgrid=False,
-                                                            zeroline=False,
-                                                            showline=True,
-                                                            autotick=True,
-                                                            ticks='',
-                                                            showticklabels=False,
-                                                            linecolor='#636363',
-                                                            linewidth=1)
-                Figure['layout']['yaxis'+str(Index)].update(autorange=True,
-                                                            showgrid=False,
-                                                            zeroline=False,
-                                                            showline=True,
-                                                            autotick=True,
-                                                            ticks='',
-                                                            showticklabels=False,
-                                                            linecolor='#636363',
-                                                            linewidth=1)
-
-            elif j == i:
-
-                # update histogram layouts
-                Figure['layout']['yaxis'+str(Index)].update(autorange=True,
-                                                            showgrid=False,
-                                                            zeroline=False,
-                                                            showline=False,
-                                                            autotick=True,
-                                                            ticks='',
-                                                            showticklabels=False)
+        if Suffix == '_Clinical':
+
+            # get unique values to determine if binary or continuous
+            Unique = np.unique(Raw[:, i])
+
+            # process based on variable type
+            if Unique.size == 2:
+
+                # extract and plot mutant and wild-type survival profiles
+                kmf.fit(Survival[Raw[:, i] == Unique[0]],
+                        Censored[Raw[:, i] == Unique[0]] == 1,
+                        label=Symbols[i] + str(Unique[0]))
+                kmf.plot(ax=Axes)
+                kmf.fit(Survival[Raw[:, i] == Unique[1]],
+                        Censored[Raw[:, i] == Unique[1]] == 1,
+                        label=Symbols[i] + str(Unique[1]))
+                kmf.plot(ax=Axes)
+                plt.ylim(0, 1)
 
             else:
 
-                # update x,y axis layout for scatter plots
-                Figure['layout']['xaxis'+str(Index)].update(autorange=True,
-                                                            showgrid=False,
-                                                            zeroline=False,
-                                                            showline=False,
-                                                            autotick=True,
-                                                            ticks='',
-                                                            showticklabels=False)
-                Figure['layout']['yaxis'+str(Index)].update(autorange=True,
-                                                            showgrid=False,
-                                                            zeroline=False,
-                                                            showline=False,
-                                                            autotick=True,
-                                                            ticks='',
-                                                            showticklabels=False)
-
-    # generate plot
-    py.offline.plot(Figure, filename='PairScatter')
+                # determine median value
+                Median = np.median(Raw[:, i])
+
+                # extract and altered and unaltered survival profiles
+                kmf.fit(Survival[Raw[:, i] > Median],
+                        Censored[Raw[:, i] > Median] == 1,
+                        label=Symbols[i] + " > " + str(Median))
+                kmf.plot(ax=Axes)
+                kmf.fit(Survival[Raw[:, i] <= Median],
+                        Censored[Raw[:, i] <= Median] == 1,
+                        label=Symbols[i] + " <= " + str(Median))
+                kmf.plot(ax=Axes)
+                plt.ylim(0, 1)
+
+        elif Suffix == '_Mut':
+
+            # extract and plot mutant and wild-type survival profiles
+            kmf.fit(Survival[Raw[:, i] == 1],
+                    Censored[Raw[:, i] == 1] == 1,
+                    label=Symbols[i] + " Mutant")
+            kmf.plot(ax=Axes)
+            kmf.fit(Survival[Raw[:, i] == 0],
+                    Censored[Raw[:, i] == 0] == 1,
+                    label=Symbols[i] + " Mutant")
+            kmf.plot(ax=Axes)
+            plt.ylim(0, 1)
+
+        elif Suffix == '_CNV':
+
+            # determine if alteration is amplification or deletion
+            Amplified = np.mean(Raw[:, i]) > 0
+
+            # extract and plot altered and unaltered survival profiles
+            if Amplified:
+                kmf.fit(Survival[Raw[:, i] > 0],
+                        Censored[Raw[:, i] > 0] == 1,
+                        label=Symbols[i] + " Amplified")
+                kmf.plot(ax=Axes)
+                kmf.fit(Survival[Raw[:, i] <= 0],
+                        Censored[Raw[:, i] <= 0] == 1,
+                        label=Symbols[i] + " not Amplified")
+                kmf.plot(ax=Axes)
+            else:
+                kmf.fit(Survival[Raw[:, i] < 0],
+                        Censored[Raw[:, i] < 0] == 1,
+                        label=Symbols[i] + " Deleted")
+                kmf.plot(ax=Axes)
+                kmf.fit(Survival[Raw[:, i] >= 0],
+                        Censored[Raw[:, i] >= 0] == 1,
+                        label=Symbols[i] + " not Deleted")
+                kmf.plot(ax=Axes)
+            plt.ylim(0, 1)
+
+        elif Suffix == '_CNVArm':
+
+            # determine if alteration is amplification or deletion
+            Amplified = np.mean(Raw[:, i]) > 0
+
+            # extract and plot altered and unaltered survival profiles
+            if Amplified:
+                kmf.fit(Survival[Raw[:, i] > 0.25],
+                        Censored[Raw[:, i] > 0.25] == 1,
+                        label=Symbols[i] + " Amplified")
+                kmf.plot(ax=Axes)
+                kmf.fit(Survival[Raw[:, i] <= 0.25],
+                        Censored[Raw[:, i] <= 0.25] == 1,
+                        label=Symbols[i] + " not Amplified")
+                kmf.plot(ax=Axes)
+            else:
+                kmf.fit(Survival[Raw[:, i] < -0.25],
+                        Censored[Raw[:, i] < -0.25] == 1,
+                        label=Symbols[i] + " Deleted")
+                kmf.plot(ax=Axes)
+                kmf.fit(Survival[Raw[:, i] >= -0.25],
+                        Censored[Raw[:, i] >= -0.25] == 1,
+                        label=Symbols[i] + " not Deleted")
+                kmf.plot(ax=Axes)
+            plt.ylim(0, 1)
+
+        elif (Suffix == '_Protein') or (Suffix == '_mRNA'):
+
+            # determine median expression
+            Median = np.median(Raw[:, i])
+
+            # extract and altered and unaltered survival profiles
+            kmf.fit(Survival[Raw[:, i] > Median],
+                    Censored[Raw[:, i] > Median] == 1,
+                    label=Symbols[i] + " Higher Expression")
+            kmf.plot(ax=Axes)
+            kmf.fit(Survival[Raw[:, i] <= Median],
+                    Censored[Raw[:, i] <= Median] == 1,
+                    label=Symbols[i] + " Lower Expression")
+            kmf.plot(ax=Axes)
+            plt.ylim(0, 1)
+
+        else:
+            raise ValueError('Unrecognized feature type')
+
+    return Figures, Names
+
+
+def _FixSymbols(Symbols, Length=WRAP):
+    """
+    Removes trailing and leading whitespace and wraps long labels
+    """
+
+    # remove whitespace and wrap
+    Fixed = ['\n'.join(wrap(Symbol.strip().replace('_', ' '), Length))
+             for Symbol in Symbols]
+
+    return Fixed

From 4dc87b7932407e38992758fe93e83f5ddb3a755f Mon Sep 17 00:00:00 2001
From: Lee Cooper <cooperle@gmail.com>
Date: Fri, 19 Aug 2016 00:52:08 -0400
Subject: [PATCH 2/5] Updated submodule name to "analysis".

---
 survivalnet/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/survivalnet/__init__.py b/survivalnet/__init__.py
index b02d01a..c1d408a 100644
--- a/survivalnet/__init__.py
+++ b/survivalnet/__init__.py
@@ -8,7 +8,7 @@
 from .train import train
 
 # sub-packages with no internal dependencies
-from . import sensitivity
+from . import analysis
 
 # must be imported before Bayesian_Optimizaiton
 #from .CostFunction import cost_func, aggr_st_cost_func, st_cost_func
@@ -26,5 +26,5 @@
     # sub-packages
     'model',
     'optimization',
-    'sensitivity',
+    'analysis',
 )

From d27dc06e84afd1323b47dd3d76086d79854f55c4 Mon Sep 17 00:00:00 2001
From: Lee Cooper <cooperle@gmail.com>
Date: Fri, 19 Aug 2016 00:53:41 -0400
Subject: [PATCH 3/5] Moved matrix conversion into function.

---
 survivalnet/analysis/RiskCohort.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/survivalnet/analysis/RiskCohort.py b/survivalnet/analysis/RiskCohort.py
index c358b23..d3ef7ef 100644
--- a/survivalnet/analysis/RiskCohort.py
+++ b/survivalnet/analysis/RiskCohort.py
@@ -36,9 +36,12 @@ def RiskCohort(Model, Features):
     # initialize container for risk gradient profiles
     Gradients = np.zeros(Features.shape)
 
+    # copy input to matrix for Theano
+    Matrix = np.matrix(Features)
+
     # iterate through samples, calculating risk gradient profile for each
     for i in np.arange(Features.shape[0]):
-        Gradients[i, :] = _RiskBackpropagate(Model, Features[i, :])
+        Gradients[i, :] = _RiskBackpropagate(Model, Matrix[i, :])
 
     return Gradients
 

From cf7f54d9f92efbca6af12959f1e8305b113b9d96 Mon Sep 17 00:00:00 2001
From: Lee Cooper <cooperle@gmail.com>
Date: Fri, 19 Aug 2016 01:20:50 -0400
Subject: [PATCH 4/5] Added stdout messaging. Removed unused input parameter.
 Reduced default value for N.

---
 survivalnet/analysis/Visualize.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/survivalnet/analysis/Visualize.py b/survivalnet/analysis/Visualize.py
index 54a0d44..7268e72 100644
--- a/survivalnet/analysis/Visualize.py
+++ b/survivalnet/analysis/Visualize.py
@@ -35,7 +35,7 @@
 
 
 def Visualize(Model, Normalized, Raw, Symbols, Survival, Censored,
-              GeneSet=False, N=30, Tau=0.05, Path=None):
+              N=10, Tau=0.05, Path=None):
     """
     Generate visualizations of risk profiles. Backpropagation is used to
 
@@ -72,21 +72,27 @@ def Visualize(Model, Normalized, Raw, Symbols, Survival, Censored,
             Corrected[i] = Prefix[i] + Suffix[i]
 
     # generate risk derivative profiles for cohort
+    print "Generting risk gradient profiles..."
     Gradients = RiskCohort(Model, Normalized)
 
     # generate ranked box plot series
+    print "Generating risk gradient boxplot..."
     RBFig = RankedBox(Gradients, Symbols, N)
 
     # generate paired scatter plot
+    print "Generating paired scatter plots..."
     PSFig = PairScatter(Gradients, Symbols, N)
 
     # generate cluster plot
+    print "Generating cluster analysis..."
     CFig = RiskCluster(Gradients, Raw, Symbols, N, Tau)
 
     # generate Kaplan-Meier plots for individual features
+    print "Generating Kaplan-Meier plots..."
     KMFigs, KMNames = KMPlots(Raw, Symbols, Survival, Censored, N)
 
     # save figures
+    print "Saving figures..."
     if Path is not None:
 
         # save standard figures

From 0ef86deb195914ff78c4b97b1dff075c3cb07e76 Mon Sep 17 00:00:00 2001
From: Lee Cooper <cooperle@gmail.com>
Date: Fri, 19 Aug 2016 02:55:29 -0400
Subject: [PATCH 5/5] Fixed imports, added docstrings.

---
 survivalnet/analysis/RiskCluster.py  |   8 +-
 survivalnet/analysis/Visualize.py    | 115 ++++++++++++++++++---------
 survivalnet/analysis/__init__.py     |   4 +-
 survivalnet/optimization/__init__.py |   1 -
 4 files changed, 85 insertions(+), 43 deletions(-)

diff --git a/survivalnet/analysis/RiskCluster.py b/survivalnet/analysis/RiskCluster.py
index a8946df..d66af47 100644
--- a/survivalnet/analysis/RiskCluster.py
+++ b/survivalnet/analysis/RiskCluster.py
@@ -1,5 +1,5 @@
 import matplotlib as mpl
-import matplotlib.pyplot as pylab
+import matplotlib.pyplot as plt
 import numpy as np
 import scipy.cluster.hierarchy as sch
 import scipy.spatial.distance as dist
@@ -85,7 +85,7 @@ def RiskCluster(Gradients, Raw, Symbols, N=30, Tau=0.05):
     Normalized = Normalized.transpose()
 
     # generate figure
-    Figure = pylab.figure(figsize=(WINDOW_WIDTH, WINDOW_HEIGHT))
+    Figure = plt.figure(figsize=(WINDOW_WIDTH, WINDOW_HEIGHT))
 
     # cluster samples and generate dendrogram
     SampleDist = dist.pdist(Normalized.T, 'correlation')
@@ -137,7 +137,7 @@ def RiskCluster(Gradients, Raw, Symbols, N=30, Tau=0.05):
     Heatmap = Figure.add_axes([HEATMAP_X, HEATMAP_Y, HEATMAP_W, HEATMAP_H],
                               frame_on=False)
     Heatmap.matshow(Reordered, aspect='auto', origin='lower',
-                    cmap=pylab.cm.bwr)
+                    cmap=plt.cm.bwr)
     Heatmap.set_xticks([])
     Heatmap.set_yticks([])
 
@@ -175,7 +175,7 @@ def RiskCluster(Gradients, Raw, Symbols, N=30, Tau=0.05):
     cnv = Figure.add_axes([TRACK_X, TRACK_Y,
                            TRACK_W, TRACK_H - len(SigMut)*TRACK],
                           frame_on=False)
-    cnv.matshow(CNVs, aspect='auto', origin='lower', cmap=pylab.cm.bwr)
+    cnv.matshow(CNVs, aspect='auto', origin='lower', cmap=plt.cm.bwr)
     for i in range(len(SigCNV)):
         cnv.text(-SPACING, i / np.float(len(SigCNV)) +
                  1/np.float(2*len(SigCNV)),
diff --git a/survivalnet/analysis/Visualize.py b/survivalnet/analysis/Visualize.py
index 7268e72..bbf86f4 100644
--- a/survivalnet/analysis/Visualize.py
+++ b/survivalnet/analysis/Visualize.py
@@ -4,8 +4,8 @@
 from statsmodels.nonparametric.smoothers_lowess import lowess
 from textwrap import wrap
 
-from . import RiskCohort
-from . import RiskCluster
+from .RiskCohort import RiskCohort
+from .RiskCluster import RiskCluster
 
 # define colors for positive risk (red) and negative risk (blue)
 REDFACE = '#DE2D26'
@@ -41,19 +41,39 @@ def Visualize(Model, Normalized, Raw, Symbols, Survival, Censored,
 
     Parameters:
     -----------
-
     Model : class
-    Model defined by finetuning
+    Model generated by finetuning.
+
+    Normalized : array_like
+    Numpy array containing normalized feature values used in training /
+    finetuning. These are passed to RiskCohort to generate the risk gradient
+    profiles. Features are in columns and samples are in rows.
 
-    Features : array_like
-    An N x P array containing the normalized (z-scored) features used in model
-    finetuning. Contains P features from N total patients.
+    Raw : array_like
+    Numpy array containing raw, unnormalized feature values. These are used to
+    examine associations between feature values and cluster assignments.
+    Features are in columns and samples are in rows.
 
     Symbols : array_like
-    P-length list of strings describing model inputs
+    List containing strings describing features. See Notes below for
+    restrictions on symbol names.
+
+    Survival : array_like
+    Array containing death or last followup values.
+
+    Censored : array_like
+    Array containing vital status at last followup. 1 (alive) or 0 (deceased).
 
-    N : integer
-    Number of features to analyze.
+    N : scalar
+    Number of features to include in analysis. Features are scored by absolute
+    mean gradient and the highest N magnitude features will be used
+    to generate the plot.
+
+    Tau : scalar
+    Threshold for statistical significance when examining cluster associations.
+
+    Path : string
+    Path to store .pdf versions of plots generated.
     """
 
     # modify duplicate symbols where needed - append index to each instance
@@ -85,11 +105,11 @@ def Visualize(Model, Normalized, Raw, Symbols, Survival, Censored,
 
     # generate cluster plot
     print "Generating cluster analysis..."
-    CFig = RiskCluster(Gradients, Raw, Symbols, N, Tau)
+    CFig, Labels = RiskCluster(Gradients, Raw, Symbols, N, Tau)
 
     # generate Kaplan-Meier plots for individual features
     print "Generating Kaplan-Meier plots..."
-    KMFigs, KMNames = KMPlots(Raw, Symbols, Survival, Censored, N)
+    KMFigs, KMNames = KMPlots(Gradients, Raw, Symbols, Survival, Censored, N)
 
     # save figures
     print "Saving figures..."
@@ -103,21 +123,24 @@ def Visualize(Model, Normalized, Raw, Symbols, Survival, Censored,
             Figure.savefig(Path + 'KM.' + KMNames[i] + '.pdf')
 
 
-def RankedBox(Gradients, Symbols, N=30):
+def RankedBox(Gradients, Symbols, N=10):
     """
     Generates boxplot series of feature gradients ranked by absolute magnitude.
 
     Parameters:
     ----------
+    Gradients : array_like
+    Numpy array containing feature/sample gradients obtained by RiskCohort.
+    Features are in columns and samples are in rows.
 
-    Gradients: numpy matrix
-    a matrix containing feature weights.
-
-    Symbols: numpy nd array
-    a matrix of feature Symbols.
+    Symbols : array_like
+    List containing strings describing features. See Notes below for
+    restrictions on symbol names.
 
-    N: integer value
-    number of featurs to display in barchart.
+    N : scalar
+    Number of features to include in analysis. Features are scored by absolute
+    mean gradient and the highest N magnitude features will be used
+    to generate the plot.
 
     Returns
     -------
@@ -186,22 +209,25 @@ def RankedBox(Gradients, Symbols, N=30):
     return Figure
 
 
-def PairScatter(Gradients, Symbols, N=30):
+def PairScatter(Gradients, Symbols, N=10):
     """
     Generates boxplot series of feature gradients ranked by absolute magnitude.
 
     Parameters:
     ----------
 
-    Risk_Gradients: numpy matrix
-    a matrix containing feature weights.
+    Gradients : array_like
+    Numpy array containing feature/sample gradients obtained by RiskCohort.
+    Features are in columns and samples are in rows.
 
-    Symbols: numpy nd array
-    a matrix of feature Symbols.
-
-    N: integer value
-    number of featurs to display in barchart.
+    Symbols : array_like
+    List containing strings describing features. See Notes below for
+    restrictions on symbol names.
 
+    N : scalar
+    Number of features to include in analysis. Features are scored by absolute
+    mean gradient and the highest N magnitude features will be used
+    to generate the plot.
     """
 
     # calculate means, standard deviations
@@ -258,21 +284,36 @@ def PairScatter(Gradients, Symbols, N=30):
     return Figure
 
 
-def KMPlots(Raw, Symbols, Survival, Censored, N=30):
+def KMPlots(Gradients, Raw, Symbols, Survival, Censored, N=10):
     """
     Generates KM plots for individual features ranked by absolute magnitude.
 
     Parameters:
     ----------
 
-    Gradients: numpy matrix
-    a matrix containing feature weights.
+    Gradients : array_like
+    Numpy array containing feature/sample gradients obtained by RiskCohort.
+    Features are in columns and samples are in rows.
 
-    Symbols: numpy nd array
-    a matrix of feature Symbols.
+    Raw : array_like
+    Numpy array containing raw, unnormalized feature values. These are used to
+    examine associations between feature values and cluster assignments.
+    Features are in columns and samples are in rows.
+
+    Symbols : array_like
+    List containing strings describing features. See Notes below for
+    restrictions on symbol names.
 
-    N: integer value
-    number of featurs to display in barchart.
+    Survival : array_like
+    Array containing times to death or last followup.
+
+    Censored : array_like
+    Array containing vital status at last followup. 1 (alive) or 0 (deceased).
+
+    N : scalar
+    Number of features to include in analysis. Features are scored by absolute
+    mean gradient and the highest N magnitude features will be used
+    to generate the plot.
 
     Returns
     -------
@@ -284,6 +325,8 @@ def KMPlots(Raw, Symbols, Survival, Censored, N=30):
 
     Notes
     -----
+    Suffixes like '_Mut' and '_CNV' that are generated by the package
+    tcgaintegrator to identify feature types are required for analysis.
     Note this uses feature values as opposed to back-propagated risk gradients.
     """
 
@@ -292,7 +335,7 @@ def KMPlots(Raw, Symbols, Survival, Censored, N=30):
     Names = []
 
     # generate mean values
-    Means = np.asarray(np.mean(Raw, axis=0))
+    Means = np.asarray(np.mean(Gradients, axis=0))
 
     # sort features by mean absolute gradient
     Order = np.argsort(-np.abs(Means))
diff --git a/survivalnet/analysis/__init__.py b/survivalnet/analysis/__init__.py
index a243f21..051fca3 100644
--- a/survivalnet/analysis/__init__.py
+++ b/survivalnet/analysis/__init__.py
@@ -2,15 +2,15 @@
 
 # must be imported after RiskCohort
 from .Visualize import PairScatter
-from .Visualize import RankedBar
 from .Visualize import RankedBox
+from .RiskCluster import RiskCluster
 from .Visualize import Visualize
 
 # list functions and classes available for public use
 __all__ = (
     'PairScatter',
-    'RankedBar',
     'RankedBox',
+    'RiskCluster',
     'RiskCohort',
     'Visualize',
 )
diff --git a/survivalnet/optimization/__init__.py b/survivalnet/optimization/__init__.py
index 698942b..ff05471 100644
--- a/survivalnet/optimization/__init__.py
+++ b/survivalnet/optimization/__init__.py
@@ -11,7 +11,6 @@
 	'BFGS',
 	'isOverfitting',
 	'GLDS',
-	'LineSearch',
 	'Optimization',
 	'SurvivalAnalysis',
 )