# Publication Data

In [None]:
import numpy as np
import pandas as pd
import os
import glob

import matplotlib.pyplot as plt
import seaborn as sns

from matplotlib import rcParams
# Use Arial, via the sans-serif family, for all figure text.
rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],
})

## Load Data

In [None]:
# Read the measurement workbook; the trailing expression displays (rows, columns).
data_path = 'final_data.xlsx'
df = pd.read_excel(data_path)
df.shape

## Feature Engineering

In [None]:
# Kept as a module-level name for backward compatibility; the plotting cells
# in this notebook spell out their own condition order.
order_ = ['pre', 'd0', 'un']

# Each displacement type has 16 per-slice columns named '<type>__1' ... '<type>__16'.
# (The earlier hand-typed lists with space-padded names such as 'pull__ 1' were
# dead code -- they were overwritten on the very next statement -- and are removed.)
n_slices = 16
pull_cols = ['pull__' + str(i + 1) for i in range(n_slices)]
push_cols = ['push__' + str(i + 1) for i in range(n_slices)]
shear_cols = ['shear__' + str(i + 1) for i in range(n_slices)]

# For every row, count how many of the 16 slice columns are strictly positive.
for feature, slice_cols in (('pull_slices', pull_cols),
                            ('push_slices', push_cols),
                            ('shear_slices', shear_cols)):
    df[feature] = (df[slice_cols] > 0).sum(axis=1)

# Shape anisotropy: ratio of the 'max_ferret_dia' column to the 'equiv_dia' column.
df['shape_aniso'] = df['max_ferret_dia'] / df['equiv_dia']

## Filter data to manually inspected valid cells

In [None]:
# Keep only the rows whose 'is_disp_checked' flag equals 1.
df = df[df['is_disp_checked'].eq(1)]

## Data count

In [None]:
# Number of non-null 'cell' entries for every (drug, induce_type) combination.
(
    df[['drug', 'induce_type', 'cell']]
    .groupby(['drug', 'induce_type'])
    .count()
)

## Select drug condition for which you want to plot the data

In [None]:
# Drug condition to plot, plus a display form of its name.
drug = 'dmso'
drug_ = ' DMSO'

# Rows of the validated table that belong to the selected drug.
df_ = df.loc[df['drug'] == drug]

## Plots comparing DART metrics across conditions

In [None]:
# Shared `figsize` (width, height) passed to plt.subplots() by the box-plot cells below.
fig_size = (3.5,3.5)

In [None]:
col = 'pull_slices'
col_name = 'Contractile Slices'

# Set the seaborn context *before* creating the figure: set_context() updates
# matplotlib rcParams, which are read when artists are created, so calling it
# after plotting (as this cell used to) only styled the next figure.
sns.set_context("talk")

# Make sure the output folder exists so savefig() does not fail on a fresh checkout.
os.makedirs("plot_fig", exist_ok=True)

# induce_type codes in plotting order, and the tick labels shown for them.
induce_order = ['un', 'd0', 'pre']
induce_labels = ['Epithelial', 'Transitory', 'Mesenchymal']

fig, ax = plt.subplots(1, 1, sharex=False, figsize=fig_size)
# Box plot per condition, with the mean drawn as a light-grey line.
sns.boxplot(x='induce_type', y=col, data=df_, order=induce_order, ax=ax, width=0.5,
            showmeans=True, meanline=True, meanprops={'linewidth': 2, 'color': '0.85'})
# Overlay the individual observations as black points.
sns.swarmplot(x='induce_type', y=col, data=df_, order=induce_order, ax=ax, color='k', size=4)
ax.set_yticks([0, 4, 8, 12, 16])
ax.set_ylabel(col_name)
ax.set_xlabel('')
ax.set_xticklabels(induce_labels)
fig.autofmt_xdate()  # used here only to slant the category tick labels
sns.despine();
fig.savefig("plot_fig/contractile.pdf", bbox_inches='tight')

In [None]:
col = 'push_slices'
col_name = 'Protrusive Slices'

# Set the seaborn context *before* creating the figure so this cell does not
# depend on a previously executed cell having set it: set_context() updates
# rcParams that are read when artists are created.
sns.set_context("talk")

# induce_type codes in plotting order, and the tick labels shown for them.
induce_order = ['un', 'd0', 'pre']
induce_labels = ['Epithelial', 'Transitory', 'Mesenchymal']

fig, ax = plt.subplots(1, 1, sharex=False, figsize=fig_size)
# Box plot per condition, with the mean drawn as a light-grey line.
sns.boxplot(x='induce_type', y=col, data=df_, order=induce_order, ax=ax, width=0.5,
            showmeans=True, meanline=True, meanprops={'linewidth': 2, 'color': '0.85'})
# Overlay the individual observations as black points.
sns.swarmplot(x='induce_type', y=col, data=df_, order=induce_order, ax=ax, color='k', size=4)
ax.set_yticks([0, 4, 8, 12, 16])
ax.set_ylabel(col_name)
ax.set_xlabel('')
ax.set_xticklabels(induce_labels)
fig.autofmt_xdate()  # used here only to slant the category tick labels
sns.despine();
fig.savefig("plot_fig/protrusive.pdf", bbox_inches='tight')

In [None]:
col = 'shear_slices'
col_name = 'Circumferential Slices'

# Set the seaborn context *before* creating the figure so this cell does not
# depend on a previously executed cell having set it: set_context() updates
# rcParams that are read when artists are created.
sns.set_context("talk")

# induce_type codes in plotting order, and the tick labels shown for them.
induce_order = ['un', 'd0', 'pre']
induce_labels = ['Epithelial', 'Transitory', 'Mesenchymal']

fig, ax = plt.subplots(1, 1, sharex=False, figsize=fig_size)
# Box plot per condition, with the mean drawn as a light-grey line.
sns.boxplot(x='induce_type', y=col, data=df_, order=induce_order, ax=ax, width=0.5,
            showmeans=True, meanline=True, meanprops={'linewidth': 2, 'color': '0.85'})
# Overlay the individual observations as black points.
sns.swarmplot(x='induce_type', y=col, data=df_, order=induce_order, ax=ax, color='k', size=4)
ax.set_yticks([0, 4, 8, 12, 16])
ax.set_ylabel(col_name)
ax.set_xlabel('')
ax.set_xticklabels(induce_labels)
fig.autofmt_xdate()  # used here only to slant the category tick labels
sns.despine();
fig.savefig("plot_fig/circumferential_disp.pdf", bbox_inches='tight')

In [None]:
col = 'shape_aniso'
col_name = 'Shape Anisotropy'

# Set the seaborn context *before* creating the figure so this cell does not
# depend on a previously executed cell having set it: set_context() updates
# rcParams that are read when artists are created.
sns.set_context("talk")

# induce_type codes in plotting order, and the tick labels shown for them.
induce_order = ['un', 'd0', 'pre']
induce_labels = ['Epithelial', 'Transitory', 'Mesenchymal']

fig, ax = plt.subplots(1, 1, sharex=False, figsize=fig_size)
# Box plot per condition, with the mean drawn as a light-grey line.
sns.boxplot(x='induce_type', y=col, data=df_, order=induce_order, ax=ax, width=0.5,
            showmeans=True, meanline=True, meanprops={'linewidth': 2, 'color': '0.85'})
# Overlay the individual observations as black points.
sns.swarmplot(x='induce_type', y=col, data=df_, order=induce_order, ax=ax, color='k', size=4)
ax.set_yticks([1, 2, 3])
ax.set_ylabel(col_name)
ax.set_xlabel('')
ax.set_xticklabels(induce_labels)
fig.autofmt_xdate()  # used here only to slant the category tick labels
sns.despine();
fig.savefig("plot_fig/shape_aniso.pdf", bbox_inches='tight')

In [None]:
# Set the seaborn context before the figure exists; set_context() updates
# rcParams that are read when artists are created, so the old end-of-cell call
# did not style this figure.
sns.set_context("talk")

f, ax = plt.subplots(1, 1, figsize=(6, 6))

c1 = '#3274a1'
c2 = '#e1812c'
c3 = '#3b913b'

# The slice counts are integers, so points are jittered with Gaussian noise to
# reduce overplotting. A locally seeded generator makes the saved figure
# reproducible without touching NumPy's global random state.
rng = np.random.RandomState(0)
jitter_sd = 0.3

# (induce_type code, marker, legend label, colour)
groups = [
    ('un', 'v', 'Epithelial', c1),
    ('d0', 's', 'Transitory', c2),
    ('pre', 'o', 'Mesenchymal', c3),
]
for induce_code, marker, label, color in groups:
    group = df_[df_['induce_type'] == induce_code]
    # Build jittered coordinates as new Series rather than assigning columns
    # into a filtered slice of df_ (which triggered SettingWithCopyWarning).
    x = group['pull_slices'] + rng.normal(0, jitter_sd, group.shape[0])
    y = group['push_slices'] + rng.normal(0, jitter_sd, group.shape[0])
    ax.scatter(x, y, marker=marker, label=label, color=color)

ax.set_ylabel('Protrusive Slices')
ax.set_xlabel('Contractile Slices')
ax.set_ylim(-1, 17)
ax.set_xlim(-1, 17)
ax.set_xticks([0, 4, 8, 12, 16])
ax.set_yticks([0, 4, 8, 12, 16])
ax.legend()

sns.despine();
f.savefig("plot_fig/scatter.pdf", bbox_inches='tight')

In [None]:
# Set the seaborn context before the figure exists; set_context() updates
# rcParams that are read when artists are created, so the old end-of-cell call
# did not style this figure.
sns.set_context("talk")

f, ax = plt.subplots(1, 1, figsize=(6, 6))

c1 = '#3274a1'
c2 = '#e1812c'
c3 = '#3b913b'

# Locally seeded generator so the jitter (and the saved figure) is reproducible.
rng = np.random.RandomState(0)
# NOTE(review): the same sd=0.3 Gaussian jitter as the slice-count scatter is
# applied here to 'shape_aniso' and 'volume'. These are not integer counts, so
# the noise displaces real measurements -- confirm it is intended for this figure.
jitter_sd = 0.3

# (induce_type code, marker, legend label, colour)
groups = [
    ('un', 'v', 'Epithelial', c1),
    ('d0', 's', 'Transitory', c2),
    ('pre', 'o', 'Mesenchymal', c3),
]
for induce_code, marker, label, color in groups:
    group = df_[df_['induce_type'] == induce_code]
    # Build jittered coordinates as new Series rather than assigning columns
    # into a filtered slice of df_ (which triggered SettingWithCopyWarning).
    x = group['shape_aniso'] + rng.normal(0, jitter_sd, group.shape[0])
    y = group['volume'] + rng.normal(0, jitter_sd, group.shape[0])
    ax.scatter(x, y, marker=marker, label=label, color=color)

ax.set_ylabel('Volume')
ax.set_xlabel('Shape anisotropy')
ax.legend()

sns.despine();
f.savefig("plot_fig/scatter_volume_shapeaniso.pdf", bbox_inches='tight')

### Output data to Excel files for statistical tests in SigmaPlot

In [None]:
output_cols = ['pull_slices', 'push_slices', 'shear_slices', 'shape_aniso']

# One workbook per condition: (induce_type code, destination file). The code is
# also prepended to every exported column name, e.g. 'un_pull_slices'.
# NOTE(review): the targets use the legacy .xls extension -- confirm the installed
# pandas still has a writer engine for that format.
exports = [
    ('un', "plot_fig/DART_epithelial.xls"),
    ('d0', "plot_fig/DART_induced_emt.xls"),
    ('pre', "plot_fig/DART_mesenchymal.xls"),
]
for induce_code, out_path in exports:
    df_temp = df_.loc[df_['induce_type'] == induce_code, output_cols]
    df_temp = df_temp.add_prefix(induce_code + '_')
    df_temp.to_excel(out_path)

## Output data to excel files to plot DARTs

In [None]:
# Work on an explicit copy of the validated rows (all drugs): the dart_* columns
# added below would otherwise be assigned into a filtered slice of `df`.
df_ = df[df['is_disp_checked'] == 1].copy()

pull_cols = ['pull__' + str(i + 1) for i in range(16)]
push_cols = ['push__' + str(i + 1) for i in range(16)]
# Fixed: this list was built with the 'push__' prefix (copy-paste slip), so the
# export contained the push columns a second time instead of the shear columns.
shear_cols = ['shear__' + str(i + 1) for i in range(16)]


def pull_push(m, n):
    """Encode one slice's pull/push activity as a single integer per row.

    Parameters
    ----------
    m : str
        Name of the pull column in ``df_``.
    n : str
        Name of the push column in ``df_``.

    Returns
    -------
    pandas.Series
        0 if neither column is > 0, 1 if only ``m`` is, 2 if only ``n`` is,
        3 if both are.
    """
    return (df_[m] > 0) + 2 * (df_[n] > 0)


# One combined 'dart_<k>' code column per slice.
name_list = ['dart_' + str(i + 1) for i in range(16)]
for m, n, name in zip(pull_cols, push_cols, name_list):
    df_[name] = pull_push(m, n)

output_cols = ['well', 'cell', 'drug', 'induce_type'] + name_list + shear_cols
df_[output_cols].to_excel("plot_dart04.xlsx")

# Classifier

## Load packages

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

## Function to plot confusion matrix

In [None]:
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """Print and plot the confusion matrix of ``y_pred`` against ``y_true``.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels, passed straight to
        ``sklearn.metrics.confusion_matrix``.
    classes : sequence of str
        Tick labels for both axes, in the row/column order of the matrix.
    normalize : bool, default False
        If True, divide every row by its total so each row holds the
        fraction of that true class assigned to each predicted class.
    title : str, optional
        Axes title; a default describing the normalisation is used when falsy.
    cmap : matplotlib colormap, default ``plt.cm.Blues``
        Colormap for the heat map.

    Returns
    -------
    (ax, fig)
        The matplotlib Axes and Figure that were created.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # A label that is predicted but never true gives an all-zero row; keep
        # it at 0 instead of dividing by zero and filling the row with NaN.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    # Set the context before the figure is created; rcParams are read when
    # artists are created, so setting it afterwards would not style this figure.
    sns.set_context("talk")

    fig, ax = plt.subplots(figsize=(4.1, 3.5))
    # A fixed 0-1 colour scale only makes sense for row-normalised fractions;
    # raw counts previously saturated at vmax=1, so let them autoscale from 0.
    vmax = 1 if normalize else None
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap, vmin=0, vmax=vmax)
    ax.figure.colorbar(im, ax=ax)
    # Show every tick and label it with the corresponding class name.
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Annotate every cell; switch to white text on the darker half of the scale.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    sns.despine(fig=fig);
    return ax, fig

## Prepare training/test data

In [None]:
train_drug = 'dmso'
test_drug = drug

# Feature columns fed to the classifiers.
cols = ['pull_slices', 'push_slices', 'shear_slices', 'shape_aniso']

# NOTE(review): whenever `drug` equals `train_drug`, the test rows are exactly the
# training rows, so the scores below are not held-out estimates -- confirm intent.
train_rows = df[df['drug'] == train_drug]
test_rows = df[df['drug'] == test_drug]

X_train = train_rows[cols]
X_test = test_rows[cols]

# Encode the induce_type strings as integers; the encoder is fitted on the
# training labels and reused unchanged for the test labels.
le = preprocessing.LabelEncoder()
Y_train = le.fit_transform(train_rows['induce_type'])
Y_test = le.transform(test_rows['induce_type'])

# Display which label each integer code stands for.
le.classes_

## Decision tree with all the features

In [None]:
# random_state is fixed so the fitted tree -- and therefore the saved confusion
# matrix and accuracy -- is identical on every run.
clf = DecisionTreeClassifier(max_depth=5, min_samples_split=5, min_samples_leaf=5,
                             random_state=0)
clf.fit(X_train, Y_train)
Y_pred = clf.predict(X_test)

# Display order of the confusion matrix: Epithelial ('un'), Transitory ('d0'),
# Mesenchymal ('pre').
classes = ['E', 'T', 'M']
display_order = ['un', 'd0', 'pre']
# Map each LabelEncoder code to its position in the display order. Deriving the
# mapping from le.classes_ (instead of hard-coding {0:1, 1:2, 2:0}) keeps it
# correct if the set of labels changes, and fails loudly on an unknown label.
d = {code: display_order.index(label) for code, label in enumerate(le.classes_)}
Y_test_ = np.array([d[x] for x in Y_test])
Y_pred_ = np.array([d[x] for x in Y_pred])

ax, f = plot_confusion_matrix(Y_test_, Y_pred_, classes=classes, normalize=True,
                      title=' ')
f.savefig("plot_fig/decision_tree.pdf", bbox_inches='tight')

In [None]:
# Fraction of test rows whose predicted class matches the true class.
accuracy_score(y_true=Y_test_, y_pred=Y_pred_)

## Hand-tuned classifier

In [None]:
def hand_classifier(df):
    """Assign a class code to every row using fixed slice-count thresholds.

    Reads the 'pull_slices' and 'push_slices' values of each row and returns
    the result of ``df.apply`` over rows, holding one code per row:

    * 1 -- pull_slices < 13 and push_slices <= 3
    * 0 -- pull_slices >= 13 and push_slices <= 6
    * 2 -- everything else
    """

    def helper(row):
        pull = row['pull_slices']
        push = row['push_slices']
        if pull < 13 and push <= 3:
            return 1
        if pull >= 13 and push <= 6:
            return 0
        return 2

    return df.apply(helper, axis=1)
# Classify the test rows with the threshold rules.
Y_pred = hand_classifier(X_test)

# Tick labels for the confusion matrix, and the remapping of class codes
# (0 -> 1, 1 -> 2, 2 -> 0) that puts the codes in that display order.
classes = ['E', 'T', 'M']
d = dict(zip((0, 1, 2), (1, 2, 0)))
Y_test_ = np.array([d[code] for code in Y_test])
Y_pred_ = np.array([d[code] for code in Y_pred])

ax, f = plot_confusion_matrix(
    Y_test_, Y_pred_,
    classes=classes,
    normalize=True,
    title=' ',
)
f.savefig("plot_fig/hand.pdf", bbox_inches='tight')

In [None]:
# Fraction of test rows whose predicted class matches the true class.
accuracy_score(y_true=Y_test_, y_pred=Y_pred_)