In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from ast import literal_eval

In [None]:
# Read the combined model-output table; the first CSV column becomes the index.
all_data = pd.read_csv(
    'models/ultimate_output_all_data.csv',
    index_col=0,
)

# Preview the first ten rows.
all_data.head(n=10)

In [None]:
# Distinct values present in the data_type column.
all_data['data_type'].unique()

In [None]:
# Set the "binary" models ('gbt_binary' and 'SVC') aside in binary_data and
# keep only the remaining models in all_data.
# NOTE: all_data is overwritten here, so running this cell a second time
# leaves binary_data empty.
is_binary_model = all_data['model'].isin(['gbt_binary', 'SVC'])
binary_data = all_data.loc[is_binary_model].copy()
all_data = all_data.loc[~is_binary_model]


In [None]:
# Drug names ordered by their mean r2 over all rows, lowest first, so the
# drug with the highest mean r2 is the last element.
mean_r2_by_drug = all_data.groupby('drug')['r2'].mean()
sort_index = mean_r2_by_drug.sort_values().index.values
sort_index

In [None]:
# Distribution of pearsonr per drug, pooled over every remaining model and
# data type (no hue, hence no legend). Rows follow sort_index.
# NOTE(review): sort_index ranks drugs by mean r2 while this figure shows
# pearsonr -- confirm that ordering is the intended one.
fig, ax = plt.subplots(figsize=(6, 12))
sns.boxenplot(
    x="pearsonr",
    y="drug",
    order=sort_index,
    data=all_data,
    ax=ax,
)
fig.savefig("all_model_pearson_by_drug.png", dpi=300, bbox_inches='tight')

In [None]:
# pearsonr broken down by data type (rows) and model (hue), over all drugs.
fig, ax = plt.subplots(figsize=(4, 8))
sns.boxplot(
    x="pearsonr",
    y="data_type",
    hue="model",
    data=all_data,
    ax=ax,
)
# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
fig.savefig("model_vs_dtype.png", dpi=300, bbox_inches='tight')

In [None]:
# pearsonr per model for the single top-ranked drug (last entry of sort_index).
# NOTE(review): the output file name assumes that drug is venetoclax -- confirm.
top_drug = sort_index[-1:]
top_drug_rows = all_data.loc[all_data['drug'].isin(top_drug)]

fig, ax = plt.subplots(figsize=(4, 6))
sns.boxplot(
    x="pearsonr",
    y="drug",
    hue="model",
    order=top_drug,
    data=top_drug_rows,
    ax=ax,
)
# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
fig.savefig("venetoclax_pearson_by_model.png", dpi=300, bbox_inches='tight')

In [None]:
# pearsonr per data type for the single top-ranked drug (last entry of
# sort_index), drawn as violins.
# NOTE(review): the output file name assumes that drug is venetoclax -- confirm.
top_drug = sort_index[-1:]
top_drug_rows = all_data.loc[all_data['drug'].isin(top_drug)]

fig, ax = plt.subplots(figsize=(4, 6))
sns.violinplot(
    x="pearsonr",
    y="drug",
    hue="data_type",
    order=top_drug,
    data=top_drug_rows,
    ax=ax,
)
# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
fig.savefig("venetoclax_pearson_by_data.png", dpi=300, bbox_inches='tight')

In [None]:
from collections import Counter
from functools import reduce

def get_feature_counts(features):
    """Tally how often each feature name occurs across many feature lists.

    Parameters
    ----------
    features : iterable of str
        Each entry is one '|'-separated string of feature names.
        Entries that are not strings (e.g. None or a float NaN used as a
        missing-value marker) are skipped instead of raising AttributeError.

    Returns
    -------
    collections.Counter
        Maps each feature name to its number of occurrences over all entries.
    """
    totals = Counter()
    for entry in features:
        if not isinstance(entry, str):
            # Missing value: there is nothing to count for this entry.
            continue
        # update() adds in place; `totals += Counter(...)` would rescan the
        # whole running total on every iteration to drop non-positive counts.
        totals.update(entry.split('|'))
    return totals
# Build a feature-by-group count table for the top-ranked drug (last entry of
# sort_index): one column per (data_type, model) group, one row per feature
# name, each cell holding how often that feature appears in the group's
# feature_names strings.
top_drug_rows = all_data.loc[all_data.drug.isin(sort_index[-1:])]

per_group_counts = []
for group_key, group_rows in top_drug_rows.groupby(['data_type', 'model']):
    # Column label is "<data_type>_<model>".
    column_name = '_'.join(group_key)
    group_counter = get_feature_counts(group_rows.feature_names)
    per_group_counts.append(pd.Series(group_counter, name=column_name))

# Align all groups on feature name; a feature a group never used counts as 0.
all_counts = pd.concat(per_group_counts, axis=1).fillna(0)

# Display the table ordered by the counts of the ('phospho', 'EN') group.
# This raises KeyError if that group is absent for the selected drug.
all_counts.sort_values(by='phospho_EN')

In [None]:
# Total count of each feature across all groups; show the 20 largest.
total_per_feature = all_counts.sum(axis=1)
total_per_feature.sort_values(ascending=False).head(20)

In [None]:
# Heatmap of the feature-count table with both rows and columns clustered.
# clustermap creates its own figure, which is then the current figure that
# plt.savefig writes out.
sns.clustermap(
    all_counts,
    row_cluster=True,
    col_cluster=True,
)
plt.savefig("features_all.png", dpi=300, bbox_inches='tight')