In [8]:
## analyze metadata output from R script

## load packages
import numpy as np
import pandas as pd
from plotnine import *
from plotnine.data import *

## Reusable "classic" look: white panel with a black frame, no grid lines,
## and a white legend box.
## NOTE: the legend rectangle is set through `legend_background`; `legend_box`
## is the themeable that controls how multiple legends are laid out
## ('vertical' / 'horizontal'), so an element_rect does not belong there.
ggplot_classic = theme(
    panel_background=element_rect(fill='white'),
    panel_border=element_rect(color='black', size=1),
    panel_grid_major=element_blank(),
    panel_grid_minor=element_blank(),
    legend_background=element_rect(fill="white", size=0.5),
)


In [2]:
# read in metadata
metadata_raw = pd.read_csv('miRFPenrich_metadat_09-18-17.csv')

## proportions of miRFPpos are not strictly equal between samples of the same
## condition; take the mean of `prop_miRFPpos` within each
## (activated, expected.prop.miRFP) group and expose it as 'actual_mean_miRFP'
mean_vals = (
    metadata_raw
    .groupby(['activated', 'expected.prop.miRFP'])['prop_miRFPpos']
    .mean()
    .rename('actual_mean_miRFP')
    .reset_index()
)

## attach the group mean to every sample row
metadata_merged = metadata_raw.merge(mean_vals, on=['activated', 'expected.prop.miRFP'])

## melt to long format so all of the statistics can go on a single plot
metadata_long = pd.melt(
    metadata_merged,
    id_vars=['sample', 'activated', 'actual_mean_miRFP'],
    value_vars=['AUC_values', 'spec', 'sens'],
)

## keep activated cells only, then give the statistics readable names.
## `replace` is scoped to the 'variable' column and returns a new frame, so
## nothing is modified in place on a filtered slice.
metadata = (
    metadata_long
    .loc[metadata_long['activated'] == True, :]
    .replace({'variable': {'AUC_values': 'AUC', 'sens': 'sensitivity', 'spec': 'specificity'}})
)

metadata.head(10)

Unnamed: 0,sample,activated,actual_mean_miRFP,variable,value
0,U2OS activated_B01_008,True,0.12735,AUC,0.936239
1,U2OS activated_B05_013,True,0.12735,AUC,0.912663
2,U2OS activated_C04_018,True,0.12735,AUC,0.919494
3,U2OS activated_B02_009,True,0.5488,AUC,0.922089
4,U2OS activated_B06_014,True,0.5488,AUC,0.906555
5,U2OS activated_C03_017,True,0.5488,AUC,0.889521
6,U2OS activated_B03_010,True,0.014947,AUC,0.934733
7,U2OS activated_B03_011,True,0.014947,AUC,0.951691
8,U2OS activated_C02_016,True,0.014947,AUC,0.986587
9,U2OS activated_C06_020,True,0.014947,AUC,0.947351


In [3]:
## second dataframe: mean of `value` within each
## (activated, actual_mean_miRFP, variable) group, stored as 'mean_val'.
## The trailing filter drops unactivated samples; it is kept so this cell
## gives the same result even if `metadata` still contains them.
summary_stats = (
    metadata
    .groupby(['activated', 'actual_mean_miRFP', 'variable'])['value']
    .mean()
    .rename('mean_val')
    .reset_index()
    .loc[lambda stats: stats['activated'] == True, :]
)

summary_stats

Unnamed: 0,activated,actual_mean_miRFP,variable,mean_val
0,True,0.014947,AUC,0.955091
1,True,0.014947,sensitivity,0.755003
2,True,0.014947,specificity,0.992848
3,True,0.038002,AUC,0.930926
4,True,0.038002,sensitivity,0.814704
5,True,0.038002,specificity,0.992861
6,True,0.12735,AUC,0.922799
7,True,0.12735,sensitivity,0.816193
8,True,0.12735,specificity,0.990305
9,True,0.5488,AUC,0.906055


In [9]:
## plot: per-sample points (black), a 'mean_sdl' summary per x position, and a
## line through the per-group means taken from `summary_stats`
p1 = (
    ggplot(aes(x='actual_mean_miRFP', y='value', color='variable'), data=metadata)
    + stat_summary(fun_data='mean_sdl', size=1)
    + geom_line(aes(y='mean_val'), data=summary_stats)
    + geom_point(alpha=0.5, color='black')
    + scale_color_manual(['#d7191c', '#fdae61', '#2c7bb6'])
)

## x-axis ticks every 10% from 0% to 60%. Build them from integer percents so
## the labels never depend on float truncation (int(0.29 * 100) == 28), and so
## breaks and labels are guaranteed to come from the same sequence.
pct_ticks = np.arange(0, 70, 10)
x_breaks = pct_ticks / 100.0
x_labels = ['{0}{1}'.format(pct, '%') for pct in pct_ticks]

## make some thematic changes
p_final = (
    p1
    + ggplot_classic
    + scale_x_continuous(name='Percent miRFP-Positive', breaks=x_breaks, labels=x_labels, limits=[0, 0.7])
    + scale_y_continuous(name='')
    + theme(axis_text_x=element_text(rotation=45, hjust=1))
)

## write the figure to disk via the plot's own save method
p_final.save('Fig1D_metadata_final.png', width=3, height=2, dpi=600)

## last expression of the cell, so the figure is rendered in the notebook
p_final

  from_inches(height, units), units))
  warn('Filename: {}'.format(filename))


In [5]:
## scratch check: percent labels produced for fractions 0.0 .. 0.5 in 0.1 steps
list(map(lambda frac: '{0}{1}'.format(int(frac * 100), '%'), np.arange(0, 0.6, 0.1)))

['0%', '10%', '20%', '30%', '40%', '50%']