# Summarized _ex vivos_ 
Here we will take the results of SCFA prediction from all four _ex vivo_ datasets, Z-score them and compare them directly. 

In [None]:
import pandas as pd
import numpy as np
import scipy

from plotnine import *
%matplotlib inline

## Collect and Normalize Data
First, we'll collect the prediction data for all of our datasets, normalize it by Z-scoring the results within each study, and combine the results into one dataframe.

In [None]:
# Load each study's results and tag every row with the study it came from.
# Rows containing missing values are dropped for study C only.
studyA = pd.read_csv('../results/studyA.csv').assign(study='StudyA')
studyB = pd.read_csv('../results/studyB.csv').assign(study='StudyB')
studyC = pd.read_csv('../results/studyC.csv').dropna().assign(study='StudyC')
studyD = pd.read_csv('../results/studyD.csv').assign(study='StudyD')
studies = [studyA, studyB, studyC, studyD]

# Columns holding predicted / measured values that get normalized.
toZscore = [
    'acetatePredicted', 'acetateMeasured',
    'butyratePredicted', 'butyrateMeasured',
    'propionatePredicted', 'propionateMeasured',
]

# Z-score each of those columns separately within every study, so the
# normalization never mixes values from different studies. Columns a study
# does not have are simply skipped.
for frame in studies:
    for name in frame.columns:
        if name not in toZscore:
            continue
        frame[name] = scipy.stats.zscore(frame[name])

# Stack the normalized studies into a single table.
scfas = pd.concat(studies)

# Harmonize treatment labels: capitalize them, then restore the 'FOS' acronym.
scfas['treatment'] = (
    scfas['treatment']
    .str.capitalize()
    .str.replace('Fos', 'FOS')
)
scfas

## Reformat Data
Reshape the data for plotting: one row per treatment, study, donor, and analyte, with the measured and predicted Z-scores in separate columns.

In [None]:
# Melt to long form: one row per (treatment, study, donor, original column),
# with the original column name in 'analyte' and its value in 'Z-Score'.
res = pd.melt(
    scfas,
    id_vars=['treatment', 'study', 'donor'],
    value_vars=['acetatePredicted', 'acetateMeasured',
                'butyratePredicted', 'butyrateMeasured',
                'propionatePredicted', 'propionateMeasured'],
    value_name='Z-Score',
    var_name='analyte',
)

# The melted names look like 'acetatePredicted'. Splitting on the capitalized
# suffix (kept via the capture group) yields ['acetate', 'Predicted', ''], so
# the first piece is the analyte and the remainder is the data source.
name_parts = res['analyte'].str.split(r"([A-Z][^A-Z]*)")
res['source'] = name_parts.str[1:].apply(''.join)
res['analyte'] = name_parts.str[0]

# Spread the sources back out so every row carries one 'Measured' and one
# 'Predicted' column. NOTE: pivot_table aggregates with the mean by default,
# so rows sharing the same treatment/study/donor/analyte are averaged.
res = pd.pivot_table(
    res,
    index=['treatment', 'study', 'donor', 'analyte'],
    columns='source',
    values='Z-Score',
).reset_index()

res['analyte'] = res['analyte'].str.capitalize()

# Replace the short study names with the display labels used in the figures.
# regex=False makes the match literal regardless of the pandas version.
study_labels = {
    'StudyA': 'Study A (1:5)',
    'StudyB': 'Study B (1:19)',
    'StudyC': 'Study C (1:5)',
    'StudyD': 'Study D (1:3)',
}
for short_name, label in study_labels.items():
    res['study'] = res['study'].str.replace(short_name, label, regex=False)

# sort_values returns a new frame; the result must be assigned back for the
# ordering to take effect (the original call discarded it).
res = res.sort_values(by='analyte', ascending=False)
res

## Facet Plot Z-Scored Data
Make a facet plot of z-scored SCFA prediction data, spanning all four datasets (Fig. 3)

In [None]:
# Fixed color for each treatment, in legend order.
fig1_colors = scale_color_manual(
    limits=['Control', 'Pectin', 'Inulin', 'FOS'],
    values=['cornflowerblue', 'mediumseagreen', 'coral', 'purple'],
)

# Axis and legend titles (no plot title).
fig1_labels = labs(
    title='',
    x='Z-Score(measured, mmol/L/h)',
    y='Z-Score(predicted, mmol/gDW/h)',
    color='Treatment',
    fill='Treatment',
)

# White panels without grid lines, bold black axes, rotated x tick labels.
fig1_theme = theme(
    figure_size=(12, 16),
    text=element_text(size=20, color='black'),
    panel_background=element_rect(
        fill="white", colour="white", size=0.5, linetype="solid"
    ),
    panel_grid=element_blank(),
    axis_line=element_line(size=2, linetype="solid", colour="black"),
    legend_position='right',
    axis_text_x=element_text(rotation=45, hjust=1.5),
)

# Predicted vs. measured Z-scores, faceted on analyte and study with free
# axis scales. The linear fit is added first so the treatment-colored points
# are drawn on top of it.
fig1 = (
    ggplot(res, aes(x='Measured', y='Predicted'))
    + geom_smooth(method='lm', linetype='--')
    + geom_point(aes(color='treatment'), size=6)
    + fig1_colors
    + fig1_labels
    + facet_grid(['analyte', 'study'], scales='free')
    + fig1_theme
)
fig1

## Summarized Ex Vivo Plots
Make a plot of predictions across all four datasets summarized together. (Fig. 2B)

In [None]:
# Fixed color for each study, in legend order.
fig2_colors = scale_color_manual(
    limits=['Study A (1:5)', 'Study B (1:19)', 'Study C (1:5)', 'Study D (1:3)'],
    values=['#3B429F', '#AA7DCE', '#F4A5AE', '#A8577E'],
)

# Axis and legend titles (no plot title).
fig2_labels = labs(
    title='',
    x='Z-Score(measured, mmol/L/h)',
    y='Z-Score(predicted, mmol/gDW/h)',
    color='Study (Dilution)',
    fill='Study (Dilution)',
)

# White panels without grid lines, bold black axes, rotated x tick labels.
fig2_theme = theme(
    figure_size=(12, 6),
    text=element_text(size=20, color='black'),
    panel_background=element_rect(
        fill="white", colour="white", size=0.5, linetype="solid"
    ),
    panel_grid=element_blank(),
    axis_line=element_line(size=2, linetype="solid", colour="black"),
    legend_position='right',
    axis_text_x=element_text(rotation=20, hjust=1),
)

# Predicted vs. measured Z-scores pooled over all studies, one panel per
# analyte with free axis scales. The linear fit is added first so the
# study-colored points are drawn on top of it.
fig2 = (
    ggplot(res, aes(x='Measured', y='Predicted'))
    + geom_smooth(method='lm', linetype='--')
    + geom_point(aes(color='study'), size=6)
    + fig2_colors
    + fig2_labels
    + facet_wrap('analyte', scales='free')
    + fig2_theme
)
fig2

In [None]:
# Pearson correlation between measured and predicted Z-scores, acetate only.
acetate = res[res['analyte'] == 'Acetate']
scipy.stats.pearsonr(acetate['Measured'], acetate['Predicted'])