# Exploring Variance in Measurement Between Data Sets

In [1]:
import numpy as np
import pandas as pd 
import altair as alt
import prot.viz
# Apply the project's plotting style and keep whatever it returns as `colors`
# (presumably a color palette — confirm against prot.viz).
colors = prot.viz.plotting_style()
# NOTE(review): presumably registers/enables the project's Altair theme so the
# charts below pick it up — confirm against prot.viz.
prot.viz.altair_theme()


In this notebook, I compare the variance in observed copy numbers across the different data sets in order to estimate the uncertainty in the direct protein abundance measurements.

In [30]:
# Read the compiled, annotated complex table; lines starting with '#' in the CSV are skipped.
data = pd.read_csv('../../data/compiled_annotated_complexes.csv', comment='#')

# Boolean mask for rows whose growth_rate_hr lies in the closed interval [0.45, 0.55].
in_growth_window = data['growth_rate_hr'].between(0.45, 0.55)

glucose = (
    data[in_growth_window]
    # Stage 1: mean n_units for every (complex, dataset, complex_annotation) combination.
    .groupby(['complex', 'dataset', 'complex_annotation'])['n_units']
    .mean()
    .reset_index()
    # Stage 2: mean and variance of those stage-1 means within each complex_annotation.
    .groupby(['complex_annotation'])['n_units']
    .agg(('mean', 'var'))
    .reset_index()
    # Discard rows containing NaN (e.g. a variance that could not be computed).
    # The index is deliberately not reset, so surviving rows keep their labels.
    .dropna()
    # Variance-to-mean ratio of each annotation, stored under the name 'noise'.
    .assign(noise=lambda summary: summary['var'].values / summary['mean'].values)
)

In [31]:
# Scatter plot of the `glucose` summary table: one point per complex annotation,
# with the 'mean' column on x and the 'noise' column (var / mean) on y.
# Both axes are logarithmic. Explicit axis titles are set so that the figure can
# be read without looking up how the columns were computed.
chart = alt.Chart(glucose)
point = chart.mark_point(size=10).encode(
        x=alt.X('mean:Q',
                scale=alt.Scale(type='log'),
                title='mean copy number across data sets'),
        y=alt.Y('noise:Q',
                scale=alt.Scale(type='log'),
                title='noise (variance / mean)'))
# Bare last expression so the notebook renders the chart.
point

<VegaLite 3 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/troubleshooting.html


In [17]:
# Preview the first rows of the per-annotation summary table.
# NOTE(review): the saved output below has no 'noise' column, so it was produced
# before that column was added to `glucose`; re-run this cell to refresh it.
glucose.head()

Unnamed: 0,complex_annotation,mean,var
4,&alpha;-ketoglutarate:H+ symporter,203.628955,1949.179002
5,&alpha;2-macroglobulin,37.22341,330.299
7,&beta; sliding clamp,234.21848,12431.16499
8,"&beta;-1,6-galactofuranosyltransferase WbbI",567.138646,52692.867296
9,"&beta;-D-glucoside glucohydrolase, periplasmic",216.717565,12721.086701


In [15]:
# Display the per-annotation summary table.
# NOTE(review): the saved output below still shows rows with a missing 'var' and
# has no 'noise' column, so it predates the dropna / noise steps applied to
# `glucose`; re-run this cell (Restart & Run All) to refresh it.
glucose

Unnamed: 0,complex_annotation,mean,var
0,&alpha;-D-glucose-1-phosphate phosphatase YihX,186.346878,
1,&alpha;-D-xyloside xylohydrolase,1.773231,
2,&alpha;-dehydro-&beta;-deoxy-<i>D</i>-glucarat...,0.968906,
3,&alpha;-galactosidase,6.910469,
4,&alpha;-ketoglutarate:H+ symporter,203.628955,1949.179002
...,...,...,...
1949,zinc finger domain-containing protein YbiI,15.556368,
1950,zinc ribbon domain-containing protein YfgJ,25.652723,
1951,zinc ribbon domain-containing protein YjdM,523.060686,217469.126280
1952,zinc-binding GTPase YeiR,26.970511,
