## Prepare the environment

First we'll configure IPython to add matplotlib plots inline. Then we'll import various functions that we'll need for generating the report. 

In [1]:
%matplotlib inline

from os.path import join, exists, expandvars
import pandas as pd
import numpy as np
from seaborn import heatmap
from seaborn import clustermap
import matplotlib.pyplot as plt
from taxcompare.eval_framework import (get_expected_tables_lookup, 
                                       find_and_process_result_tables,
                                       compute_mock_results,
                                       compute_mantel,
                                       generate_pr_scatter_plots,
                                       boxplot_from_data_frame,
                                       heatmap_from_data_frame,
                                       method_by_dataset_a1,
                                       method_by_dataset_a2,
                                       performance_rank_comparisons,
                                       parameter_comparisons)

## Configure local environment-specific values

**This is the only cell that you will need to edit to generate reports locally.**

In [2]:
## project_dir should be the directory where you've downloaded (or cloned) the 
## short-read-tax-assignment repository. 
project_dir = expandvars("$HOME/data/short-read-tax-assignment")

precomputed_results_dir = expandvars("$HOME/data/short-read-tax-assignment/data/precomputed-results/")
expected_results_dir = join(precomputed_results_dir, "simulated-community")

## results_dirs should contain the directory or directories where
## results can be found. By default, this is just the precomputed 
## results included with the project. If other results should be included, 
## absolute paths to those directories should be added to this list.
results_dirs = \
 [precomputed_results_dir,
  ]

## Taxonomic level at which analyses should be performed. Edit this to
## the desired taxonomic level. 
# 2: phylum, 3: class, 4: order, 5: family, 6: genus, 7: species
taxonomic_level = 6

## Minimum number of times an OTU must be observed for it to be included in analyses. Edit this
## to analyze the effect of the minimum count on taxonomic results.
min_count = 1

# set to true if select tables should be written as Excel files (useful for publication)
write_xls_files = True

taxonomy_level_names = ['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']
bacterial_reference_taxonomy_fp = expandvars("$HOME/data/gg_13_8_otus/taxonomy/97_otu_taxonomy.txt")
bacterial_reference_taxa = set()
for line in open(bacterial_reference_taxonomy_fp, 'U'):
    fields = line.strip().split('\t')
    bacterial_reference_taxa.add(tuple(fields[1].split('; ')[:taxonomic_level]))

In [3]:
# Uncomment for test runs (looks at a small subset of the data)

# bacterial_reference_taxa = set(list(bacterial_reference_taxa)[:25])

In [4]:
# Define the subdirectories where the data should be, and confirm that they exist.
simulated_results_dirs = [join(results_dir,"simulated-community") for results_dir in results_dirs]

for simulated_results_dir in simulated_results_dirs:
    assert exists(simulated_results_dir), "Simulated community result directory doesn't exist: %s" % simulated_results_dir

## Find simulated community pre-computed tables, expected tables, and "query" tables

Next we'll use the paths defined above to find all of the tables that will be compared. These include the *pre-computed result* tables (i.e., the ones that the new methods will be compared to), the *expected result* tables (i.e., the tables containing the known composition of the mock microbial communities), and the *query result* tables (i.e., the tables generated with the new method(s) that we want to compare to the *pre-computed result* tables).

In [5]:
results = []
for simulated_results_dir in simulated_results_dirs:
    results += find_and_process_result_tables(simulated_results_dir)

In [6]:
expected_tables = get_expected_tables_lookup(expected_results_dir, level=taxonomic_level)

In [7]:
# Filter to only tables that we're interested in here. These are the tables corresponding
# to the best performing methods.
tables_of_interest = set([('B1-iter0', 'gg_13_8_otus', 'sortmerna', '0.51:0.8:1:0.8:0.001:full-ref'),
                          ('B1-iter0', 'gg_13_8_otus', 'rdp', '0.5:full-ref'),
                          ('B1-iter0', 'gg_13_8_otus', 'uclust', '0.51:0.8:1:full-ref'),
                          ('B1-iter0', 'gg_13_8_otus', 'blast', '0.001:full-ref'),
                          ])

_results = []
for e in results:
    if e[0:4] in tables_of_interest:
        _results.append(e)
results = _results

In [8]:
results

[('B1-iter0',
  'gg_13_8_otus',
  'blast',
  '0.001:full-ref',
  '/home/ubuntu/data/short-read-tax-assignment/data/precomputed-results/simulated-community/B1-iter0/gg_13_8_otus/blast/0.001:full-ref/table.biom'),
 ('B1-iter0',
  'gg_13_8_otus',
  'rdp',
  '0.5:full-ref',
  '/home/ubuntu/data/short-read-tax-assignment/data/precomputed-results/simulated-community/B1-iter0/gg_13_8_otus/rdp/0.5:full-ref/table.biom'),
 ('B1-iter0',
  'gg_13_8_otus',
  'uclust',
  '0.51:0.8:1:full-ref',
  '/home/ubuntu/data/short-read-tax-assignment/data/precomputed-results/simulated-community/B1-iter0/gg_13_8_otus/uclust/0.51:0.8:1:full-ref/table.biom')]

Evalution 1: Compute and summarize precision, recall, and F-measure for mock communities
----------------------------------------------------------------------------------------

In this evaluation, we compute and summarize precision, recall, and F-measure of each result (pre-computed and query) based on the known composition of the mock communities. We then summarize the results in two ways: first with boxplots, and second with a table of the top methods based on their F-measures. 

This is a qualitative evaluation, effectively telling us about the ability of the different methods to report the taxa that are present in each sample. These metrics are not concerned with the abundance of the different taxa.

In [None]:
taxa_of_interest = bacterial_reference_taxa

taxa_specific_results = []

for taxon_of_interest in taxa_of_interest:
    try:
        query_mock_results = compute_mock_results(results, expected_tables, taxonomy_level=taxonomic_level,
                                      min_count=min_count, taxa_to_keep=taxon_of_interest)
    except ValueError:
        continue
    taxa_specific_results.append(query_mock_results)
        
        
    

Missing taxonomic information in table /home/ubuntu/data/short-read-tax-assignment/data/precomputed-results/simulated-community/B1-iter0/gg_13_8_otus/uclust/0.51:0.8:1:full-ref/table.biom, skipping.
Missing taxonomic information in table /home/ubuntu/data/short-read-tax-assignment/data/precomputed-results/simulated-community/B1-iter0/gg_13_8_otus/uclust/0.51:0.8:1:full-ref/table.biom, skipping.

In [10]:
indices = []
data = []
for taxon, result in zip(bacterial_reference_taxa, taxa_specific_results):
    values = result['F-measure']
    methods = result['Method']
    indices.append(list(taxon))
    data.append(list(values))
index = pd.MultiIndex.from_tuples(indices, names=taxonomy_level_names[:taxonomic_level])
metric_by_taxon = pd.DataFrame(np.array(data), columns=list(methods), index=index)
metric_by_taxon.sort_index(axis=0, inplace=True)

# testing...
# metric_by_taxon['rdp']['k__Archaea', 'p__Nanoarchaeota', 'c__[Nanoarchaeoti]'] = 1.0
# metric_by_taxon['uclust']['k__Archaea', 'p__Nanoarchaeota', 'c__[Nanoarchaeoti]'] = 1.0
# metric_by_taxon['sortmerna']['k__Archaea', 'p__Nanoarchaeota', 'c__[Nanoarchaeoti]'] = 1.0

TypeError: Cannot infer number of levels from empty list

In [11]:
metric_by_taxon

NameError: name 'metric_by_taxon' is not defined

Are method performances correlated? In other words, do all methods do well/bad on the same taxa?

In [None]:
print metric_by_taxon.corr()

What is the performance (F-measure, here) by taxa? 

In [None]:
def collpase_zero_variance_taxa(df):

    index_names = df.index.names
    column_names = df.columns
    result = df.copy()
    for i in range(len(index_names)):
        new_indices = []
        new_data = []
        for d, g in result.groupby(level=range(i+1)):
            if len(g) == 1:
                new_indices.append(g.index[0])
                new_data.append(g.get_values()[0])
            elif not g.var().any():
                if isinstance(d, tuple):
                    n = list(d)
                else:
                    n = [d]
                n += ["All"] * (len(index_names) - len(n))
                new_indices.append(n)
                new_data.append(g.get_values()[0])
            else:
                new_indices.extend(g.index)
                new_data.extend(g.get_values())
        new_index = pd.MultiIndex.from_tuples(new_indices, names=index_names)
        result = pd.DataFrame(new_data, columns=column_names, index=new_index)
    return result

metric_by_collapsed_taxon = collpase_zero_variance_taxa(metric_by_taxon)
metric_by_collapsed_taxon

Grouped by taxonomy

In [None]:
height = len(metric_by_collapsed_taxon.index) * 0.35
width = len(metric_by_collapsed_taxon.columns) * 1

metric_by_collapsed_taxon.sort_index(inplace=True)

# Based on SO answer: http://stackoverflow.com/a/18238680
fig = plt.figure(figsize=(width, height))
heatmap(metric_by_collapsed_taxon, vmin=0, vmax=1, cmap='Reds', annot=True)

Grouped by similarity in performance of taxonomic classifiers

In [None]:
#x = metric_by_collapsed_taxon[(metric_by_collapsed_taxon.mean(axis=1) < 1) & (metric_by_collapsed_taxon.mean(axis=1) > -1)]
height = len(metric_by_collapsed_taxon.index) * 0.35
width = len(metric_by_collapsed_taxon.columns) * 1
#fig = plt.figure(figsize=(width, height))
clustermap(metric_by_collapsed_taxon, vmin=0, vmax=1, cmap='Reds', figsize=(width, height), col_cluster=False)

In [None]:
for e in metric_by_collapsed_taxon.index:
    print ' '.join(e)