In [None]:
import sys
sys.path.append("..")

from scipy import stats
from analysis.Analysis_Set import Analysis_Set
from workspace import Database as db
from workspace import Workspace as ws
from analysis.Sequence_Library import Sequence_Library

from analysis import statistics as virus_stats
from analysis import amino_acids as AA_analysis
from analysis import plotting as virus_plotting

import pandas
from plotly import offline as plotly_offline
plotly_offline.init_notebook_mode(connected=True)

In [None]:
# Notebook configuration: the values below are the only inputs a reader should
# need to change to point the analysis at a different experiment or sample.

# Passed to ws.set_experiment() in the next cell.
EXPERIMENT_NAME: str = "20171220_452_458_Substitution_Panel"
# Passed to db.get_library() to fetch the library that is analyzed.
SAMPLE_NAME: str = "REB_Starting_Virus"
# Forwarded as count_threshold= to Sequence_Library.get_sequence_counts().
# NOTE(review): presumably the minimum count a sequence needs to be kept - confirm.
COUNT_THRESHOLD: int = 300

In [None]:
# Select the experiment first; the alignments are queried only afterwards,
# in the same order as before.
ws.set_experiment(EXPERIMENT_NAME)

# Activate the first alignment returned by the database.
# NOTE(review): an empty result raises IndexError here - confirm that every
# experiment is guaranteed to have at least one alignment.
experiment_alignments = db.get_alignments()
ws.set_active_alignment(experiment_alignments[0])

In [None]:
# Fetch the library named by SAMPLE_NAME from the database; it is wrapped in a
# Sequence_Library in the next cell.
sample = db.get_library(SAMPLE_NAME)

In [None]:
# Wrap the sample so its reads can be counted.
sequence_reads = Sequence_Library(sample)

# Count sequences (by amino acid, thresholded by COUNT_THRESHOLD, with invalid
# entries filtered) and load the resulting mapping into a DataFrame whose index
# is the mapping's keys (orient="index").
sequence_counts = pandas.DataFrame.from_dict(
    sequence_reads.get_sequence_counts(
        by_amino_acid=True,
        count_threshold=COUNT_THRESHOLD,
        filter_invalid=True,
    ),
    orient="index",
)

In [None]:
# Observed side: per-position amino-acid counts computed from the index of
# sequence_counts (the keys of the counted-sequence mapping).
counted_sequences = sequence_counts.index
amino_acid_counts_by_position = AA_analysis.get_amino_acid_counts_by_position(counted_sequences)

# Reference side: codon biases for the library template.
# NOTE(review): the template reads as seven consecutive NNK codons; presumably
# the result is the amino-acid frequency expected from that design - confirm.
library_codon_templates = ["NNKNNKNNKNNKNNKNNKNNK"]
amino_acid_biases = AA_analysis.get_amino_acid_codon_biases(library_codon_templates)

In [None]:
# Significance of the observed amino-acid ratios against the codon biases,
# using the STANDARDIZATION test with multiple-comparison correction enabled.
standardization_significance = virus_stats.get_significance_of_amino_acid_ratios(
    amino_acid_counts_by_position,
    amino_acid_biases,
    multiple_comparison_correction=True,
    test_type=virus_stats.Test_Type.STANDARDIZATION,
)
p_values, z_scores = standardization_significance

# The "#" label is dropped before plotting.
# NOTE(review): presumably "#" marks the stop codon - confirm.
z_scores_to_plot = z_scores.drop("#")
virus_plotting.plot_significance_z_scores(z_scores_to_plot, interactive=True)

In [None]:
# Same comparison as the previous cell, but using the
# BINOMIAL_NORMAL_APPROXIMATION test.
# NOTE(review): multiple_comparison_correction is not passed here, so the
# function's default applies - confirm this difference from the
# STANDARDIZATION cell is intentional.
# NOTE(review): p_values and z_scores from the previous cell are overwritten.
binomial_significance = virus_stats.get_significance_of_amino_acid_ratios(
    amino_acid_counts_by_position,
    amino_acid_biases,
    test_type=virus_stats.Test_Type.BINOMIAL_NORMAL_APPROXIMATION,
)
p_values, z_scores = binomial_significance

# The "#" label is dropped before plotting, as in the previous cell.
z_scores_to_plot = z_scores.drop("#")
virus_plotting.plot_significance_z_scores(z_scores_to_plot, interactive=True)