In [1]:
import pandas as pd
from pathlib import Path
from tnseq2.src.analysis import *
from tnseq2.src.method2_analysis import *
import numpy as np
from scipy.stats import ranksums
import matplotlib.pyplot as plt
import chart_studio
import chart_studio.tools as tls
import chart_studio.plotly as py
import plotly.express as px
%matplotlib inline
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
cf.go_offline()
import dash_bio as dashbio

# Table of Contents: <a id='start'></a>

1. [Loading the data](#loading-data)
2. [Method 1](#Method-1)
3. [Method 2](#Method-2)
4. [Compare the results](#Compare)

## Setup: <a id='loading-data'></a>

- Loading the example dataset. 

- `lib10_cnt` dataframe contains raw counts and meta info
- Identify `good_samples` using `calculate_correlation`

In [2]:
# NOTE(review): absolute, machine-specific paths -- the notebook cannot be re-run on
# another machine without editing these three locations.
counts_dir ="/Users/ansintsova/git_repos/nguyenb_tnseq/data/13_04_results/counts"
outdir = '/Users/ansintsova/git_repos/nguyenb_tnseq/data/01_06'
control_file = Path("/Users/ansintsova/git_repos/nguyenb_tnseq/data/13_04_results")/'controls.txt'
# Sequencing runs whose count files are passed to load_files.
dnaids = ['dnaid1315', 'dnaid1428', 'dnaid1429', 'dnaid2015', 'dnaid2016', 'dnaid2017', 'dnaid2018', 'dnaid2019',
         'dnaid2023', 'dnaid2024', 'dnaid2025', 'dnaid2026', 'dnaid2027', 'dnaid2028', 'dnaid2029' ]
cnt_df = load_files(dnaids, Path(counts_dir))
# Work on an independent copy restricted to library_10_2 so new columns can be added safely.
lib10_cnt = cnt_df[cnt_df.library == 'library_10_2'].copy()
# Sample key used throughout the notebook: "<sampleID>_<dnaid>_<experiment>".
lib10_cnt['sampleIDExp'] = lib10_cnt['sampleID'] + "_"+ lib10_cnt['dnaid'] + "_" + lib10_cnt['experiment']
# Barcode annotation is taken from the full count table (all libraries), not only library_10_2.
annotation_df = cnt_df[['barcode', 'ShortName', 'locus_tag', 'phenotype', 'conc']].drop_duplicates()

# calculate_correlation returns a correlation table and the collection of samples to keep.
corr_df, good_samples = calculate_correlation(lib10_cnt, control_file, for_each='sampleIDExp')
good_samples = list(good_samples)
# list.remove raises ValueError if this sample is not present in good_samples.
# NOTE(review): the reason for excluding this inoculum sample is not recorded here -- confirm.
good_samples.remove('unenriched_inoculum_d0_dnaid2017_TV4592A')
lib10_cnt[['barcode', 'cnt', 'ShortName', 'sampleIDExp']].head()

Unnamed: 0,barcode,cnt,ShortName,sampleIDExp
7749,AAAAAAACTGTAGTGCA,5233.0,SL1344_1940,ad927_d1_dnaid2017_TV4592A
7750,AAAAAACGTCCGCGATG,2828.0,lpfD,ad927_d1_dnaid2017_TV4592A
7751,AAAAACATGCCACACTG,1684.0,AAAAACATGCCACACTG,ad927_d1_dnaid2017_TV4592A
7752,AAAAACGCCGTTAAACT,2004.0,SL1344_1068,ad927_d1_dnaid2017_TV4592A
7753,AAAAACTTTATACTTGA,2451.0,SL1344_0033,ad927_d1_dnaid2017_TV4592A


In [3]:
# Sample IDs have the form "<mouse>_<day>_<dnaid>_<experiment>". The "unenriched_inoculum"
# mouse label contains an extra underscore, so it is normalised to "unenriched-inoculum"
# before splitting; otherwise field 1 would be "inoculum" instead of the day.
# NOTE(review): `collections` is not imported by name in the import cell; it is presumably
# exposed by one of the star imports -- confirm.
n_samples = collections.Counter(
    si.replace("unenriched_", "unenriched-").split("_")[1] for si in good_samples
)
print(f"Number of independent experiments: {lib10_cnt.experiment.nunique()}")
print("Number of Samples per day:")
for day in sorted(n_samples):
    print(f'{day}: {n_samples[day]} samples')

Number of independent experiments: 3
Number of Samples per day:
d0: 5 samples
d1: 14 samples
d2: 9 samples
d3: 7 samples
d4: 6 samples


# Method 1:  <a id='Method-1'></a>

0. For each sample, correlation between WITS dilution and counts is calculated, samples with correlations < 0.8 are discarded. 

1. Raw barcode counts are filtered for abundance. [Why?](#method-1-filter)

2. Filtered counts are input into DESeq2. [What does the output look like?](#method-1-deseq-output)

3. For each **gene** calculate mean fitness (on log2 scale) and a Z-score based on log2FoldChanges and lfcSE calculated by DESeq2. [How is Z-score calculated?](#method-1-z-score)

4. Calculate p-value associated with each Z-score and perform multiple test adjustment. [How is p value calculated?](#method-1-p-value)

### [How to interpret Z-score?](#method-1-z-score-interpret)


In [6]:
# Method 1 on library 10, keeping only samples that passed the control correlation check
# and applying the abundance threshold of 1000.
method1_fitness, method1_results = analyze_library(
    lib10_cnt,
    sample_id="sampleIDExp",
    good_samples=good_samples,
    dnaid='library10',
    experiment='2',
    control_file=control_file,
    to_filter=1000,
    outdir=outdir,
)

# Optional export of the Method 1 tables (disabled):
#method1_fitness.to_csv(Path("/Users/ansintsova/git_repos/avocado/data")/"method1_fitness.csv")
#method1_results.to_csv(Path("/Users/ansintsova/git_repos/avocado/data")/"method1_results.csv")

Counter({'d1': 14, 'd2': 9, 'd3': 7, 'd4': 6})
Filtering Dataset
Running DESeq2
Calculating z-scores
  day  cntrl_FC  cntrl_FC_median  cntrl_sigma
0  d1  1.672239         1.592678     0.264137
1  d2  0.229080         0.222374     0.282213
2  d3  0.240053         0.290581     0.299270
3  d4  0.347708         0.439323     0.314600
Summarizing


In [14]:
method1_results.sample(5, random_state =42)

Unnamed: 0_level_0,locus,num_barcodes,library,barcode,sstart,sseqid,d1_num_samples,d1_fitness_mean,d1_fitness_std,d1_ci,...,d3_zscore,d3_pval,d3_padj,d4_num_samples,d4_fitness_mean,d4_fitness_std,d4_ci,d4_zscore,d4_pval,d4_padj
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
hsdM,SL1344_4456,1.0,library_10_2,CCACTAACCAGGCATCT,4801146.0,FQ312003.1,14.0,-0.07765,,0.596144,...,0.12628,0.899511,0.968357,6.0,0.147017,,1.163749,0.357503,0.720715,0.917358
aadA,SL1344_1199,1.0,library_10_2,AAACCAGCTGTGGCATT,1304228.0,FQ312003.1,14.0,-0.398061,,0.477417,...,-0.075508,0.939811,0.982227,6.0,0.127568,,1.148166,0.475669,0.63431,0.896727
ACCCATATTTTCCTCGA,,1.0,library_10_2,ACCCATATTTTCCTCGA,85797.0,NC_017718.1,14.0,0.024066,,0.639691,...,0.167964,0.866611,0.961178,6.0,-0.164565,,0.9377,-0.207632,0.835517,0.93992
ssaU,SL1344_1356,1.0,library_10_2,AACACTACAGCTGTAAC,1458111.0,FQ312003.1,14.0,-0.073216,,0.597979,...,-0.807997,0.419092,0.732858,6.0,-0.18638,,0.923628,-0.234806,0.814359,0.938917
AGAATTTTAATGAGCCA,,1.0,library_10_2,AGAATTTTAATGAGCCA,,,14.0,0.221219,,0.733364,...,1.305724,0.191646,0.510913,6.0,0.519371,,1.50643,0.811564,0.417042,0.796928


In [9]:
# Number of genes/barcodes tested and, per day, how many have adjusted p-value < 0.05.
n_tested = method1_results.shape[0]
print(f'Tested {n_tested} genes/barcodes')
for day in ('d1', 'd2', 'd3', 'd4'):
    n_hits = int((method1_results[f"{day}_padj"] < 0.05).sum())
    print(f'Number of significant hits on {day}: {n_hits}')

Tested 1212 genes/barcodes
Number of significant hits on d1: 830
Number of significant hits on d2: 36
Number of significant hits on d3: 42
Number of significant hits on d4: 46


# Method 2
<a id='Method-2'></a>

0. For each sample, correlation between WITS dilution and counts is calculated, samples with correlations < 0.8 are discarded.
1. Raw barcode counts are run through DESeq2 VST transformation.
2. Calculate mean inoculum value for each barcode based on all inoculum samples.
3. Calculate fitness for each barcode as abundance on a given day compared to the inoculum.
4. Calculate fitness for each **gene** as the median of fitness values of the barcodes mapped to that gene. WT fitness is the median fitness of all WITS barcodes.
    - [How consistent are fitness values across mice?](#method-2-fitness-conistency)
    - [Are the fitness values for control barcodes within expectations?](#method-2-fitness-controls)
5. P-value is calculated using Mann-Whitney U test. Multi-test correction using Benjamini/Hochberg (non-negative). [How is p-value calculated?](#method-2-p-value)
6. Calculate CI as gene fitness relative to  WT fitness. Same test for significance using ssaV mutants as controls. 


In [6]:
# VST-transform the raw counts of the retained samples, then index the result by barcode.
vst_counts = run_VST_transformation(
    lib10_cnt,
    "method2-lib10-2",
    good_samples,
    outdir,
    sample_id='sampleIDExp',
)
vst_df = vst_counts.set_index('barcode')
# Optional export (disabled):
#vst_df.to_csv(Path("/Users/ansintsova/git_repos/avocado/data")/"vst_counts.csv")

Generating datasets for DESeq2
Running DESeq2 VST Transformation
Done


In [7]:
# Method 2: per-barcode fitness, per-gene fitness, competitive index and test results.
# NOTE(review): this call passes sample_id='sampleID' whereas the other analysis calls in
# this notebook pass 'sampleIDExp' -- confirm this is intended.
(all_fitness_df,
 gene_fitness_df,
 ci_df,
 results_df,
 wt_fitness_df,
 ssa_ci_df) = method2_analysis(
    vst_df,
    annotation_df,
    good_samples,
    sample_id='sampleID',
    hits=0.05,
)
# Optional exports of the Method 2 tables (disabled):
#all_fitness_df.to_csv(Path("/Users/ansintsova/git_repos/avocado/data")/"method2_all_fitness.csv")
#gene_fitness_df.to_csv(Path("/Users/ansintsova/git_repos/avocado/data")/"method2_gene_fitness.csv")
#ci_df.to_csv(Path("/Users/ansintsova/git_repos/avocado/data")/"method2_all_ci.csv")
#results_df.to_csv(Path("/Users/ansintsova/git_repos/avocado/data")/"method2_all_results.csv")

#wt_fitness_df.to_csv(Path("/Users/ansintsova/git_repos/avocado/data")/"method2_wt_fitness.csv")
#ssa_ci_df.to_csv(Path("/Users/ansintsova/git_repos/avocado/data")/"method2_ssa_ci_fitness.csv")

_d1
_d2
_d3
_d4


In [8]:
# A cell only renders its last expression, so the two previews are shown explicitly
# with display(); previously they were evaluated and silently discarded.
display(vst_df.sample(5, random_state=5))
display(results_df.sample(5, random_state=42))
# Total number of CI-based hits across all days.
results_df.ci_hits.sum()

799

In [9]:
# Number of genes/barcodes with an adjusted p-value on each day. This table used to be
# computed and thrown away while the headline number was hard-coded.
tested_per_day = results_df.groupby('day').padj.count()
display(tested_per_day)
# Report the largest per-day count rather than a literal that can go stale.
print(f'Tested {tested_per_day.max()} genes/barcodes')
for day in ['d1', 'd2', 'd3', 'd4']:
    # Hits are called on the CI-based adjusted p-value.
    n_hits = ((results_df.day == day) & (results_df.ci_padj < 0.05)).sum()
    print(f'Number of significant hits on {day}: {n_hits}')

Tested 1888 genes/barcodes
Number of significant hits on d1: 320
Number of significant hits on d2: 479
Number of significant hits on d3: 0
Number of significant hits on d4: 0


# Comparing Method 1 and Method 2 results

## Method 1: Filtering for abundant barcodes <a id='method-1-filter'></a>

- For some inoculum samples, there are barcodes present at very low abundances
- Including these in the analysis introduces a lot of noise (See example below)

[Back to Method 1](#Method-1)

In [43]:
# Samples whose ID mentions "inoculum", and the count rows belonging to them.
all_sample_ids = lib10_cnt.sampleIDExp.unique()
inoculum_samples = [sample for sample in all_sample_ids if 'inoculum' in sample]
is_inoculum = lib10_cnt.sampleIDExp.isin(inoculum_samples)
inoculum_counts = lib10_cnt[is_inoculum]

In [53]:
# Histogram of raw barcode counts, one colour per inoculum sample.
inoculum_hist = px.histogram(inoculum_counts, x='cnt', color='sampleIDExp')
inoculum_hist.update_layout(
    title={"text": "Distribution of barcode counts in the inoculum", "x": 0.5},
    yaxis_title="Frequency",
    xaxis_title="Count",
)

## Analyze results with Method 1 without filtering:

In [54]:
# Same Method 1 run as for the filtered analysis, but with the abundance filter switched off.
# NOTE(review): the recorded sample counts of this run differ from those of the filtered run
# although both receive `good_samples`; the list apparently changed between executions --
# re-run the notebook top to bottom to confirm.
m1_fitness_unfiltered, method1_results_unfiltered = analyze_library(
    lib10_cnt,
    sample_id="sampleIDExp",
    good_samples=good_samples,
    dnaid='library10',
    experiment='2',
    control_file=control_file,
    to_filter=0,
    outdir=outdir,
)

Counter({'d1': 17, 'd2': 14, 'd3': 12, 'd4': 9})
Filtering Dataset
Running DESeq2
Calculating z-scores
  day  cntrl_FC  cntrl_FC_median  cntrl_sigma
0  d1  0.783269         0.733629     0.201309
1  d2  0.137980         0.225474     0.206087
2  d3  0.215810         0.192748     0.210567
3  d4 -0.152742        -0.234320     0.220757
Summarizing


In [55]:
# Number of genes/barcodes tested without filtering and, per day, how many have padj < 0.05.
n_tested_unfiltered = method1_results_unfiltered.shape[0]
print(f'Tested {n_tested_unfiltered} genes/barcodes')
for day in ('d1', 'd2', 'd3', 'd4'):
    n_hits_unfiltered = int((method1_results_unfiltered[f"{day}_padj"] < 0.05).sum())
    print(f'Number of significant hits on {day}: {n_hits_unfiltered}')

Tested 2133 genes/barcodes
Number of significant hits on d1: 29
Number of significant hits on d2: 41
Number of significant hits on d3: 63
Number of significant hits on d4: 59


In [58]:
# Day-1 fitness with (x) and without (y) abundance filtering, joined on the gene index.
to_keep = ['d1_fitness_mean','d1_fitness_std', 'd1_padj']
m1_filtering_comp = pd.merge(
    method1_results[to_keep],
    method1_results_unfiltered[to_keep],
    left_index=True,
    right_index=True,
)
axis_labels = {
    'd1_fitness_mean_x': 'Day 1 Fitness Filtered',
    'd1_fitness_mean_y': 'Day 1 Fitness Unfiltered',
    'd1_fitness_std_y':'Day 1 stdev between barcodes (unfiltered)',
}
fig = px.scatter(
    m1_filtering_comp,
    x='d1_fitness_mean_x',
    y='d1_fitness_mean_y',
    hover_data=[m1_filtering_comp.index],
    width=1000,
    height=800,
    color='d1_fitness_std_y',
    labels=axis_labels,
)
# Use the same range on both axes so the diagonal is comparable.
shared_range = [-8, 3]
fig.update_xaxes(range=shared_range)
fig.update_yaxes(range=shared_range)
fig

## Example 1: *rfbI*

- In the unfiltered data, there are 2 barcodes mapping to *rfbI*, one with very low abundance.
- This barcode has an exaggerated log2FoldChange, and hides the signal from the other barcode, which is present in high abundance.

In [60]:
# Per-barcode DESeq2 rows for the two rfbI barcodes, ordered by day.
rfbi_bc = ['AACCATAATCCCCCGAT', 'AGCTAATCCCCCTGCCG']
is_rfbi = m1_fitness_unfiltered['barcode'].isin(rfbi_bc)
m1_fitness_unfiltered.loc[is_rfbi].sort_values('day')

Unnamed: 0,barcode,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,day,n_samples
3230,AACCATAATCCCCCGAT,897.5936,-1.6733,0.88388,-1.893131,0.05834,0.999956,d1,17
3803,AGCTAATCCCCCTGCCG,6.033128,6.506206,2.762674,2.355039,0.018521,0.781322,d1,17
6341,AACCATAATCCCCCGAT,897.5936,-2.282655,0.90498,-2.522327,0.011658,0.510659,d2,14
6914,AGCTAATCCCCCTGCCG,6.033128,3.967322,2.828929,1.402412,0.160792,0.998923,d2,14
9452,AACCATAATCCCCCGAT,897.5936,-3.068495,0.924756,-3.318168,0.000906,0.03969,d3,12
10025,AGCTAATCCCCCTGCCG,6.033128,0.439514,2.9317,0.149918,0.88083,0.991844,d3,12
119,AACCATAATCCCCCGAT,897.5936,-2.245628,0.969004,-2.31746,0.020479,0.351786,d4,9
692,AGCTAATCCCCCTGCCG,6.033128,0.725241,3.072053,0.236077,0.813373,0.990358,d4,9


## Another Example: *rfaL*

In [61]:
# The results table stores a gene's barcodes as one comma-separated string; split it
# and show the per-barcode DESeq2 rows ordered by day.
rfaL_barcode_field = method1_results_unfiltered.loc['rfaL'].barcode
rfaL_bc = [bc.strip() for bc in rfaL_barcode_field.split(",")]
is_rfaL = m1_fitness_unfiltered['barcode'].isin(rfaL_bc)
m1_fitness_unfiltered.loc[is_rfaL].sort_values('day')

Unnamed: 0,barcode,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,day,n_samples
3451,ACAGGAAAACACGCGCC,2340.702167,-1.555482,0.868442,-1.791118,0.07327442,0.9999565,d1,17
5305,GCTCTCAATGAGTGGAT,0.464963,3.306925,4.225432,0.782624,0.4338479,0.9999565,d1,17
5617,GTGGACACACCAAGGGC,536.05758,-2.288797,0.966386,-2.368408,0.01786484,0.7813224,d1,17
6562,ACAGGAAAACACGCGCC,2340.702167,-2.436731,0.889294,-2.740072,0.006142565,0.2984902,d2,14
8416,GCTCTCAATGAGTGGAT,0.464963,2.096548,4.332329,0.483931,0.6284349,0.9989229,d2,14
8728,GTGGACACACCAAGGGC,536.05758,-3.369738,0.989469,-3.405604,0.0006601796,0.0466627,d2,14
9673,ACAGGAAAACACGCGCC,2340.702167,-5.166596,0.90891,-5.684385,1.312845e-08,2.268304e-06,d3,12
11527,GCTCTCAATGAGTGGAT,0.464963,1.414546,4.437828,0.318747,0.749918,0.9918443,d3,12
11839,GTGGACACACCAAGGGC,536.05758,-5.596238,1.012117,-5.52924,3.21621e-08,4.546552e-06,d3,12
340,ACAGGAAAACACGCGCC,2340.702167,-7.433947,0.955058,-7.783761,7.039942e-15,2.104239e-12,d4,9


## Method 1: DESeq2 Output  <a id='method-1-deseq-output'></a>

- For each barcode, DESeq2 calculates log2FoldChange, associated error (lfcSE), and FDR corrected p-value (padj)
- Also included is the number of samples (i.e. mice) that were included for each day

[Back to Method 1](#Method-1)

In [62]:
# Fixed seed so the preview is reproducible, like the other sampled previews in this notebook.
m1_fitness_unfiltered.sample(5, random_state=42)

Unnamed: 0,barcode,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,day,n_samples
10956,CTAAACCGAAACGGGCC,5312.751463,0.286138,0.482616,0.592888,0.553256,0.991844,d3,12
9399,AAAGGATCACATGGCCT,0.45276,-2.739312,5.849438,-0.468303,0.639568,0.991844,d3,12
1451,CGAACAAGAACGATTGA,4643.631024,0.550007,0.648599,0.847992,0.396443,0.990358,d4,9
9323,TTTGGGGTCGAGGCGAG,3831.164882,0.49078,0.711888,0.689406,0.490568,0.998923,d2,14
7684,CGACAGAGGCGGGTCAC,3.749227,1.49234,2.317961,0.643816,0.519695,0.998923,d2,14


## Method 1: Z-Score Calculation: <a id='method-1-z-score'></a>





[Back to Method 1](#Method-1)

## Method 1: p-values
<a id='method-1-p-value'></a>
[Back to Method 1](#Method-1)

## Method 1: How to interpret Z-score? 
<a id='method-1-z-score-interpret'></a>

- Map between CI and Z-score? Z-score as another measure of CI. 

## Method 2: How consistent are fitness values across mice? 
<a id='method-2-fitness-conistency'></a>


[Back to Method 2](#Method-2)

In [85]:
# Mean, standard deviation and relative standard deviation of fitness per gene and day.
rsd = (
    gene_fitness_df
    .groupby(['ShortName', 'day'])['Fitness']
    .agg(['mean', 'std'])
    .reset_index()
)
rsd['rsd'] = rsd['std'] / rsd['mean']


In [162]:
# The mouse label is the first "_"-separated field of sampleID; append it as a column.
mouse = gene_fitness_df.sampleID.str.split("_", expand=True)[0].rename('mouse')
gene_fitness = pd.concat([gene_fitness_df, mouse], axis=1)

In [167]:
px.strip(gene_fitness[gene_fitness['ShortName'] == 'dcuB'], x='day', y='Fitness', hover_data=['mouse'], 
        log_y = True)

In [193]:
# log2 of fitness, then one row per (gene, mouse) with one column per day.
gene_fitness['log2FC'] = np.log2(gene_fitness['Fitness'])
t = (
    gene_fitness
    .pivot(index=['ShortName', 'mouse'], columns='day', values='log2FC')
    .reset_index()
)


In [199]:
# Fixed seed so the preview is reproducible across runs.
gene_fitness_df.sample(10, random_state=42)

Unnamed: 0,ShortName,sampleID,Fitness,day
20017,b2145,am488_d1_dnaid2027_TV5563A,0.816744,d1
566,SL1344_0431,ad926_d2_dnaid2017_TV4592A,0.79255,d2
14576,melA,am727_d3_dnaid2029_TV5585A,0.800033,d3
6337,SL1344_1873,ad930_d2_dnaid2017_TV4592A,1.00955,d2
19009,AGAATTTTAATGAGCCA,am730_d3_dnaid2029_TV5585A,1.386785,d3
30351,AGCCACAGACTATAATG,am732_d1_dnaid2028_TV5585A,1.0,d1
6821,celF,ad930_d2_dnaid2017_TV4592A,1.083741,d2
8907,malZ,ad930_d4_dnaid2018_TV4592A,0.659524,d4
21438,SL1344_1806,am730_d2_dnaid2028_TV5585A,0.879331,d2
8776,fidL,ad930_d1_dnaid2017_TV4592A,0.902983,d1


## Method 2: Are the fitness values for control barcodes within expectations? <a id='method-2-fitness-controls'></a>

- Prior to both methods we calculate correlation between different dilutions of WITS barcodes and counts, and discard those with correlation of < 0.8
- For Method 1 we can also look at Z-scores and log2FC for each barcode/each dilution
- For Method 2 we can also look at fitness of each barcode in each mouse and look for outliers 

In [130]:
def get_control_df(fitness, phenotype='wt'):
    """Return the fitness of control barcodes of one phenotype in long format.

    Parameters
    ----------
    fitness : pandas.DataFrame
        Wide table with the columns 'barcode', 'phenotype', 'conc', 'day' and 'inoculum'
        plus one column per sample. Sample columns are expected to be named
        "<mouse>_<day>_<dnaid>_<experiment>". The frame is not modified.
    phenotype : str, default 'wt'
        Value of the 'phenotype' column to keep.

    Returns
    -------
    pandas.DataFrame
        One row per (barcode, sample) with the columns 'barcode', 'phenotype', 'conc',
        'sampleExpID', 'fitness', 'mouse', 'day', 'dnaid' and 'experiment'.
    """
    # rename() returns a new frame; assigning to fitness.columns would silently relabel
    # the caller's DataFrame. The "unenriched_" prefix is rewritten so that sample names
    # split into exactly four "_"-separated fields below.
    renamed = fitness.rename(columns=lambda c: c.replace("unenriched_", "unenriched-"))
    renamed = renamed.drop(['day'], axis=1)

    # Keep the requested phenotype, drop every column with a missing value among those
    # rows, and drop the inoculum reference column.
    controls = renamed[renamed.phenotype == phenotype].dropna(axis=1).drop(['inoculum'], axis=1)
    controls = controls.melt(id_vars=['barcode', 'phenotype', 'conc'],
                             var_name='sampleExpID', value_name='fitness')
    sample_parts = controls.sampleExpID.str.split("_", expand=True)
    sample_parts.columns = ['mouse', 'day', 'dnaid', 'experiment']
    return controls.merge(sample_parts, left_index=True, right_index=True)

# One long-format fitness table per control phenotype.
wt, hyb, chey, ssaV_invG = (
    get_control_df(all_fitness_df, phenotype=control_phenotype)
    for control_phenotype in ('wt', 'hyb', 'chey', 'ssaV_invG')
)

In [54]:
# Raw counts of the WT control barcodes on day 1.
# .loc[...] followed by .copy() yields an independent frame, so adding columns below does
# not write into a slice of lib10_cnt (which triggers SettingWithCopyWarning).
# NOTE(review): relies on 'mouse' and 'day' columns in lib10_cnt that are not created in
# the setup cell -- presumably provided by load_files; confirm.
is_wt_d1 = (lib10_cnt.phenotype == 'wt') & (lib10_cnt.day == 'd1')
wt_cnt_d1 = lib10_cnt.loc[is_wt_d1, ['barcode', 'mouse', 'cnt', 'conc']].copy()
wt_cnt_d1['lconc'] = np.log2(wt_cnt_d1.conc)
# +1 pseudocount keeps zero counts finite on the log scale.
wt_cnt_d1['lcnt'] = np.log2(wt_cnt_d1.cnt + 1)

In [156]:
lib10_cnt.groupby('sampleIDExp').cnt.sum().reset_index().sort_values('cnt').head(10)

Unnamed: 0,sampleIDExp,cnt
35,am487_d1_dnaid2027_TV5563A,241632.0
31,am486_d1_dnaid2027_TV5563A,2068541.0
27,am485_d1_dnaid2027_TV5563A,4104891.0
68,inoculum_d0_dnaid2018_TV4592A,4677199.0
63,am732_d1_dnaid2028_TV5585A,5347488.0
39,am488_d1_dnaid2027_TV5563A,5899761.0
44,am727_d2_dnaid2028_TV5585A,6414817.0
14,ad929_d4_dnaid2018_TV4592A,6594624.0
41,am488_d3_dnaid2027_TV5563A,6684291.0
13,ad929_d3_dnaid2018_TV4592A,7025641.0


In [61]:
px.scatter(wt_cnt_d1.sort_values('mouse'), x="lconc", y="lcnt", facet_col="mouse", facet_col_wrap=3,height=3000, width=800, trendline='ols')

In [159]:
day = 'd1'


def plot_control_fitness(control_df, label, day):
    """Strip plot of log2(fitness) per mouse for one control group on one day.

    Parameters
    ----------
    control_df : pandas.DataFrame
        Long-format control table with the columns 'day', 'mouse', 'fitness' and 'conc'.
    label : str
        Name of the control group; used in the title as "<label>-<day>".
    day : str
        Value of the 'day' column to plot, e.g. 'd1'.

    Returns
    -------
    The plotly figure, with a dashed reference line at log2(fitness) = 0.
    """
    on_day = control_df[control_df.day == day]
    fig = px.strip(
        on_day,
        x='mouse',
        y=np.log2(on_day['fitness']),
        color='mouse',
        hover_data=['conc', 'fitness'],
        template='simple_white',
        title=f'{label}-{day}',
        labels={"y": "log2(Fitness)", "conc": "Dilution", "fitness": "Fitness"},
    )
    # add_hline returns the figure it was called on.
    return fig.add_hline(y=0, line_width=3, line_dash="dash")


# Day-specific subsets stay available as notebook-level names (wt_d1 is reused further down).
wt_d1 = wt[wt.day == day]
hyb_d1 = hyb[hyb.day == day]
chey_d1 = chey[chey.day == day]
ssaV_invG_d1 = ssaV_invG[ssaV_invG.day == day]

# One figure per control group; previously four copy-pasted cells that differed only in
# the data frame and the title.
for label, control_df in [('WT', wt), ('hyb', hyb), ('chey', chey), ('ssaV_invG', ssaV_invG)]:
    plot_control_fitness(control_df, label, day).show()

In [116]:
# Normalised counts of one gene over time, one line per mouse.
# NOTE(review): vst_gene_cnts is only assigned in a cell further down this notebook (the one
# that calls get_median_for_gene_on_a_day), so this cell fails on a fresh top-to-bottom run.
gene = 'dcuB'
gene_rows = vst_gene_cnts[vst_gene_cnts.ShortName == gene]
test = gene_rows[gene_rows.day != 'd0']
test_inoculum = gene_rows[gene_rows.mouse == 'inoculum']
fig = px.line(
    test,
    x='day',
    y="norm_count",
    color='mouse',
    hover_data=['mouse'],
    color_discrete_sequence=px.colors.qualitative.Dark24,
    template='simple_white',
)
# Reference lines: mean inoculum level of this gene, and the detection limit.
# NOTE(review): the origin of the 5.2 detection limit is not documented here -- confirm.
inoculum_mean = test_inoculum.norm_count.mean()
fig.add_hline(y=inoculum_mean, line_width=3, line_dash="dash", annotation_text="Mean Inoculum Count")
fig.add_hline(y=5.2, line_width=3, line_dash="dash", annotation_text="Detection Limit")
fig.update_traces(mode='markers+lines')


## Method 2: How is p-value calculated?
<a id='method-2-p-value'></a>
[Back to Method 2](#Method-2)

In [95]:
# wt_d1 also holds string columns (barcode, mouse, ...); restrict the median to numeric
# columns explicitly, since recent pandas raises instead of silently dropping them.
wt_d1.median(numeric_only=True)

conc       0.000167
fitness    0.990555
dtype: float64

In [94]:
# Same median with mice am487 and am732 excluded; numeric_only avoids a TypeError on the
# string columns in recent pandas.
wt_d1[~wt_d1.mouse.isin(['am487', 'am732'])].median(numeric_only=True)

conc       0.000167
fitness    0.944411
dtype: float64

# Compare the Results <a id='Compare'></a>

[Back to the start](#start)

## Merge Results from Method 1 and 2.

In [507]:
# Melt method1_results:

# Identifier columns that are carried through unchanged when melting.
not_to_melt = ['gene', 'locus', 'num_barcodes', 'library', 'barcode', 'sstart', 'sseqid']
def melt_day(df, var):
    """Melt the per-day columns of one statistic into long format.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide results table indexed by gene, holding the identifier columns listed in
        `not_to_melt` and statistic columns named "<day>_<var>" (e.g. "d1_padj").
    var : str
        Statistic to melt, e.g. 'padj' or 'fitness_mean'.

    Returns
    -------
    pandas.DataFrame
        The identifier columns plus 'day' (the prefix before the first "_") and `var`.
    """
    # Match the "<day>_<var>" suffix exactly. A plain substring test (`var in c`) would
    # also pick up any column whose name merely contains var (e.g. 'ci' in 'd1_precision')
    # and mix unrelated values into the melted column.
    value_cols = [c for c in df if c.split("_", 1)[-1] == var]
    mdf = df.reset_index().melt(id_vars=not_to_melt, value_vars=value_cols, value_name=var, var_name='day')
    mdf['day'] = mdf.day.str.split("_", expand=True)[0]
    return mdf
# Statistics stored per day in method1_results; each is melted separately and the long
# tables are joined on the identifier columns plus 'day'.
to_melt = ['padj', 'fitness_mean', 'num_samples', 'zscore', 'ci', 'fitness_std', 'pval']
# Seed the join with the first melted table instead of an empty all-object frame, so
# the key columns keep their original dtypes through the merges.
method1_melted = None
for v in to_melt:
    print(v)
    mdf = melt_day(method1_results, v).drop_duplicates()
    if method1_melted is None:
        method1_melted = mdf
    else:
        method1_melted = method1_melted.merge(mdf, on=not_to_melt+["day"], how='outer')
    print('done')
# Keep gene, day and the melted statistics, then tag every column with the method name.
method1_melted = method1_melted[['gene', 'day'] + to_melt]
method1_melted.columns = [f'{c}_method1' for c in method1_melted.columns] 

padj
done
fitness_mean
done
num_samples
done
zscore
done
ci
done
fitness_std
done
pval
done


In [508]:

method1_melted = method1_melted.rename({'gene_method1': 'gene', 'day_method1': 'day'}, axis=1)


In [509]:
# Tag every Method 2 column with "_method2", then restore the two join keys to the
# names used on the Method 1 side.
method2_melted = (
    results_df
    .reset_index()
    .add_suffix('_method2')
    .rename(columns={'ShortName_method2': 'gene', 'day_method2': 'day'})
)

In [304]:
method2_melted

Unnamed: 0,gene,pval_method2,padj_method2,ci_pval_method2,ci_padj_method2,median_fitness_method2,mean_fitness_method2,median_CI_method2,mean_CI_method2,hits_method2,ci_hits_method2,day
0,AAAAACATGCCACACTG,0.293476,0.500978,0.986260,0.986260,1.119034,1.734346,1.194449,1.323248,False,False,d1
1,AAAACGCAGCGCTTGCC,0.398744,0.600342,0.769698,0.933327,1.264869,1.510653,1.406138,1.277696,False,False,d1
2,AAAACTTCATGTACAGA,0.065369,0.225623,0.876825,0.948679,1.104863,1.960143,1.139164,2.140346,False,False,d1
3,AAAATTCTTTCTCCCCG,0.018305,0.111847,0.593428,0.852658,1.000000,1.037107,1.135089,1.109729,False,False,d1
4,AAACAATAGGGGGGTAG,0.418272,0.615508,0.262963,0.564817,0.993225,0.967098,1.069282,1.055919,False,False,d1
...,...,...,...,...,...,...,...,...,...,...,...,...
7547,ytfF,0.009190,0.026012,0.309880,0.461762,1.929222,1.853625,2.728359,3.059657,True,False,d4
7548,ytfG,0.057631,0.100934,0.565992,0.669125,1.648328,2.266072,2.221159,3.115845,False,False,d4
7549,ytfJ,0.757278,0.809593,0.171098,0.359725,0.728348,1.754806,0.851529,2.315289,False,False,d4
7550,ytfM,0.003095,0.017343,0.000487,0.016709,0.291279,0.316050,0.425343,0.420156,True,True,d4


In [510]:
# Inner join of both methods on (gene, day); a Method 1 hit is padj < 0.05.
compare = pd.merge(method1_melted, method2_melted, on=['gene', 'day'])
compare['hits_method1'] = compare['padj_method1'].lt(0.05)

In [511]:
compare

Unnamed: 0,gene,day,padj_method1,fitness_mean_method1,num_samples_method1,zscore_method1,ci_method1,fitness_std_method1,pval_method1,pval_method2,padj_method2,ci_pval_method2,ci_padj_method2,median_fitness_method2,mean_fitness_method2,median_CI_method2,mean_CI_method2,hits_method2,ci_hits_method2,hits_method1
0,AAAAACATGCCACACTG,d1,0.963545,0.801205,17.0,0.070289,1.030174,,0.943964,0.293476,0.500978,0.986260,0.986260,1.119034,1.734346,1.194449,1.323248,False,False,False
1,AAAACTTCATGTACAGA,d1,0.836130,1.025133,17.0,0.354649,1.203149,,0.722852,0.065369,0.225623,0.876825,0.948679,1.104863,1.960143,1.139164,2.140346,False,False,False
2,AAACGGCCGAGTTCGAG,d1,0.405389,-0.319444,17.0,-1.836118,0.473764,,0.066340,0.617476,0.758487,0.043910,0.197858,0.942036,0.806160,0.897077,0.905080,False,False,False
3,AAACGTCCCCACACGGG,d1,0.768527,0.492743,17.0,-0.490198,0.831868,,0.623994,0.055925,0.207034,0.876825,0.948679,1.069822,1.383929,1.141920,1.526946,False,False,False
4,AAAGAAGCTATAAACTA,d1,0.737707,1.156746,17.0,0.567885,1.318072,,0.570113,0.379775,0.584842,0.796155,0.933628,1.039206,2.155005,1.074746,2.264930,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4707,ysaA,d4,0.994546,0.113334,9.0,-0.113487,0.947544,,0.909644,0.401542,0.483798,0.565992,0.669125,0.800858,2.413499,1.493660,6.834926,False,False,False
4708,ytfF,d4,0.994546,0.920365,9.0,0.993319,1.657829,,0.320555,0.009190,0.026012,0.309880,0.461762,1.929222,1.853625,2.728359,3.059657,True,False,False
4709,ytfG,d4,0.994546,1.192469,9.0,1.181411,2.001941,,0.237440,0.057631,0.100934,0.565992,0.669125,1.648328,2.266072,2.221159,3.115845,False,False,False
4710,ytfJ,d4,0.994546,0.831887,9.0,0.731803,1.559212,,0.464289,0.757278,0.809593,0.171098,0.359725,0.728348,1.754806,0.851529,2.315289,False,False,False


In [512]:
# Encode the two boolean hit calls as one integer (Method 2 weighs 3, Method 1 weighs 1),
# then translate the four possible codes into readable labels.
hit_labels = {0: 'Not a hit', 1: 'Method1 Hit', 3: 'Method2 Hit', 4: 'Method 1&2 Hit'}
method2_code = compare['ci_hits_method2'].astype(int) * 3
method1_code = compare['hits_method1'].astype(int)
compare['hits'] = method2_code + method1_code
compare['hits'] = compare['hits'].replace(hit_labels)
compare

Unnamed: 0,gene,day,padj_method1,fitness_mean_method1,num_samples_method1,zscore_method1,ci_method1,fitness_std_method1,pval_method1,pval_method2,...,ci_pval_method2,ci_padj_method2,median_fitness_method2,mean_fitness_method2,median_CI_method2,mean_CI_method2,hits_method2,ci_hits_method2,hits_method1,hits
0,AAAAACATGCCACACTG,d1,0.963545,0.801205,17.0,0.070289,1.030174,,0.943964,0.293476,...,0.986260,0.986260,1.119034,1.734346,1.194449,1.323248,False,False,False,Not a hit
1,AAAACTTCATGTACAGA,d1,0.836130,1.025133,17.0,0.354649,1.203149,,0.722852,0.065369,...,0.876825,0.948679,1.104863,1.960143,1.139164,2.140346,False,False,False,Not a hit
2,AAACGGCCGAGTTCGAG,d1,0.405389,-0.319444,17.0,-1.836118,0.473764,,0.066340,0.617476,...,0.043910,0.197858,0.942036,0.806160,0.897077,0.905080,False,False,False,Not a hit
3,AAACGTCCCCACACGGG,d1,0.768527,0.492743,17.0,-0.490198,0.831868,,0.623994,0.055925,...,0.876825,0.948679,1.069822,1.383929,1.141920,1.526946,False,False,False,Not a hit
4,AAAGAAGCTATAAACTA,d1,0.737707,1.156746,17.0,0.567885,1.318072,,0.570113,0.379775,...,0.796155,0.933628,1.039206,2.155005,1.074746,2.264930,False,False,False,Not a hit
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4707,ysaA,d4,0.994546,0.113334,9.0,-0.113487,0.947544,,0.909644,0.401542,...,0.565992,0.669125,0.800858,2.413499,1.493660,6.834926,False,False,False,Not a hit
4708,ytfF,d4,0.994546,0.920365,9.0,0.993319,1.657829,,0.320555,0.009190,...,0.309880,0.461762,1.929222,1.853625,2.728359,3.059657,True,False,False,Not a hit
4709,ytfG,d4,0.994546,1.192469,9.0,1.181411,2.001941,,0.237440,0.057631,...,0.565992,0.669125,1.648328,2.266072,2.221159,3.115845,False,False,False,Not a hit
4710,ytfJ,d4,0.994546,0.831887,9.0,0.731803,1.559212,,0.464289,0.757278,...,0.171098,0.359725,0.728348,1.754806,0.851529,2.315289,False,False,False,Not a hit


In [513]:
# Day 1: Method 1 mean fitness against Method 2 mean fitness, coloured by Method 2 hit call.
day1 = compare.loc[compare['day'] == 'd1']
fig = px.scatter(
    day1,
    x='fitness_mean_method1',
    y='mean_fitness_method2',
    color='hits_method2',
    log_y=True,
    hover_data=['gene'],
    template='simple_white',
)
marker_style = {'size': 12, 'line': {'width': 2, 'color': 'DarkSlateGrey'}}
fig.update_traces(marker=marker_style, selector={'mode': 'markers'})
fig

In [476]:
compare.to_csv("/Users/ansintsova/git_repos/avocado/data/compare.csv")

In [474]:
# Day 1: competitive index from Method 1 against median CI from Method 2 on log-log axes,
# coloured by which method(s) called the gene a hit.
day1 = compare.loc[compare['day'] == 'd1']
fig = px.scatter(
    day1,
    x='ci_method1',
    y='median_CI_method2',
    color='hits',
    log_x=True,
    log_y=True,
    hover_data=['gene'],
    template='simple_white',
    color_discrete_sequence=px.colors.qualitative.Vivid,
)
marker_style = {'size': 14, 'line': {'width': 0.5, 'color': 'black'}}
fig.update_traces(marker=marker_style, selector={'mode': 'markers'})
# Identical tick positions on both axes.
ci_ticks = [0.01, 0.1, 0.5,1,2, 10, 100]
fig.update_yaxes(tickvals=ci_ticks)
fig.update_xaxes(tickvals=ci_ticks)
fig

In [97]:

def get_median_for_gene_on_a_day(df, annotation, day, grp_by='ShortName'):
    """Return per-group medians of every column whose name contains ``day``.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with a ``barcode`` column plus one value column per sample.
        Sample column names are expected to embed the day tag (e.g. ``'_d1'``).
    annotation : pandas.DataFrame
        Table with a ``barcode`` column and the ``grp_by`` column; it is
        inner-merged onto ``df`` via ``barcode``.
    day : str
        Substring used to select the sample columns (plain ``in`` match).
    grp_by : str, default 'ShortName'
        Column to group the barcodes by before taking the median.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``grp_by``, one column per selected sample column, holding
        the median over all barcodes in the group.
    """
    merged = df.merge(annotation, on='barcode')
    # Select the day's sample columns; never treat the grouping key as data.
    day_cols = [c for c in merged.columns if day in c and c != grp_by]
    # Honour `grp_by` (it used to be silently ignored in favour of 'ShortName').
    return merged.groupby(grp_by)[day_cols].median()

# Per-gene medians of `vst_df`, computed separately for each day tag.
days = ['_d0', '_d1', '_d2', '_d3', '_d4']
vst_cnt_genes = []
for day in days:
    print(day)
    df = get_median_for_gene_on_a_day(vst_df, annotation_df, day)
    vst_cnt_genes.append(df)

# Join the per-day frames side by side, hyphenate the inoculum label
# (presumably so the '_' split below yields exactly four fields), and
# reshape to long format: one row per (ShortName, SampleID).
vst_gene_cnts = (
    pd.concat(vst_cnt_genes, axis=1)
    .rename(columns=lambda c: c.replace('unenriched_inoculum', 'unenriched-inoculum'))
    .reset_index()
    .melt(id_vars='ShortName', var_name='SampleID', value_name='norm_count')
)

# Break the sample ID into its four '_'-separated components.
new = vst_gene_cnts['SampleID'].str.split('_', expand=True)
new.columns = ['mouse', 'day', 'dnaid', 'experiment']
vst_gene_cnts = pd.concat([vst_gene_cnts, new], axis=1)
vst_gene_cnts.to_csv("/Users/ansintsova/git_repos/avocado/data/vst_gene_counts.csv")

_d0
_d1
_d2
_d3
_d4


In [477]:
# Display the long-format table (one row per ShortName / SampleID) with the split sample-ID fields.
vst_gene_cnts

Unnamed: 0,ShortName,SampleID,norm_count,mouse,day,dnaid,experiment
0,AAAAACATGCCACACTG,unenriched-inoculum_d0_dnaid2017_TV4592A,11.363182,unenriched-inoculum,d0,dnaid2017,TV4592A
1,AAAACGCAGCGCTTGCC,unenriched-inoculum_d0_dnaid2017_TV4592A,7.365887,unenriched-inoculum,d0,dnaid2017,TV4592A
2,AAAACTTCATGTACAGA,unenriched-inoculum_d0_dnaid2017_TV4592A,10.657251,unenriched-inoculum,d0,dnaid2017,TV4592A
3,AAAATTCTTTCTCCCCG,unenriched-inoculum_d0_dnaid2017_TV4592A,5.866814,unenriched-inoculum,d0,dnaid2017,TV4592A
4,AAACAATAGGGGGGTAG,unenriched-inoculum_d0_dnaid2017_TV4592A,5.175039,unenriched-inoculum,d0,dnaid2017,TV4592A
...,...,...,...,...,...,...,...
137819,ytfF,am727_d4_dnaid2029_TV5585A,9.974011,am727,d4,dnaid2029,TV5585A
137820,ytfG,am727_d4_dnaid2029_TV5585A,13.691950,am727,d4,dnaid2029,TV5585A
137821,ytfJ,am727_d4_dnaid2029_TV5585A,10.981456,am727,d4,dnaid2029,TV5585A
137822,ytfM,am727_d4_dnaid2029_TV5585A,8.524819,am727,d4,dnaid2029,TV5585A


In [479]:
# Display the `compare` table (columns suffixed _method1 / _method2, per gene and day).
compare

Unnamed: 0,gene,day,padj_method1,fitness_mean_method1,num_samples_method1,zscore_method1,ci_method1,fitness_std_method1,pval_method1,pval_method2,...,ci_pval_method2,ci_padj_method2,median_fitness_method2,mean_fitness_method2,median_CI_method2,mean_CI_method2,hits_method2,ci_hits_method2,hits_method1,hits
0,AAAAACATGCCACACTG,d1,0.963545,0.801205,17.0,0.070289,1.030174,,0.943964,0.293476,...,0.986260,0.986260,1.119034,1.734346,1.194449,1.323248,False,False,False,Not a hit
1,AAAACTTCATGTACAGA,d1,0.836130,1.025133,17.0,0.354649,1.203149,,0.722852,0.065369,...,0.876825,0.948679,1.104863,1.960143,1.139164,2.140346,False,False,False,Not a hit
2,AAACGGCCGAGTTCGAG,d1,0.405389,-0.319444,17.0,-1.836118,0.473764,,0.066340,0.617476,...,0.043910,0.197858,0.942036,0.806160,0.897077,0.905080,False,False,False,Not a hit
3,AAACGTCCCCACACGGG,d1,0.768527,0.492743,17.0,-0.490198,0.831868,,0.623994,0.055925,...,0.876825,0.948679,1.069822,1.383929,1.141920,1.526946,False,False,False,Not a hit
4,AAAGAAGCTATAAACTA,d1,0.737707,1.156746,17.0,0.567885,1.318072,,0.570113,0.379775,...,0.796155,0.933628,1.039206,2.155005,1.074746,2.264930,False,False,False,Not a hit
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4707,ysaA,d4,0.994546,0.113334,9.0,-0.113487,0.947544,,0.909644,0.401542,...,0.565992,0.669125,0.800858,2.413499,1.493660,6.834926,False,False,False,Not a hit
4708,ytfF,d4,0.994546,0.920365,9.0,0.993319,1.657829,,0.320555,0.009190,...,0.309880,0.461762,1.929222,1.853625,2.728359,3.059657,True,False,False,Not a hit
4709,ytfG,d4,0.994546,1.192469,9.0,1.181411,2.001941,,0.237440,0.057631,...,0.565992,0.669125,1.648328,2.266072,2.221159,3.115845,False,False,False,Not a hit
4710,ytfJ,d4,0.994546,0.831887,9.0,0.731803,1.559212,,0.464289,0.757278,...,0.171098,0.359725,0.728348,1.754806,0.851529,2.315289,False,False,False,Not a hit


In [376]:
# Mean norm_count of dcuB over the rows whose mouse field is 'inoculum'.
vst_gene_cnts.loc[
    (vst_gene_cnts['mouse'] == 'inoculum') & (vst_gene_cnts['ShortName'] == 'dcuB'),
    'norm_count',
].mean()

10.27419392425106

# Counts

In [514]:
# Per-mouse trajectory of norm_count for one gene (day 0 excluded), with
# dashed reference lines for the mean inoculum count and the value 5.2
# labelled "Detection Limit".
gene = 'dcuB'
test = vst_gene_cnts.loc[
    (vst_gene_cnts['ShortName'] == gene) & (vst_gene_cnts['day'] != 'd0')
]
test_inoculum = vst_gene_cnts.loc[
    (vst_gene_cnts['ShortName'] == gene) & (vst_gene_cnts['mouse'] == 'inoculum')
]
fig = px.line(
    test,
    x='day',
    y="norm_count",
    color='mouse',
    hover_data=['mouse'],
    color_discrete_sequence=px.colors.qualitative.Dark24,
    template='simple_white',
)
fig.add_hline(
    y=test_inoculum['norm_count'].mean(),
    line_width=3,
    line_dash="dash",
    annotation_text="Mean Inoculum Count",
)
fig.add_hline(y=5.2, line_width=3, line_dash="dash", annotation_text="Detection Limit")
fig.update_traces(mode='markers+lines')



In [523]:
# Scatter of norm_count against numeric day for one gene (day 0 excluded),
# with dashed reference lines for the mean inoculum count and the value 5.2
# labelled "Detection Limit".
gene = 'rfaI'
# Take an explicit copy: a column is added below, and assigning into a
# boolean-filtered slice raises SettingWithCopyWarning.
test = vst_gene_cnts[(vst_gene_cnts.ShortName == gene) & (vst_gene_cnts.day != 'd0')].copy()
test_inoculum = vst_gene_cnts[(vst_gene_cnts.ShortName == gene) & (vst_gene_cnts.mouse == 'inoculum')]
# Numeric day so the x axis is continuous rather than categorical.
test['dayN'] = test.day.replace({'d1': 1, 'd2': 2, 'd3': 3, 'd4': 4})
fig = px.scatter(test, x='dayN', y="norm_count", color='mouse',
                 hover_data=['mouse'],
                 color_discrete_sequence=px.colors.qualitative.Dark24,
                 template='simple_white',
                 )
fig.add_hline(y=test_inoculum.norm_count.mean(), line_width=3, line_dash="dash", annotation_text="Mean Inoculum Count")
fig.add_hline(y=5.2, line_width=3, line_dash="dash", annotation_text="Detection Limit")



In [524]:
# Box plot of norm_count per day for one gene (all days, including d0),
# with dashed reference lines for the mean inoculum count and the value 5.2
# labelled "Detection Limit".
gene = 'dcuB'
# Take an explicit copy: a column is added below, and assigning into a
# boolean-filtered slice raises SettingWithCopyWarning.
test = vst_gene_cnts[(vst_gene_cnts.ShortName == gene)].copy()
# 'd0' is not in the mapping, so it is left as the string 'd0'.
test['dayN'] = test.day.replace({'d1': 1, 'd2': 2, 'd3': 3, 'd4': 4})
test_inoculum = vst_gene_cnts[(vst_gene_cnts.ShortName == gene) & (vst_gene_cnts.mouse == 'inoculum')]
fig = px.box(test, x='day', y="norm_count", color='day',
             hover_data=['mouse'],
             template='simple_white'
             )
fig.add_hline(y=test_inoculum.norm_count.mean(), line_width=3, line_dash="dash", annotation_text="Mean Inoculum Count")
fig.add_hline(y=5.2, line_width=3, line_dash="dash", annotation_text="Detection Limit")
fig

In [112]:
# Split sampleID into its four '_'-separated fields and attach mouse, dnaid
# and experiment to the fitness table ('day' is dropped from the split
# because gene_fitness_df already carries a day column).
new = gene_fitness_df['sampleID'].str.split('_', expand=True)
new.columns = ['mouse', 'day', 'dnaid', 'experiment']
gene_fitness_annotated = pd.concat(
    [gene_fitness_df, new.drop(columns='day')],
    axis=1,
)
gene_fitness_annotated.to_csv('/Users/ansintsova/git_repos/avocado/data/gene_fitness_annotated.csv')

In [403]:
# Display the fitness table with the mouse / dnaid / experiment columns attached.
gene_fitness_annotated

Unnamed: 0,ShortName,sampleID,Fitness,day,mouse,dnaid,experiment
0,AAAAACATGCCACACTG,ad926_d1_dnaid2017_TV4592A,1.119034,d1,ad926,dnaid2017,TV4592A
1,AAAACGCAGCGCTTGCC,ad926_d1_dnaid2017_TV4592A,1.898760,d1,ad926,dnaid2017,TV4592A
2,AAAACTTCATGTACAGA,ad926_d1_dnaid2017_TV4592A,0.938151,d1,ad926,dnaid2017,TV4592A
3,AAAATTCTTTCTCCCCG,ad926_d1_dnaid2017_TV4592A,1.000000,d1,ad926,dnaid2017,TV4592A
4,AAACAATAGGGGGGTAG,ad926_d1_dnaid2017_TV4592A,1.007408,d1,ad926,dnaid2017,TV4592A
...,...,...,...,...,...,...,...
16987,ytfF,am732_d4_dnaid2029_TV5585A,1.929222,d4,am732,dnaid2029,TV5585A
16988,ytfG,am732_d4_dnaid2029_TV5585A,0.014096,d4,am732,dnaid2029,TV5585A
16989,ytfJ,am732_d4_dnaid2029_TV5585A,0.014454,d4,am732,dnaid2029,TV5585A
16990,ytfM,am732_d4_dnaid2029_TV5585A,0.044341,d4,am732,dnaid2029,TV5585A


In [483]:
# Split sampleID into its four '_'-separated fields and attach mouse, dnaid
# and experiment to the CI table, then export it.
# NOTE(review): 'day' from the split is not attached — presumably ci_df already has it; confirm.
new = ci_df.sampleID.str.split('_', expand=True)
new.columns = ['mouse', 'day', 'dnaid', 'experiment']
ci_annotated = pd.concat([ci_df, new[['mouse', 'dnaid', 'experiment']]], axis=1)
ci_annotated.to_csv('/Users/ansintsova/git_repos/avocado/data/ci_annotated.csv')

In [115]:
# Per-mouse fitness trajectory for one gene on a log y axis, with a dashed
# reference line at Fitness = 1.
gene = 'dcuB'
test = gene_fitness_annotated.loc[gene_fitness_annotated['ShortName'] == gene]

fig = px.line(
    test,
    x='day',
    y="Fitness",
    color='mouse',
    hover_data=['mouse'],
    color_discrete_sequence=px.colors.qualitative.Dark24,
    template='simple_white',
    log_y=True,
)
fig.add_hline(y=1, line_width=3, line_dash="dash", annotation_text="Fitness")
fig.update_traces(mode='markers+lines')
fig.update_yaxes(tickvals=[0.01, 0.1, 0.5, 1, 2, 10, 100])

In [526]:
# Box plot of fitness per day for one gene on a log y axis, with a dashed
# reference line at Fitness = 1.
gene = 'dcuB'
test = gene_fitness_annotated[gene_fitness_annotated.ShortName == gene]

fig = px.box(test, x='day', y="Fitness", color='day',
             hover_data=['mouse'],
             template='simple_white', log_y=True
             )
fig.update_yaxes(tickvals=[0.01, 0.1, 0.5, 1, 2, 10, 100])
# The y=1 line is a fitness reference, not a count detection limit; label it
# the same way as the fitness line plot does.
fig.add_hline(y=1, line_width=3, line_dash="dash", annotation_text="Fitness")

In [486]:
# Which days are present for mouse am731 in `gene_fitness`?
gene_fitness.loc[gene_fitness['mouse'] == 'am731', 'day'].unique()

array(['d1', 'd2'], dtype=object)

In [490]:
# Same check on `gene_fitness_df`, matching the mouse by sampleID prefix.
gene_fitness_df.loc[gene_fitness_df['sampleID'].str.startswith('am731'), 'day'].unique()

array(['d1', 'd2'], dtype=object)

In [494]:
# All annotated fitness rows for hybA.
gene_fitness_annotated.loc[gene_fitness_annotated['ShortName'] == 'hybA']

Unnamed: 0,ShortName,sampleID,Fitness,day,mouse,dnaid,experiment
1298,hybA,ad926_d1_dnaid2017_TV4592A,0.039839,d1,ad926,dnaid2017,TV4592A
3186,hybA,ad927_d1_dnaid2017_TV4592A,0.019011,d1,ad927,dnaid2017,TV4592A
5074,hybA,ad928_d1_dnaid2017_TV4592A,0.034216,d1,ad928,dnaid2017,TV4592A
6962,hybA,ad929_d1_dnaid2017_TV4592A,0.034923,d1,ad929,dnaid2017,TV4592A
8850,hybA,ad930_d1_dnaid2017_TV4592A,0.020554,d1,ad930,dnaid2017,TV4592A
10738,hybA,am483_d1_dnaid2027_TV5563A,0.055093,d1,am483,dnaid2027,TV5563A
12626,hybA,am484_d1_dnaid2027_TV5563A,0.111054,d1,am484,dnaid2027,TV5563A
14514,hybA,am485_d1_dnaid2027_TV5563A,0.345835,d1,am485,dnaid2027,TV5563A
16402,hybA,am486_d1_dnaid2027_TV5563A,0.092735,d1,am486,dnaid2027,TV5563A
18290,hybA,am487_d1_dnaid2027_TV5563A,0.072753,d1,am487,dnaid2027,TV5563A
