In [2]:
# Differential expression analysis
library(DESeq2)


In [3]:
#' Differential gene expression between time points (DESeq2)
#'
#' Builds a DESeqDataSet with design `~time.point`, runs `DESeq()`, keeps the
#' genes passing the adjusted p-value and log2 fold-change cutoffs, writes
#' them (sorted by decreasing log2 fold change) to `paste0(DATA, filename)`
#' and returns them together with the fitted data set.
#'
#' @param counts Count table; coerced with `as.matrix()` and used as
#'   `countData`.
#' @param exp_info Sample annotation used as `colData`; the design formula
#'   requires a `time.point` column.
#' @param filename Name of the output CSV, appended to the global `DATA`
#'   prefix (which must exist when the function is called).
#' @param padj_cutoff Genes are kept when `padj < padj_cutoff`.
#' @param lfc_cutoff Genes are kept when `abs(log2FoldChange) > lfc_cutoff`.
#' @param alpha Passed to `results()`. The default, 0.1, is the value
#'   `results()` uses when `alpha` is not given, so existing calls behave as
#'   before. NOTE(review): the DESeq2 documentation advises setting `alpha`
#'   to the adjusted p-value cutoff that will be used - consider calling with
#'   `alpha = padj_cutoff`.
#' @return A list with elements `results` (the filtered, sorted results) and
#'   `dds` (the object returned by `DESeq()`).
TimeAnalysisDE <- function(counts, exp_info, filename,
                           padj_cutoff = 0.05, lfc_cutoff = 1, alpha = 0.1) {
  dds <- DESeqDataSetFromMatrix(
    countData = as.matrix(counts),
    colData = exp_info,
    design = ~time.point
  )
  dds <- DESeq(dds)
  res <- results(dds, alpha = alpha)

  # which() drops rows whose padj / log2FoldChange is NA, matching what
  # subset() did in the previous implementation.
  keep <- which(res$padj < padj_cutoff & abs(res$log2FoldChange) > lfc_cutoff)
  res_sig <- res[keep, ]
  # Sort once and reuse the same object for the file and the return value.
  res_sig <- res_sig[order(res_sig$log2FoldChange, decreasing = TRUE), ]

  write.csv(res_sig, paste0(DATA, filename), row.names = TRUE, quote = FALSE)

  list("results" = res_sig, "dds" = dds)
}

In [4]:
#' Run the time-point DE analysis on one subset of samples
#'
#' Selects the samples whose `lane.ID` is different from `lane` and whose
#' `position` equals `pos`, runs `TimeAnalysisDE()` on them and additionally
#' writes the DESeq2-normalized counts to
#' `paste0(DATA, <file_out up to its first ".">, "_deseq_norm_counts.csv")`.
#'
#' @param exp_info_sample_names Sample annotation whose first column holds the
#'   sample names and which has `lane.ID` and `position` columns.
#' @param counts Count table with one column per sample name.
#' @param experiment_info Sample annotation with sample names as row names.
#' @param lane Lane ID to EXCLUDE (samples on this lane are dropped).
#' @param pos Position label to keep.
#' @param file_out File name handed to `TimeAnalysisDE()`.
#' @return The list returned by `TimeAnalysisDE()`.
TimeAnalysis_subset <- function(exp_info_sample_names, counts, experiment_info, lane, pos, file_out){
  # Base indexing instead of a bare filter(): filter() only does the right
  # thing when dplyr is attached and masks stats::filter(). which() drops NA
  # comparisons, as dplyr::filter() does.
  keep <- which(exp_info_sample_names$lane.ID != lane &
                  exp_info_sample_names$position == pos)
  # as.character() so that names (not factor codes) are used for indexing.
  samples <- as.character(exp_info_sample_names[[1]][keep])
  if (length(samples) == 0) {
    stop("No samples with position '", pos, "' outside lane '", lane, "'",
         call. = FALSE)
  }

  counts <- counts[, samples, drop = FALSE]
  info <- experiment_info[samples, ]
  print(info)

  analysis <- TimeAnalysisDE(counts, info, file_out)

  # DESeq() already estimated the size factors ("estimating size factors" in
  # its log), so they do not need to be estimated a second time here.
  # `counts` below resolves to the accessor function: R skips the local
  # non-function `counts` object when looking up a function call.
  norm_counts <- counts(analysis$dds, normalized = TRUE)

  # Everything before the first "." of file_out is used as the file stem.
  stem <- strsplit(file_out, ".", fixed = TRUE)[[1]][1]
  write.csv(norm_counts, paste0(DATA, stem, "_deseq_norm_counts.csv"), quote = FALSE)

  return(analysis)
}

In [5]:
# Project root and output locations. DATA and FIG keep their trailing slash
# because file names are appended to them with paste0().
root <- "/Users/annasintsova/git_repos/spatial_dynamics_of_gene_expression_in_response_to_T6SS_attack/"
DATA <- paste0(root, "tables/")
FIG <- paste0(root, "figures/")

### Analyzing Differential Gene Expression between 0 and 30 min on the Dienes Line

In [5]:
# Load the stranded count table and the study design.
# The path is built from `root` (same resulting string as before) instead of
# repeating the absolute project path.
counts_file <- paste0(root, "data/counts/stranded/2018-04-23_counts.csv")
counts <- read.table(counts_file, row.names = 1, sep = ",", header = TRUE)
experiment_info <- read.csv(paste0(root, "data/ref/study_design.csv"),
                            header = TRUE, row.names = 1)

# Sample name = text before the first "_" of each column name, with every
# "X" replaced by "S" (presumably the "X" is the prefix read.table() adds to
# column names that start with a digit - confirm against the raw file).
first_token <- vapply(strsplit(colnames(counts), "_"), `[[`, character(1), 1L)
s_names <- gsub("X", "S", first_token)
colnames(counts) <- s_names

# Copy of the study design with the row names moved into a "rownames" column.
exp_info_sample_names <- tibble::rownames_to_column(experiment_info, var = "rownames")

In [6]:
# Dienes line (position "L-9C"); samples with lane.ID "L1" are excluded.
dienes_line_results <- TimeAnalysis_subset(
  exp_info_sample_names = exp_info_sample_names,
  counts = counts,
  experiment_info = experiment_info,
  lane = "L1",
  pos = "L-9C",
  file_out = "2018-06-28-dienes-line-0-30.csv"
)
#dienes_line_results$results

       lane.ID group.ID time.point strain.ID position RIN
S76079      L2    Case9        0''       Mix     L-9C 7.4
S76080      L2    Case9        0''       Mix     L-9C 7.3
S76081      L2    Case9        0''       Mix     L-9C 6.8
S76082      L2   Case10       30''       Mix     L-9C 6.2
S76083      L2   Case10       30''       Mix     L-9C 6.1
S76084      L2   Case10       30''       Mix     L-9C 5.5


factor levels were dropped which had no samples
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


log2 fold change (MAP): time.point 30'' vs 0'' 
Wald test p-value: time.point 30'' vs 0'' 
DataFrame with 24 rows and 6 columns
                         baseMean log2FoldChange     lfcSE      stat
                        <numeric>      <numeric> <numeric> <numeric>
PMI1426                 2603.3788       2.536457 0.2963109  8.560121
PMI1425                 1440.3558       2.219405 0.3207280  6.919898
PMI2396                 2135.7921       2.217305 0.3405866  6.510251
PMI2397                  485.5849       1.802089 0.3317039  5.432823
PMI1427                 2649.0044       1.598964 0.3061397  5.222987
...                           ...            ...       ...       ...
PMI0648                1809.69278      -1.071900 0.2003107 -5.351189
PMI3401                 187.37700      -1.079391 0.2975396 -3.627722
PMI0729                 619.92438      -1.084232 0.3113269 -3.482616
PMI0861               12880.30164      -1.134173 0.2627818 -4.316025
fig|529507.6.peg.3417    40.27845      -1.37

### Analyzing Differential Gene Expression Behind the Merge at 0 and 30 min

In [7]:
# Position "BH"; samples with lane.ID "L1" are excluded.
WT_periphery_results <- TimeAnalysis_subset(
  exp_info_sample_names = exp_info_sample_names,
  counts = counts,
  experiment_info = experiment_info,
  lane = "L1",
  pos = "BH",
  file_out = "2018-06-28-wt-bh-0-30.csv"
)
WT_periphery_results$results

       lane.ID group.ID time.point strain.ID position RIN
S76073      L2    Case7       30''       Mix       BH 6.2
S76074      L2    Case7       30''       Mix       BH 6.3
S76075      L2    Case7       30''       Mix       BH 6.9
S76088      L2   Case12        0''       Mix       BH 7.3
S76089      L2   Case12        0''       Mix       BH 7.4
S76090      L2   Case12        0''       Mix       BH 7.4


factor levels were dropped which had no samples
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


log2 fold change (MAP): time.point 30'' vs 0'' 
Wald test p-value: time.point 30'' vs 0'' 
DataFrame with 367 rows and 6 columns
                        baseMean log2FoldChange     lfcSE      stat
                       <numeric>      <numeric> <numeric> <numeric>
PMI0348                 788.2452       3.448874 0.3217504 10.719097
PMI0781                 412.3377       3.304738 0.3562953  9.275278
PMI3226                4790.0397       2.899292 0.3588041  8.080432
PMI1956                 167.1570       2.421253 0.3667462  6.601985
PMI1807                 883.3477       2.371982 0.2431928  9.753506
...                          ...            ...       ...       ...
PMI1343                588.78235      -1.883057 0.4193148 -4.490796
PMI3376               3370.81078      -2.038360 0.2292795 -8.890286
fig|529507.6.peg.1291   22.94104      -2.058813 0.4613959 -4.462141
PMI3598                 45.51311      -2.283743 0.3572940 -6.391776
PMI2149               1170.61275      -2.363668 0.35401

In [8]:
# Position "B9"; samples with lane.ID "L1" are excluded.
# NOTE(review): the output file name contains "wt" while the variable is
# named MUT_* - confirm the file name is the intended one.
MUT_periphery_results <- TimeAnalysis_subset(
  exp_info_sample_names = exp_info_sample_names,
  counts = counts,
  experiment_info = experiment_info,
  lane = "L1",
  pos = "B9",
  file_out = "2018-06-28-wt-b9-0-30.csv"
)
MUT_periphery_results$results

       lane.ID group.ID time.point strain.ID position RIN
S76076      L2    Case8       30''       Mix       B9 5.5
S76077      L2    Case8       30''       Mix       B9 6.6
S76078      L2    Case8       30''       Mix       B9 5.7
S76085      L2   Case11        0''       Mix       B9 7.5
S76086      L2   Case11        0''       Mix       B9 6.4
S76087      L2   Case11        0''       Mix       B9 6.7


factor levels were dropped which had no samples
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


log2 fold change (MAP): time.point 30'' vs 0'' 
Wald test p-value: time.point 30'' vs 0'' 
DataFrame with 707 rows and 6 columns
          baseMean log2FoldChange     lfcSE      stat       pvalue         padj
         <numeric>      <numeric> <numeric> <numeric>    <numeric>    <numeric>
PMI1031  1118.3504       4.916655 0.3302088 14.889532 3.854608e-50 1.388815e-46
PMI0781   550.5741       4.700586 0.3798781 12.373931 3.616777e-35 6.515624e-32
PMI0348  1505.6800       4.600672 0.5028233  9.149680 5.710249e-20 2.571753e-17
PMI2408   120.5018       3.746797 0.4964789  7.546740 4.462876e-14 5.955459e-12
PMI1956   159.9439       3.744966 0.3915971  9.563313 1.140459e-21 8.218148e-19
...            ...            ...       ...       ...          ...          ...
PMI0913 3785.43253      -3.023720 0.7010666 -4.313029 1.610333e-05 1.534928e-04
PMI3598   50.08999      -3.272011 0.4342601 -7.534681 4.895316e-14 6.299223e-12
PMI0807 2977.62767      -3.285358 0.4683805 -7.014293 2.311142e-12 1.98

In [9]:
# Position "B"; samples with lane.ID "L1" are excluded.
swarm_front_periphery_results <- TimeAnalysis_subset(
  exp_info_sample_names = exp_info_sample_names,
  counts = counts,
  experiment_info = experiment_info,
  lane = "L1",
  pos = "B",
  file_out = "2018-06-28-wt-b-0-30.csv"
)
swarm_front_periphery_results$results

       lane.ID group.ID time.point strain.ID position RIN
S77462      L3   Case13        0''        HI        B  NA
S77463      L3   Case13        0''        HI        B  NA
S77464      L3   Case13        0''        HI        B  NA
S77465      L3   Case14       30''        HI        B  NA
S77466      L3   Case14       30''        HI        B  NA
S77467      L3   Case14       30''        HI        B  NA


factor levels were dropped which had no samples
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


log2 fold change (MAP): time.point 30'' vs 0'' 
Wald test p-value: time.point 30'' vs 0'' 
DataFrame with 0 rows and 6 columns

### Analysis of Differential Gene Expression at the Swarm Front between 0 and 30 min

In [10]:
# Swarm front (position "L-HI"); samples with lane.ID "L1" are excluded.
swarm_front_results <- TimeAnalysis_subset(
  exp_info_sample_names = exp_info_sample_names,
  counts = counts,
  experiment_info = experiment_info,
  lane = "L1",
  pos = "L-HI",
  file_out = "2018-06-28-wt-swarm-front-0-30.csv"
)
swarm_front_results$results

       lane.ID group.ID time.point strain.ID position RIN
S76067      L2    Case5       30''        HI     L-HI 6.8
S76068      L2    Case5       30''        HI     L-HI 6.1
S76069      L2    Case5       30''        HI     L-HI 6.1
S76070      L2    Case6        0''        HI     L-HI 6.4
S76071      L2    Case6        0''        HI     L-HI 4.5
S76072      L2    Case6        0''        HI     L-HI 6.1


factor levels were dropped which had no samples
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


log2 fold change (MAP): time.point 30'' vs 0'' 
Wald test p-value: time.point 30'' vs 0'' 
DataFrame with 121 rows and 6 columns
         baseMean log2FoldChange     lfcSE      stat       pvalue         padj
        <numeric>      <numeric> <numeric> <numeric>    <numeric>    <numeric>
PMI1031  1003.517       2.775656 0.2312591 12.002363 3.452952e-33 1.196448e-29
PMI0437  5635.458       2.063356 0.2094392  9.851812 6.731923e-23 5.831528e-20
PMI3038  1196.089       2.044178 0.2278525  8.971497 2.925126e-19 1.013556e-16
PMI2847  4752.583       1.912011 0.1969639  9.707418 2.803483e-22 1.825954e-19
PMI2254 10780.565       1.862599 0.1613418 11.544432 7.875990e-31 1.364515e-27
...           ...            ...       ...       ...          ...          ...
PMI1426 1939.4264      -1.631109 0.2685475 -6.073820 1.249028e-09 6.658279e-08
PMI0176  219.7845      -1.854880 0.2793990 -6.638824 3.161964e-11 2.235960e-09
PMI1425 1318.4807      -1.966218 0.2666194 -7.374625 1.648088e-13 1.730492e-11
PM

### Analysis of Differential Gene Expression at the Dienes Line/Swarm Front at 0 and 4 hours

In [18]:
# Replace `counts` with the "reverse" count table. The path is built from
# `root` (same resulting string as before) instead of repeating the absolute
# project path. `experiment_info` / `exp_info_sample_names` are not reloaded
# here; the cells below reuse the objects created earlier.
counts_file <- paste0(root, "data/counts/reverse/2018-04-25_counts.csv")
counts <- read.table(counts_file, row.names = 1, sep = ",", header = TRUE)

# Sample name = text before the first "_" of each column name, with every
# "X" replaced by "S".
first_token <- vapply(strsplit(colnames(counts), "_"), `[[`, character(1), 1L)
s_names <- gsub("X", "S", first_token)
colnames(counts) <- s_names


In [13]:
# Swarm front (position "L-HI"); samples with lane.ID "L2" are excluded.
swarm_front_4_results <- TimeAnalysis_subset(
  exp_info_sample_names = exp_info_sample_names,
  counts = counts,
  experiment_info = experiment_info,
  lane = "L2",
  pos = "L-HI",
  file_out = "2018-06-28-wt-swarm-front-0-4.csv"
)
swarm_front_4_results$results

       lane.ID group.ID time.point strain.ID position RIN
S63630      L1    Case1        0''        HI     L-HI  NA
S63631      L1    Case1        0''        HI     L-HI  NA
S63632      L1    Case1        0''        HI     L-HI  NA
S63633      L1    Case2         4'        HI     L-HI  NA
S63634      L1    Case2         4'        HI     L-HI  NA
S63635      L1    Case2         4'        HI     L-HI  NA


factor levels were dropped which had no samples
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


log2 fold change (MAP): time.point 4' vs 0'' 
Wald test p-value: time.point 4' vs 0'' 
DataFrame with 215 rows and 6 columns
                        baseMean log2FoldChange     lfcSE      stat
                       <numeric>      <numeric> <numeric> <numeric>
PMI0474               144.426100       3.950637 0.3824828 10.328926
PMI1283                12.183354       3.401635 0.7749701  4.389376
PMI0235                 7.113928       3.188586 0.8576869  3.717657
PMI1709               233.867685       2.727142 0.3754651  7.263370
fig|529507.6.peg.2778  10.644931       2.726832 0.8104134  3.364741
...                          ...            ...       ...       ...
PMI0013                 6.505418      -3.088101 0.8645297 -3.572001
PMI2148                10.078518      -3.178758 0.8130289 -3.909773
PMI3267                57.163982      -3.188819 0.5768765 -5.527733
PMI2435                 6.697351      -3.280723 0.8576114 -3.825420
PMI2285                21.573583      -3.706715 0.7578696 -

In [14]:
# Dienes line (position "L-9C"); samples with lane.ID "L2" are excluded.
dienes_line_4_results <- TimeAnalysis_subset(
  exp_info_sample_names = exp_info_sample_names,
  counts = counts,
  experiment_info = experiment_info,
  lane = "L2",
  pos = "L-9C",
  file_out = "2018-06-28-wt-dienes-line-0-4.csv"
)
dienes_line_4_results$results

       lane.ID group.ID time.point strain.ID position RIN
S63636      L1    Case3        0''       Mix     L-9C  NA
S63638      L1    Case3        0''       Mix     L-9C  NA
S63639      L1    Case4         4'       Mix     L-9C  NA
S63641      L1    Case4         4'       Mix     L-9C  NA


factor levels were dropped which had no samples
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing


log2 fold change (MAP): time.point 4' vs 0'' 
Wald test p-value: time.point 4' vs 0'' 
DataFrame with 90 rows and 6 columns
                       baseMean log2FoldChange     lfcSE      stat       pvalue
                      <numeric>      <numeric> <numeric> <numeric>    <numeric>
PMI0474                62.01907       3.382885 0.6408443  5.278795 1.300365e-07
fig|529507.6.peg.2778  15.33429       3.070472 0.8719521  3.521376 4.293127e-04
PMIP19                 54.91956       2.994731 0.6923662  4.325357 1.522847e-05
fig|529507.6.peg.2392  13.31491       2.858944 0.8818635  3.241935 1.187210e-03
fig|529507.6.peg.1026  68.34178       2.858905 0.5879001  4.862909 1.156733e-06
...                         ...            ...       ...       ...          ...
PMI3279               110.98739      -3.548970 0.6426570 -5.522340 3.345152e-08
PMI2794                45.19923      -3.674224 0.6866013 -5.351321 8.731456e-08
PMI2824                68.89319      -3.678898 0.7681170 -4.789502 1.671958e