## Explore the results associated with idr0093
This notebook retrieves the analytical results associated with the IDR study and plots them.

## Tasks
* Connect to IDR
* Load the CSV files attached to the study
* Plot the results

## Collect parameters

In [None]:
# Parameters:
# NOTE(review): a later cell passes `plateId` to loadObject(server, 'PlateData', ...),
# but no visible cell assigns it. It is presumably meant to be defined or injected
# in this cell (e.g. by a parameterised notebook runner) -- confirm before running.

## Load the libraries

In [3]:
library(magrittr)
library(ggplot2)
library(readr)
library(romero.gateway)

Loading required package: rJava

Loading required package: jpeg

Loading required package: httr


*** Welcome to rOMERO 0.4.10 ***




## Log in to the IDR 

In [4]:
# Describe the IDR endpoint (secure websocket, shared public/public account),
# then open the session; `server` is rebound to the connected object.
server <- OMEROServer(
  host = "wss://idr.openmicroscopy.org/omero-ws",
  port = 443L,
  username = "public",
  password = "public"
)
server <- connect(server)
# Last expression of the cell: shows the logged-in user name as confirmation.
paste("Successfully logged in as", server@user$getUserName())

## Load the results associated to the study

In [None]:
# Study-level inputs: the OMERO ID of the screen and the names of the two
# CSV attachments that are looked up on it.
screenId      <- 2751
fileLibrary   <- "screenA-library.csv"
fileProcessed <- "screenA-processed.csv"

### Find the attachments

In [None]:
# Load the screen and look up each CSV attachment by file name.
omeScreen <- loadObject(server, "ScreenData", screenId)
fileLibraryAnnotation <- getAnnotations(
  server, "ScreenData", getOMEROID(omeScreen),
  nameFilter = fileLibrary
)
fileProcessedAnnotation <- getAnnotations(
  server, "ScreenData", getOMEROID(omeScreen),
  nameFilter = fileProcessed
)

# Fail with a clear message when a lookup returns nothing, instead of the
# "subscript out of bounds" error that indexing an empty vector produces.
annotationFileLibraryID <- as.integer(fileLibraryAnnotation$FileID)
if (length(annotationFileLibraryID) == 0L) {
  stop(
    "No attachment named '", fileLibrary, "' found on screen ", screenId,
    call. = FALSE
  )
}
# Several attachments may match the name; keep the last one listed.
annotationFileLibraryID <- annotationFileLibraryID[[length(annotationFileLibraryID)]]

annotationFileProcessedID <- as.integer(fileProcessedAnnotation$FileID)
if (length(annotationFileProcessedID) == 0L) {
  stop(
    "No attachment named '", fileProcessed, "' found on screen ", screenId,
    call. = FALSE
  )
}
annotationFileProcessedID <- annotationFileProcessedID[[length(annotationFileProcessedID)]]

## Load the results associated with the plate
We load the plate, find the result files attached to it, and extract the plate number from the name of the raw single-cell file.

In [None]:
# Fetch the plate identified by `plateId`; `plateId` is not assigned in this
# cell and must already exist in the session.
omePlate <- loadObject(server, "PlateData", plateId)

In [None]:
# List the plate's attachments and pick out the raw and processed single-cell
# feature files by file-name prefix.
annotations <- getAnnotations(server, "PlateData", getOMEROID(omePlate))
attachmentNames <- as.character(annotations$Name)
attachmentIds <- annotations$FileID

# Reset the results first so that re-running this cell for a plate without
# matching attachments cannot silently reuse IDs from a previous run.
annotationFileRawSingleCellID <- NULL
annotationFileProcessedSingleCellID <- NULL
plateNumber <- NULL

for (idx in seq_along(attachmentNames)) {
  attachmentName <- attachmentNames[[idx]]
  if (startsWith(attachmentName, "single_cell_features_raw")) {
    annotationFileRawSingleCellID <- attachmentIds[[idx]]
    # The plate number is the digit run captured after an underscore in the
    # raw file name. If the pattern does not match, sub() returns the name
    # unchanged.
    plateNumber <- sub('.*_([0-9]+).*', '\\1', attachmentName)
  } else if (startsWith(attachmentName, "single_cell_features_processed")) {
    annotationFileProcessedSingleCellID <- attachmentIds[[idx]]
  }
}

# Stop here with a clear message rather than failing later with
# "object not found" when the CSV files are loaded.
if (is.null(annotationFileRawSingleCellID) ||
    is.null(annotationFileProcessedSingleCellID)) {
  stop(
    "Plate ", plateId, " has no 'single_cell_features_raw*' and/or ",
    "'single_cell_features_processed*' attachment",
    call. = FALSE
  )
}

### Load the annotations as R-Dataframes

In [None]:
# Screen-level tables: the library file and the processed per-well file.
lib            <- loadCSV(server, annotationFileLibraryID)
well_summaries <- loadCSV(server, annotationFileProcessedID)

# Plate-level tables: raw and processed single-cell features.
raw_single_cell       <- loadCSV(server, annotationFileRawSingleCellID)
processed_single_cell <- loadCSV(server, annotationFileProcessedSingleCellID)

## Close the connection 

In [5]:
# Close the IDR session. The cells below only use the data frames that were
# already loaded (lib, well_summaries, raw_single_cell, processed_single_cell).
disconnect(server)

## Show distributions of phenotypes
Example to show joining of library and summaries files to plot distributions of a quantitative phenotype.

In [None]:
# Histogram of the mean-EU model residuals, coloured by phenotype call and
# split into one row per negative-control status (Gene.Symbol == "Scrambled").
lib %>%
  dplyr::left_join(well_summaries, by = c("Plate", "Well")) %>%
  # Drop wells that have no gene symbol.
  dplyr::filter(!is.na(Gene.Symbol)) %>%
  dplyr::select(
    residuals_mean_EU_model,
    phenotype_mean_EU_model,
    Gene.Symbol
  ) %>%
  dplyr::mutate(negative_control = Gene.Symbol == "Scrambled") %>%
  ggplot(
    aes(
      x = residuals_mean_EU_model,
      fill = as.factor(phenotype_mean_EU_model)
    )
  ) +
  geom_histogram(bins = 100) +
  facet_grid(negative_control ~ ., labeller = "label_both") +
  scale_fill_discrete(name = "Mean EU phenotype") +
  theme_minimal()

## Show top hits
Example to show joining of library and summaries files to list all mean-EU hits on the selected plate. 

In [None]:
# Table of wells on the selected plate whose mean-EU phenotype call is
# non-zero, sorted from the largest residual to the smallest.
lib %>%
  dplyr::left_join(well_summaries, by = c("Plate", "Well")) %>%
  dplyr::filter(
    phenotype_mean_EU_model != 0,
    Plate == plateNumber
  ) %>%
  dplyr::select(
    Gene.Symbol,
    residuals_mean_EU_model,
    phenotype_mean_EU_model
  ) %>%
  dplyr::arrange(dplyr::desc(residuals_mean_EU_model))

## Compare raw and processed
Example to show a comparison of raw and processed feature values. Note that the offset arises from background correction (subtraction of a constant value) and the change in slope arises from plate-to-plate correction (plate-wise multiplication to equalise scrambled medians). 

In [None]:
# Scatter of raw vs processed nuclear mean EU intensity for the cells of
# well G06, matched between the two tables on object and position keys.
processed_single_cell %>%
  dplyr::filter(Well == "G06") %>%
  dplyr::inner_join(
    raw_single_cell,
    by = c("Object_ID", "Plate", "Well", "well_pos_y", "well_pos_x"),
    # Columns present in both tables get these suffixes.
    suffix = c("_processed", "_raw")
  ) %>%
  ggplot(
    aes(
      x = Nuclei_Intensity_Mean_EU_raw,
      y = Nuclei_Intensity_Mean_EU_processed
    )
  ) +
  geom_point() +
  # Default abline (slope 1, intercept 0) as the identity reference.
  geom_abline() +
  theme_minimal() +
  # Make sure the origin is included on both axes.
  expand_limits(x = 0, y = 0)

## Single cell data with library info
Example to show joining of library, summaries and processed single cell data to plot the single cell EU measurements as a function of protein content and cell cycle stage, for all mean-EU ‘hits’ on the selected plate. 

In [19]:
# Randomly keep at most 150000 rows (without replacement) to keep the plot
# below manageable. The size is capped at the row count because sample()
# raises an error when asked for more rows than exist with replace = FALSE.
processed_single_cell <- processed_single_cell[
  sample.int(
    nrow(processed_single_cell),
    size = min(150000L, nrow(processed_single_cell)),
    replace = FALSE
  ),
]

In [None]:
# Faceted scatter of per-cell Nuclei_Intensity_Sum_EU against
# Cell_Intensity_Sum_SE for the hit wells of the selected plate.
# The single-cell rows are restricted to `plateNumber`, then joined with the
# per-well summaries and the library on Plate/Well. The joined table is passed
# as `.` into the braces so that each layer can take its own subset of it.
processed_single_cell %>%
  dplyr::filter(Plate==plateNumber) %>%
  dplyr::left_join(well_summaries,by=c("Plate","Well")) %>%
  dplyr::left_join(lib,by = c("Plate", "Well")) %>% {
    ggplot() +
      # Layer 1: cells from "Scrambled" wells, drawn in grey. Only the two axis
      # columns are kept, so this layer carries no faceting variable; ggplot2
      # repeats such a layer in every panel, giving a shared reference cloud.
      geom_point(data=dplyr::filter(.,`Gene.Symbol`=="Scrambled") %>% 
                            dplyr::select(Cell_Intensity_Sum_SE,
                                          Nuclei_Intensity_Sum_EU),
                          mapping=aes(x=Cell_Intensity_Sum_SE,
                                      y=Nuclei_Intensity_Sum_EU), color="grey80", size=0.1) +
      # Layer 2: cells from wells with a non-zero mean-EU phenotype call,
      # excluding KIF11 (NOTE(review): reason for the exclusion is not stated
      # here -- confirm). Points are coloured by Cell_Cycle_Stage, and `well_id`
      # ("gene ( well )") is built for use as a facet label.
      geom_point(data=dplyr::filter(.,phenotype_mean_EU_model!=0 & `Gene.Symbol` !="KIF11" ) %>% 
                            dplyr::select(phenotype_mean_EU_model,
                                          Cell_Intensity_Sum_SE,
                                          Nuclei_Intensity_Sum_EU,
                                          Cell_Cycle_Stage,
                                          `Gene.Symbol`,
                                          Well) %>%
                            dplyr::mutate(well_id = paste(`Gene.Symbol`,"(",Well,")")) %>%
                            dplyr::distinct(),
                          mapping=aes(x=Cell_Intensity_Sum_SE,
                                      y=Nuclei_Intensity_Sum_EU,
                                      col=Cell_Cycle_Stage), size=0.1) +
      # Layer 3: "#cells = n" text label built from the same hit subset, reduced
      # with distinct() to the unique combinations of the selected columns.
      # x = -Inf / y = Inf anchor the text at the top-left corner of the panel.
      # `n_cells` presumably comes from the per-well summaries -- confirm.
      geom_text(data=dplyr::filter(.,phenotype_mean_EU_model!=0 & `Gene.Symbol` !="KIF11" ) %>% 
                            dplyr::select(phenotype_mean_EU_model,
                                          n_cells,
                                          `Gene.Symbol`,
                                          Well) %>%
                            dplyr::mutate(well_id = paste(`Gene.Symbol`,"(",Well,")")) %>%
                            dplyr::distinct(),
                         mapping=aes(x=-Inf,y=Inf,label=paste("#cells =",n_cells)),
                         # size is given in mm; dividing by .pt expresses 8 pt text.
                         hjust=-0.1,vjust=1,size=8/.pt,fontface="plain",family="sans") +
      # One panel per phenotype call / well label combination, three per row.
      facet_wrap(phenotype_mean_EU_model~well_id,labeller = "label_both",ncol=3) +
      theme_minimal(base_size = 8) +
      ggtitle(paste("Plate", plateNumber, sep=" "))
  }

### License 

License
Copyright (C) 2021 University of Dundee. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.