In [None]:
# Setting up the Colab environment. DO NOT EDIT!
import os
import warnings
# Install an "ignore" filter so warnings are not shown for the rest of the session.
warnings.filterwarnings("ignore")

# Use otter if it is already installed; otherwise install the pinned release
# and import it again.
try:
    import otter

except ImportError:
    ! pip install -q otter-grader==4.0.0
    import otter

# If the tests directory is missing, extract it from the .zip archive found in
# the current working directory.
if not os.path.exists('walkthrough-tests'):
    zip_files = [f for f in os.listdir() if f.endswith('.zip')]
    # Exactly one archive must be present; otherwise it is unclear which to unzip.
    assert len(zip_files)>0, 'Could not find any zip files!'
    assert len(zip_files)==1, 'Found multiple zip files!'
    ! unzip {zip_files[0]}

# Grader handle used by the `grader.check(...)` cells later in the notebook.
grader = otter.Notebook(colab=True,
                        tests_dir = 'walkthrough-tests')

# Walkthrough

## Learning Objectives
At the end of this learning activity you will be able to:

* Practice summarizing observations by sample using `groupby`.
* Measure the uncertainty of the estimate of the mean.
* Distinguish when to use parametric and non-parametric estimates of error.
* Practice merging two dataframes.

This week we will start looking at the imaging data we discussed with Dr. Gaskill.
In this experiment, they used pH-responsive beads that fluoresce when in the low pH environment of the phagosome.
With this technology, they exposed cells to different levels of dopamine and measured the uptake of these beads.
They did this using a _high content_ imager which automates the process of scanning a plate, detecting cell boundaries, and identifying _spots_ of fluorescing beads.

This imager returns a giant spreadsheet where each cell is a row and the columns are the cell area, bead count, and intensity.
This dataset of a single 96-well plate has over **315,000 cells** measured across 60 samples of 20 conditions performed in triplicate.

---------------------------------------------

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [None]:
# Load the per-cell measurements exported by the imager and preview the first rows.
cell_level_data = pd.read_csv('pHrodo_DMEM.csv')
cell_level_data.head()

## Summarize by sample

### Q1: How many cells are in each well?

In [None]:
# Use `groupby` to count the number of cells per well

# Replace the `...` placeholder with your answer. Until you do, the next line
# fails because `...` (Ellipsis) has no `.describe()` method.
cells_per_well = ...
cells_per_well.describe()

In [None]:
# Run the autograder check for Q1 against your `cells_per_well` answer.
grader.check("q1_cells_per_well")

In [None]:
# Box plot showing how the per-well cell counts are spread across the plate.
cells_per_well.plot.box()

The count ranges from 257 to 794 with an average of 525 cells per well.

## Measuring phagocytosis

Each cell can take up 0 or more pH beads.
Our biological question is whether dopamine changes the amount of beads that are taken up by the cells.

In [None]:
# Distribution of bead spots per cell, shown as the percentage of all cells.
# Bin edges sit on every integer from 0 to 99.
sns.histplot(
    x='SpotCountCh2',
    data=cell_level_data,
    stat='percent',
    bins=np.arange(100),
)

From our graph, we can see that most cells took up 0 beads and then about 10% took up 1, ~5% took up two, etc.

We hypothesize that dopamine treatment will increase the average number of beads taken up by cells.

In [None]:
# Visually: one horizontal bar per well showing the mean spot count per cell.
sns.barplot(
    x='SpotCountCh2',
    y='Well',
    data=cell_level_data,
)

The length of the bars indicates the average number of spots per cell while the black hashes indicate the 95% CI of that estimate.

In [None]:
# Numerically: collapse the per-cell spot counts to one row per well holding
# the mean, the standard error of the mean, and the number of cells.
well_level_data = (
    cell_level_data
    .groupby('Well')['SpotCountCh2']
    .agg(['mean', 'sem', 'count'])
)
well_level_data.head()

## Decoding samples

Up to now we've been treating all of our wells the same, without knowing which treatment they came from.
Now that we've collapsed our data to a single representative number for each sample, we can merge with our plate map.

In [None]:
# Read the plate map and store the bead concentration as a categorical
# label rather than a continuous number.
plate_map = (
    pd.read_csv('plate_map.csv')
    .astype({'pHrodo_conc_ug': 'category'})
)

plate_map.head()

This function helps visualize how the plate is laid out.

In [None]:
def fancy_pivot(df):
    """Arrange a plate map into a grid that mirrors the physical plate.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'well' column whose IDs start with a row letter A-H
        followed by a one- or two-digit column number (e.g. 'B7', 'H12').
        Every column other than 'well', 'row' and 'col' is treated as an
        annotation of the well.

    Returns
    -------
    pandas.DataFrame
        Pivot table indexed by row letter with one column per plate column
        number. Each entry is the well's annotations converted to strings and
        joined with '-'. If a well appears more than once, the first
        occurrence is shown.

    Raises
    ------
    ValueError
        If any well ID does not start with a letter A-H followed by a digit.

    Notes
    -----
    `df` is left untouched: the helper columns are built on a copy, so calling
    this function repeatedly gives the same grid and does not add 'row',
    'col' or 'values' columns to the caller's frame.
    """
    # Split each well ID into its row letter and column number. The pattern is
    # anchored at the start only, so any trailing characters are ignored.
    well_parts = df['well'].astype(str).str.extract(r'^([A-H])([0-9]{1,2})')
    unparsed = well_parts.isna().any(axis=1)
    if unparsed.any():
        raise ValueError(
            f"Unrecognized well IDs: {df.loc[unparsed, 'well'].tolist()}"
        )

    # Work on a copy so the helper columns never leak into the caller's frame.
    layout = df.copy()
    layout['row'] = well_parts[0]
    layout['col'] = well_parts[1].astype(int)

    # Concatenate all other columns as 'V1-V2-V3' format
    value_columns = [name for name in layout.columns
                     if name not in ['well', 'row', 'col']]
    layout['values'] = layout[value_columns].astype(str).agg('-'.join, axis=1)

    # 'first' keeps the string label as-is (there is nothing numeric to aggregate).
    return layout.pivot_table(index='row', columns='col',
                              values='values', aggfunc='first')


fancy_pivot(plate_map)

Merge the plate map with the well level aggregates.

In [None]:
# Attach the per-well aggregates to the plate map: the plate map's 'well'
# column is matched against the index of the well-level table.
sample_level_data = plate_map.merge(
    well_level_data,
    left_on='well',
    right_index=True,
)
sample_level_data.head()

Now we can visualize the well level aggregates by the treatment condition.

In [None]:
# Translucent bars: average of the well-level means for each dopamine
# treatment, split by bead concentration, with error bars of two standard errors.
ax = sns.barplot(
    data=sample_level_data,
    x='DA_Tx',
    y='mean',
    hue='pHrodo_conc_ug',
    errorbar=('se', 2),
    alpha=0.5,
)

# Overlay each individual well as a point on the same axes, dodged to line up
# with its bar; the legend is suppressed so it is not drawn twice.
sns.stripplot(
    data=sample_level_data,
    x='DA_Tx',
    y='mean',
    hue='pHrodo_conc_ug',
    dodge=True,
    legend=False,
    ax=ax,
)

ax.set_ylabel('mean(bead count)')

### Q2: Describe the graph

_Points:_ 5

In [None]:
# Which experimental condition (pHrodo_conc_ug) had less noise in the measurement?
# Answer 5.0 or 7.5
q2a = ...


In [None]:
# Does this graph show evidence that dopamine increases the amount of beads phagocytosed?
# Answer 'yes' or 'no'
q2b = ...


In [None]:
# Run the autograder check for Q2 against your `q2a` and `q2b` answers.
grader.check("q2_graph")

In the next few weeks we'll cover strategies to quantify our hypothesis using techniques like ANOVAs and multiple regression.

---------------------------------------------

## Submission

Check:
 - That all tables and graphs are rendered properly.
 - Code completes without errors by using `Restart & Run All`.
 - All checks **pass**.
 
Then save the notebook and download it with `File` -> `Download` -> `Download .ipynb`. Upload this file to BBLearn.