In [1]:
execfile('../src/utils.py')
execfile('../src/cellslib.py')
from toolz import assoc

In [2]:
configs = [{'path': '/notebooks/moldev-data/original/07-15-2015/[ ID_1 ] PNA probe 50-6 nM ssC HS JS (60X) 07.13.2015.txt',
            'savepath': '/notebooks/moldev-data/analyzed/07-15-2015/ssC PNA-Refactor.csv',
            'probe': 'PNA',
            'assay': 'ssC'},
           {'path': '/notebooks/moldev-data/original/07-15-2015/[ ID_2 ] PNA probe 50-6 nM APB HS JS (60X) 07.15.2015.txt',
            'savepath': '/notebooks/moldev-data/analyzed/07-15-2015/APB PNA-Refactor.csv',
            'probe': 'PNA',
            'assay': 'APB'}]

Here are the latest data files. One file per slide (one APB slide and one ssC). Row A = U2OS and Row B = 143B for all slides. Column 1 = 50 nM, Column 2 = 25 nM, Column 3 = 12.5 nM and Column 4 = 6.25 nM of PNA probe for all slides. Run these in the current DATAVIS version. I am very interested in seeing what the nuclear integrated intensity distributions look like in these data so that we can do cell cycle phase separation ASAP.
 
Jonah, please correct any of the above if I’ve made a mistake but I think that is accurate in terms of labeling and orientation.

In [3]:
cell_dict = {"U2OS": ['A01','A02','A03','A04'],
             "143B": ['B01','B02','B03','B04']}

concentrations = [50,25,12.5,6.2]
units = 'nM'

date = '07-15-2015'

In [4]:
# Concentration:[String]} -> DataFrame[Plate Name, Plate ID, Well Name, Concentration,Unit]
def create_dilution_df(concentrations,unit):
    well_numbers = ['01','02','03','04']

    x = pd.concat([df({'Concentration':concentrations,
                       '_Well Number': well_numbers,
                       '_Well Letter': well_letter}) \
                    for well_letter in ['A','B']])
    
    x['Well Name'] = x['_Well Letter'] + x['_Well Number']
    x = x.drop(['_Well Letter', '_Well Number'],axis=1)    
    x['Plate ID'] = generate_sid()
    x['Concentration Units'] = unit
    return x

In [5]:
cell_types = create_well_df(cell_dict)

In [6]:
concentrations_df = create_dilution_df(concentrations,units)

In [7]:
condition_lookup = pd.merge(concentrations_df,cell_types,on=['Well Name'])
condition_lookup['Date'] = date

In [8]:
condition_lookup

Unnamed: 0,Concentration,Well Name,Plate ID,Concentration Units,Cell Type,Date
0,50.0,A01,6b4e81e3fe2a,nM,U2OS,07-15-2015
1,25.0,A02,6b4e81e3fe2a,nM,U2OS,07-15-2015
2,12.5,A03,6b4e81e3fe2a,nM,U2OS,07-15-2015
3,6.2,A04,6b4e81e3fe2a,nM,U2OS,07-15-2015
4,50.0,B01,6b4e81e3fe2a,nM,143B,07-15-2015
5,25.0,B02,6b4e81e3fe2a,nM,143B,07-15-2015
6,12.5,B03,6b4e81e3fe2a,nM,143B,07-15-2015
7,6.2,B04,6b4e81e3fe2a,nM,143B,07-15-2015


In [9]:
# {path:String, savepath: String, label: String} -> SideEffects(SaveFile)
def analyze_and_save(c):
    """ Clean and save raw LNA and PNA data. """
    my_cell_config = assoc(cell_config,'path',c['path'])
    data = pd.merge(get_cell_data(my_cell_config).rename(columns={"Plate ID":"Plate Name"}),
                    condition_lookup,
                    on = 'Well Name')
    
    data['Assay'] = c['assay']
    data['Probe'] = c['probe']

    # Write to file
    data.to_csv(c['savepath'],index=False)
    return data

In [10]:
# String -> String
def rename_column(col):
    """ Rename column col to remove whitespace, backslashes, prefixes,
        and suffixes (esp. large parenthetic suffix). """
    if col.startswith('Cell:'):
        return col.split('(')[0].lstrip("Cell:").rstrip('/').strip(' ')
    else:
        return col.split('(')[0].rstrip('/').strip(' ')

cell_config = dict(
    plate_delimiter = "ATF",
    delimiter = '\t',
    skiprows = 4,
    dropcols = ['Cell ID',
                'Site ID',
                'MEASUREMENT SET ID',
                '.*ObjectID.*',
                'Laser focus score',
                '\.[0-9]*\Z'],
    normcols = [['Normalized_ColocSpot_area_sum (coloc)',
                  ['ColocSpots_area_sum'],
                  ['FITC-TxRed_coloc_area_sum']],
                ['Normalized_ColocSpot_area_sum (all)',
                  ['ColocSpots_area_sum'],
                  ['FITC-TxRed_all_area_sum']],
        
                ['Normalized coloc spots (by FITC & TxRed)',
                  ['# of Coloc Spots'],
                  ['# of FITC spots', '# of TxRed spots']],
                ['Normalized coloc spots (by FITC)',
                  ['# of Coloc Spots'],
                  ['# of FITC spots']],
                ['Normalized coloc spots (by TxRed)',
                  ['# of Coloc Spots'],
                  ['# of TxRed spots']]],
    colrename = rename_column,
    check = identity
    )

In [11]:
# analyze_and_save(configs[0])
dataframes = map(analyze_and_save,configs)

In [12]:
sorted(dataframes[0].columns.tolist())

['# of Coloc Spots',
 '# of FITC in ColocSpots',
 '# of FITC spots',
 '# of FITC-TxRed all spots',
 '# of FITC-TxRed in ColocSpots',
 '# of TxRed in ColocSpots',
 '# of TxRed spots',
 'Assay',
 'Cell Type',
 'ColocSpots_area_avg',
 'ColocSpots_area_sum',
 'Concentration',
 'Concentration Units',
 'Date',
 'FITC-TxRed_all_area_avg',
 'FITC-TxRed_all_area_sum',
 'FITC-TxRed_coloc_area_avg',
 'FITC-TxRed_coloc_area_sum',
 'FITC_all_IntIntensity_avg',
 'FITC_all_IntIntensity_sum',
 'FITC_all_Intensity_avg',
 'FITC_all_Intensity_sum',
 'FITC_all_area_avg',
 'FITC_all_area_sum',
 'FITC_coloc_IntIntensity_avg',
 'FITC_coloc_IntIntensity_sum',
 'FITC_coloc_Intensity_avg',
 'FITC_coloc_Intensity_sum',
 'FITC_coloc_area_avg',
 'FITC_coloc_area_sum',
 'Normalized coloc spots (by FITC & TxRed)',
 'Normalized coloc spots (by FITC)',
 'Normalized coloc spots (by TxRed)',
 'Normalized_ColocSpot_area_sum (all)',
 'Normalized_ColocSpot_area_sum (coloc)',
 'NucArea_Avg',
 'NucIntegrated Intensity_Avg',
