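# I've turned off saving to file for now. (Note: `analyze_and_save` below still calls `to_csv`, so running the notebook as written does write the CSV files.)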

In [1]:
import pandas as pd
from pandas import DataFrame as df
from toolz import assoc
from raw import create_well_df,\
                generate_sid,\
                get_cell_data,\
                cell_config

In [2]:
# One entry per slide: the raw export to read ('path'), the CSV to write
# ('savepath'), and the label that is embedded in each row's 'Condition'.
configs = [
    dict(
        path='/notebooks/moldev-data/original/07-15-2015/[ ID_1 ] PNA probe 50-6 nM ssC HS JS (60X) 07.13.2015.txt',
        savepath='/notebooks/moldev-data/analyzed/07-15-2015/ssC PNA.csv',
        label='PNA ssC',
    ),
    dict(
        path='/notebooks/moldev-data/original/07-15-2015/[ ID_2 ] PNA probe 50-6 nM APB HS JS (60X) 07.15.2015.txt',
        savepath='/notebooks/moldev-data/analyzed/07-15-2015/APB PNA.csv',
        label='PNA APB',
    ),
]

Here are the latest data files. One file per slide (one APB slide and one ssC). Row A = U2OS and Row B = 143B for all slides. Column 1 = 50 nM, Column 2 = 25 nM, Column 3 = 12.5 nM and Column 4 = 6.25 nM of PNA probe for all slides. Run these in the current DATAVIS version. I am very interested in seeing what the nuclear integrated intensity distributions look like in these data so that we can do cell cycle phase separation ASAP.
 
Jonah, please correct any of the above if I’ve made a mistake but I think that is accurate in terms of labeling and orientation.

In [3]:
# Plate layout: each cell line occupies one row of wells.
cell_dict = {
    "U2OS": ['A01', 'A02', 'A03', 'A04'],   # row A
    "143B": ['B01', 'B02', 'B03', 'B04'],   # row B
}

# Probe concentration per well column, in column order (01 -> 04).
dilutions = [
    '50 nM',
    '25 nM',
    '12.5 nM',
    '6.25 nM',
]

# Experiment date stamped onto every row of the condition lookup.
date = '07-15-2015'

In [4]:
# [Concentration: String] -> DataFrame[Concentration, Well Name, Plate ID]
def create_dilution_df(c, well_letters=('A', 'B'), well_numbers=('01', '02', '03', '04')):
    """Build a per-well lookup table of probe concentrations.

    Every row letter in `well_letters` receives the same dilution series:
    `c[i]` is assigned to column `well_numbers[i]` of each row.

    Parameters
    ----------
    c : list of str
        Concentration labels, one per well column, in column order.
    well_letters : sequence of str, optional
        Plate row letters to cover. Defaults to rows 'A' and 'B'.
    well_numbers : sequence of str, optional
        Zero-padded plate column numbers. Defaults to '01' through '04'.

    Returns
    -------
    pandas.DataFrame
        One row per well with columns 'Concentration', 'Well Name'
        (row letter + column number, e.g. 'A01') and 'Plate ID'. A single
        `generate_sid()` value is shared by all rows. The per-row frames are
        concatenated as-is, so index labels repeat for each row letter.

    Raises
    ------
    ValueError
        Raised by pandas when `c` and `well_numbers` differ in length, or
        when `well_letters` is empty (nothing to concatenate).
    """
    well_numbers = list(well_numbers)

    # One frame per plate row; well names are built directly instead of going
    # through temporary letter/number columns that are dropped afterwards.
    per_row = [pd.DataFrame({'Concentration': c,
                             'Well Name': [letter + number for number in well_numbers]})
               for letter in well_letters]
    x = pd.concat(per_row)

    # Scalar assignment broadcasts the same generated ID to every well.
    x['Plate ID'] = generate_sid()
    return x

In [5]:
# Per-well cell-type lookup built from cell_dict.
# NOTE(review): create_well_df comes from the project-local `raw` module; presumably it
# returns one row per well with 'Well Name' and 'Cell Type' columns (both are used
# downstream) — confirm against its definition.
cell_types = create_well_df(cell_dict)

In [6]:
# Per-well probe-concentration lookup: create_dilution_df applies the same
# dilution series to plate rows A and B.
concentrations = create_dilution_df(dilutions)

In [7]:
# Join the concentration and cell-type lookups on their shared 'Well Name'
# column, then stamp every row with the experiment date.
condition_lookup = concentrations.merge(cell_types, on=['Well Name'])
condition_lookup['Date'] = date

In [8]:
# {path: String, savepath: String, label: String} -> DataFrame, SideEffects(SaveFile)
def analyze_and_save(c):
    """Annotate one slide's raw cell data with its conditions, save it, and return it.

    Reads the module-level `cell_config` (used as the template passed to
    `get_cell_data`) and `condition_lookup` (per-well conditions).

    Parameters
    ----------
    c : dict
        'path'     -- raw export file handed to `get_cell_data`.
        'savepath' -- destination CSV.
        'label'    -- slide label used as the start of each 'Condition'.

    Returns
    -------
    pandas.DataFrame
        The raw data joined to `condition_lookup` on 'Well Name', plus a
        'Condition' column of the form '<label> <Concentration> <Cell Type>'.

    Side effects
    ------------
    Writes the returned frame to `c['savepath']` as CSV without the index.
    """
    slide_config = assoc(cell_config, 'path', c['path'])

    # The raw 'Plate ID' column is renamed so it does not collide with the
    # 'Plate ID' column that condition_lookup brings into the merge.
    raw = get_cell_data(slide_config).rename(columns={"Plate ID": "Plate Name"})
    data = pd.merge(raw, condition_lookup, on='Well Name')

    # The label already names the slide (e.g. 'PNA ssC', 'PNA APB'). A constant
    # 'ssC' prefix used to be prepended here, which labelled APB-slide rows as
    # 'ssC PNA APB ...'; the label alone is now used.
    data['Condition'] = c['label'] + ' ' + data['Concentration'] + ' ' + data['Cell Type']

    # NOTE(review): the notebook header says saving to file is turned off, but
    # this write is unconditional — confirm which behaviour is intended.
    data.to_csv(c['savepath'], index=False)
    return data

In [9]:
# Process every slide eagerly into a real list. A list comprehension is used
# instead of map(): on Python 3 map() returns a lazy iterator, which would not
# run the saves until consumed and cannot be indexed (dataframes[1] below).
dataframes = [analyze_and_save(config) for config in configs]

In [10]:
# Alphabetical listing of the second slide's column names.
sorted(dataframes[1].columns)

['# of Coloc Spots',
 '# of FITC in ColocSpots',
 '# of FITC spots',
 '# of FITC-TxRed all spots',
 '# of FITC-TxRed in ColocSpots',
 '# of TxRed in ColocSpots',
 '# of TxRed spots',
 'Cell Type',
 'ColocSpots_area_avg',
 'ColocSpots_area_sum',
 'Concentration',
 'Condition',
 'Date',
 'FITC-TxRed_all_area_avg',
 'FITC-TxRed_all_area_sum',
 'FITC-TxRed_coloc_area_avg',
 'FITC-TxRed_coloc_area_sum',
 'FITC_all_IntIntensity_avg',
 'FITC_all_IntIntensity_sum',
 'FITC_all_Intensity_avg',
 'FITC_all_Intensity_sum',
 'FITC_all_area_avg',
 'FITC_all_area_sum',
 'FITC_coloc_IntIntensity_avg',
 'FITC_coloc_IntIntensity_sum',
 'FITC_coloc_Intensity_avg',
 'FITC_coloc_Intensity_sum',
 'FITC_coloc_area_avg',
 'FITC_coloc_area_sum',
 'Normalized coloc spots (by FITC & TxRed)',
 'Normalized coloc spots (by FITC in coloc)',
 'Normalized coloc spots (by FITC)',
 'Normalized coloc spots (by FITC-TxRed in coloc)',
 'Normalized coloc spots (by TxRed in coloc)',
 'Normalized coloc spots (by TxRed)',
 'Nor