In [1]:
import os
import sys
import time
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
import keras.backend as K
import matplotlib
import seaborn as sns
import matplotlib.pyplot as plt
from keras.optimizers import SGD, Adam
from keras.models import Sequential
from keras.regularizers import l1_l2
from sklearn.metrics import accuracy_score
from collections import OrderedDict, Counter
from tensorflow.python.ops import gen_array_ops
from keras.layers import Dense, Dropout, Flatten
from names import Names as n

In [2]:
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 10000)
pd.set_option('display.max_colwidth', None)
matplotlib.use("tkagg")
%matplotlib inline

In [3]:
# Table that is later joined onto the Strateos rows by
# (inducer_concentration, timepoint).
cfu_means = pd.read_csv(filepath_or_buffer="cfu_means_v2.csv")

In [4]:
# Load the processed Duke 2.0 export and label every row with its origin so
# the rows stay identifiable after being concatenated with other sources.
duke_v2 = pd.read_csv(
    filepath_or_buffer="experiment_data/raw/Duke-YeastSTATES-Ethanol-TS-2-0-LiveDeadClassification/processed_duke_2-0_data.csv",
)
duke_v2["source"] = "duke_v2"


In [5]:
# Drop irrelevant columns: every column whose name contains "BL", "YL2",
# "YL3" or "YL4". A single alternation selects the same set of columns as
# filtering and dropping once per pattern.
duke_v2.drop(
    columns=duke_v2.filter(regex="BL|YL2|YL3|YL4").columns,
    inplace=True,
)

In [6]:
# Load the Strateos pipeline export and rename its condition columns to the
# shared names defined on Names (imported as `n`).
strateos = pd.read_csv(
    "experiment_data/raw/sytoxYeastSTATES-LiveDeadClassification-placeholder/pipeline_data.csv"
).rename(
    columns={"time_point": n.timepoint, "ethanol": n.inducer_concentration}
)
# Halve the raw time_point values; the timepoint filter applied further down
# (0.0, 0.5, 3.0, 6.0) operates on this rescaled column.
# NOTE(review): the unit of the raw values is not visible here - confirm.
strateos[n.timepoint] = strateos[n.timepoint] / 2

In [7]:
# Row count per inducer concentration in the Strateos data.
strateos.loc[:, n.inducer_concentration].value_counts()

0.0     2191947
10.0    1556805
20.0    1548106
80.0    1453483
15.0    1125385
Name: inducer_concentration, dtype: int64

In [8]:
# Join the cfu_means table onto the Strateos rows.
# NOTE: an inner join keeps only rows whose (inducer_concentration, timepoint)
# pair also appears in cfu_means - might want to change to a left join.
strateos = pd.merge(strateos, cfu_means, how="inner", on=["inducer_concentration", "timepoint"])
strateos.sort_values(by=["inducer_concentration", "timepoint"], inplace=True)
strateos.drop(columns="arbitrary_index", inplace=True)
# Drop the un-prefixed channel columns; the "log_"-prefixed columns take over
# these names once the prefix is removed below.
strateos.drop(columns = ["FSC-A", "SSC-A", "BL1-A", "RL1-A", 
                         "FSC-H", "SSC-H", "BL1-H", "RL1-H", 
                         "FSC-W", "SSC-W", "BL1-W", "RL1-W"], inplace=True)
# Remove only a literal leading "log_" prefix. str.lstrip('log_') would treat
# its argument as a set of characters and strip any leading run of 'l', 'o',
# 'g' or '_' (e.g. "group" -> "roup"), so an anchored regex is used instead.
strateos.columns = strateos.columns.str.replace(r"^log_", "", regex=True)
strateos["source"] = "strateos"

# Remove timepoints that are not 0.0, 0.5, 3.0, or 6.0
strateos = strateos.loc[strateos[n.timepoint].isin([0.0, 0.5, 3.0, 6.0])]


In [9]:
# Remove every remaining column whose name contains "BL", then display the
# resulting frame.
strateos.drop(columns=strateos.filter(regex="BL").columns, inplace=True)
strateos

Unnamed: 0,stain,inducer_concentration,timepoint,FSC-A,FSC-H,FSC-W,SSC-A,SSC-H,SSC-W,RL1-A,RL1-H,RL1-W,percent_live,source
826251,1,0.0,0.5,6.020599,4.967066,3.009876,6.020599,4.913613,3.009876,3.341632,2.193125,0.0,98.776667,strateos
826252,1,0.0,0.5,4.880007,4.790356,1.591065,4.548721,4.450695,1.477121,2.127105,1.991226,0.0,98.776667,strateos
826253,1,0.0,0.5,4.929879,4.819866,1.612784,4.629909,4.518553,1.518514,0.602060,2.060698,0.0,98.776667,strateos
826254,1,0.0,0.5,4.937117,4.851020,1.602060,4.684271,4.586632,1.531479,1.939519,2.012837,0.0,98.776667,strateos
826255,1,0.0,0.5,4.680154,4.669940,1.477121,4.560158,4.546617,1.431364,2.060698,2.033424,0.0,98.776667,strateos
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
505646,1,80.0,6.0,4.121822,3.756712,0.000000,2.887617,2.685742,0.000000,0.000000,1.732394,0.0,0.000000,strateos
505647,1,80.0,6.0,4.981198,3.822037,0.000000,0.000000,2.838849,0.000000,0.000000,2.025306,0.0,0.000000,strateos
505648,1,80.0,6.0,5.295772,3.789792,0.000000,0.000000,2.615950,0.000000,0.000000,2.086360,0.0,0.000000,strateos
505649,1,80.0,6.0,5.471633,3.768120,0.000000,0.000000,2.840106,0.000000,0.000000,2.127105,0.0,0.000000,strateos


# Combining Strateos and Duke 2.0 data (all ethanol concentrations at 4 timepoints)

In [10]:
# Stack the Duke rows on top of the Strateos rows. The Strateos RL1-* columns
# are renamed to YL1-* first so they land in the same columns as Duke's.
duke_v2_and_strateos_combined = pd.concat(
    objs=[
        duke_v2,
        strateos.rename(
            {"RL1-A": "YL1-A", "RL1-H": "YL1-H", "RL1-W": "YL1-W"},
            axis="columns",
        ),
    ]
)

In [11]:
# Display the combined frame. The concat was done without ignore_index, so the
# row labels shown are the original ones from each source frame.
duke_v2_and_strateos_combined

Unnamed: 0,inducer_concentration,timepoint,stain,FSC-A,SSC-A,YL1-A,FSC-H,SSC-H,YL1-H,FSC-W,SSC-W,YL1-W,percent_live,source
0,0.0,0.0,1.0,6.020600,6.020600,4.900930,5.764440,5.411980,2.940520,3.00988,3.00988,0.0,78.633333,duke_v2
1,0.0,0.0,1.0,5.389450,4.592750,2.597700,5.371090,4.594220,2.414970,1.96379,1.87506,0.0,78.633333,duke_v2
2,0.0,0.0,1.0,5.450140,4.883100,2.403120,5.415370,4.848330,2.233000,1.94448,1.89763,0.0,78.633333,duke_v2
3,0.0,0.0,1.0,5.644360,5.118360,2.866290,5.583750,5.063450,2.638490,1.95424,1.94448,0.0,78.633333,duke_v2
4,0.0,0.0,1.0,5.258560,4.683070,1.806180,5.259830,4.681420,2.181840,1.90309,1.86332,0.0,78.633333,duke_v2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
505646,80.0,6.0,1.0,4.121822,2.887617,0.000000,3.756712,2.685742,1.732394,0.00000,0.00000,0.0,0.000000,strateos
505647,80.0,6.0,1.0,4.981198,0.000000,0.000000,3.822037,2.838849,2.025306,0.00000,0.00000,0.0,0.000000,strateos
505648,80.0,6.0,1.0,5.295772,0.000000,0.000000,3.789792,2.615950,2.086360,0.00000,0.00000,0.0,0.000000,strateos
505649,80.0,6.0,1.0,5.471633,0.000000,0.000000,3.768120,2.840106,2.127105,0.00000,0.00000,0.0,0.000000,strateos


# Save final CSVs

In [12]:
# Export is currently disabled: both writes below are commented out, so
# running this cell does nothing. Uncomment them to write the processed CSVs.
# duke_v2.to_csv("experiment_data/processed/duke_v2.csv", index=False)
# duke_v2_and_strateos_combined.to_csv("experiment_data/processed/duke_v2_and_strateos_combined.csv", index=False)

In [13]:
# Display the Duke 2.0 frame on its own (after the BL/YL2/YL3/YL4 columns
# were dropped and the "source" column was added).
duke_v2

Unnamed: 0,inducer_concentration,timepoint,stain,FSC-A,SSC-A,YL1-A,FSC-H,SSC-H,YL1-H,FSC-W,SSC-W,YL1-W,percent_live,source
0,0.0,0.0,1.0,6.02060,6.02060,4.90093,5.76444,5.41198,2.94052,3.00988,3.00988,0.00000,78.633333,duke_v2
1,0.0,0.0,1.0,5.38945,4.59275,2.59770,5.37109,4.59422,2.41497,1.96379,1.87506,0.00000,78.633333,duke_v2
2,0.0,0.0,1.0,5.45014,4.88310,2.40312,5.41537,4.84833,2.23300,1.94448,1.89763,0.00000,78.633333,duke_v2
3,0.0,0.0,1.0,5.64436,5.11836,2.86629,5.58375,5.06345,2.63849,1.95424,1.94448,0.00000,78.633333,duke_v2
4,0.0,0.0,1.0,5.25856,4.68307,1.80618,5.25983,4.68142,2.18184,1.90309,1.86332,0.00000,78.633333,duke_v2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1613456,80.0,6.0,0.0,5.01983,5.63438,2.68124,5.02845,5.61526,2.58320,1.83885,1.91908,0.00000,0.000000,duke_v2
1613457,80.0,6.0,0.0,4.78530,5.35882,3.03941,4.77970,5.33517,3.08207,1.83251,1.88649,1.04139,0.000000,duke_v2
1613458,80.0,6.0,0.0,4.99094,5.07843,3.13577,4.91230,5.03511,3.12418,1.88649,1.89209,1.17609,0.000000,duke_v2
1613459,80.0,6.0,0.0,4.56064,5.69383,2.90309,4.55811,5.69243,2.91169,1.77815,1.83251,0.00000,0.000000,duke_v2
