In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

import statsmodels.formula.api as smf
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, matthews_corrcoef
from kneed import KneeLocator

from matplotlib.lines import Line2D
from matplotlib.patches import Patch

In [7]:
phase2_df1=pd.read_csv('phase2_df2.csv',low_memory=False)

In [8]:
from scipy import interpolate
from scipy.stats import rankdata
def normalize_quantiles(A, ties=True):
    A = np.asarray(A, dtype=np.float64)
    n_rows, n_cols = A.shape
    if n_cols == 1:
        return A.copy()

    i = np.linspace(0, 1, n_rows)
    S = np.full((n_rows, n_cols), np.nan)
    nobs = np.zeros(n_cols, dtype=int)
    sort_idx = []

    for j in range(n_cols):
        col = A[:, j]
        not_nan = ~np.isnan(col)
        x = col[not_nan]
        nobs[j] = len(x)
        sort_order = np.argsort(x)
        sorted_x = x[sort_order]

        if nobs[j] < n_rows:
            f = interpolate.interp1d(np.linspace(0, 1, nobs[j]), sorted_x,
                                     bounds_error=False, fill_value="extrapolate")
            S[:, j] = f(i)
        else:
            S[:, j] = sorted_x

        sort_idx.append(np.argsort(np.argsort(col[not_nan])))

    m = np.nanmean(S, axis=1)
    A_out = np.full_like(A, np.nan)

    for j in range(n_cols):
        col = A[:, j]
        not_nan = ~np.isnan(col)

        if ties:
            r = rankdata(col[not_nan], method='average')
            quant_pos = (r - 1) / (nobs[j] - 1)
            f = interpolate.interp1d(i, m, bounds_error=False, fill_value="extrapolate")
            A_out[not_nan, j] = f(quant_pos)
        else:
            ranks = sort_idx[j]
            A_out[not_nan, j] = m[ranks.astype(int)]

    return A_out

In [9]:
# Step 1: Filter data for the three plates
plates = ['32v1', '32v2', '32v3']
df_30v =phase2_df1[phase2_df1['plate'].isin(plates)]

# Step 2: Count rows per (plate, mutant_ID, mutated_genes, light_regime)
group_counts = (
    df_30v.groupby(['plate', 'light_regime', 'mutant_ID', 'mutated_genes'])
    .size()
    .reset_index(name='count')
)

# Step 3: For each plate and light_regime, count how many mutants had 1, 2, ... rows
summary = (
    group_counts.groupby(['light_regime','plate', 'count'])
    .size()
    .reset_index(name='n_mutants')
)

# Optional: Sort for easier reading
summary = summary.sort_values(by=['light_regime','plate', 'count'])

# Show result
summary

Unnamed: 0,light_regime,plate,count,n_mutants
0,10min-10min,32v1,1,373
1,10min-10min,32v1,2,2
2,10min-10min,32v1,6,1
3,10min-10min,32v2,1,373
4,10min-10min,32v2,2,2
5,10min-10min,32v2,6,1
6,10min-10min,32v3,1,373
7,10min-10min,32v3,2,2
8,10min-10min,32v3,6,1
9,1min-1min,32v1,1,373


### plate 32 20_ML

In [10]:
def quantile_normalize_light_regime(df, light_regime, plates, y2_cols, tie_handling=True):
    """
    Quantile-normalize all y2_cols across selected plates within a given light regime.
    
    Parameters:
    - df: pandas DataFrame, full dataset
    - light_regime: str, target light regime (e.g. '20h_ML')
    - plates: list of str, target plate names (e.g. ['30v1', '30v2', '30v3'])
    - y2_cols: list of str, column names like ['y2_1', ..., 'y2_44']
    - tie_handling: bool, passed to normalize_quantiles (default=True)
    
    Returns:
    - df_normalized: pandas DataFrame with normalized y2_cols
    """
    # Filter data
    subset_df = df[(df['light_regime'] == light_regime) & (df['plate'].isin(plates))].copy()
    df_normalized = subset_df.copy()

    for timepoint in y2_cols:
        position_values = []
        valid_plate_indices = {}

        for plate in plates:
            plate_df = subset_df[subset_df['plate'] == plate].copy()

            wt_rows = plate_df[plate_df['mutant_ID'] == 'WT'].copy()
            non_wt_rows = plate_df[plate_df['mutant_ID'] != 'WT'].copy()

            wt_rows = wt_rows.sort_values(['mutant_ID', 'mutated_genes', 'well_id'])
            non_wt_rows = non_wt_rows.sort_values(['mutant_ID', 'mutated_genes'])

            sorted_df = pd.concat([wt_rows, non_wt_rows], axis=0)
            values = sorted_df[timepoint].values
            index = sorted_df.index.values

            position_values.append(values)
            valid_plate_indices[plate] = index

        # Validate shape
        lengths = [len(v) for v in position_values]
        if len(set(lengths)) != 1:
            raise ValueError(f"Length mismatch at {timepoint}: {lengths}")

        matrix = np.column_stack(position_values)
        normalized_matrix = normalize_quantiles(matrix, ties=tie_handling)

        # Write back
        for col_idx, plate in enumerate(plates):
            indices = valid_plate_indices[plate]
            df_normalized.loc[indices, timepoint] = normalized_matrix[:, col_idx]

    return df_normalized

In [11]:
# Define inputs
plates = ['32v1', '32v2', '32v3']
y2_cols = [f'y2_{i}' for i in range(1, 45)]

# Run normalization
phase2_32_20h_ML_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='20h_ML',
    plates=plates,
    y2_cols=y2_cols
)

# View a few columns
phase2_32_20h_ML_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_35,y2_36,y2_37,y2_38,y2_39,y2_40,y2_41,y2_42,y2_43,y2_44
12256,32v1,LMJ.RY0402.039093,Cre13.g588959,A02,0.337125,0.363232,0.398918,0.380890,0.368193,0.419755,...,0.342701,0.362153,0.368912,0.313220,0.375931,0.332024,0.333768,0.350294,0.330328,0.353977
12257,32v1,LMJ.RY0402.039746,Cre02.g080900,A03,0.283043,0.311643,0.304078,0.313081,0.327203,0.308105,...,0.271680,0.310449,0.283618,0.292698,0.292929,0.275747,0.280295,0.294327,0.283409,0.294456
12258,32v1,LMJ.RY0402.039953,Cre02.g095137,A04,0.396636,0.395912,0.435744,0.398947,0.398943,0.406413,...,0.375729,0.394835,0.388213,0.344398,0.354433,0.357811,0.362712,0.352900,0.348287,0.366722
12259,32v1,LMJ.RY0402.040368,Cre06.g278251,A05,0.313968,0.278032,0.276668,0.323595,0.292500,0.301708,...,0.230075,0.247179,0.242560,0.249665,0.280857,0.225356,0.261917,0.223305,0.213400,0.241634
12260,32v1,LMJ.RY0402.040792,Cre02.g098750,A06,0.240562,0.237090,0.223290,0.241257,0.213918,0.213268,...,0.197040,0.179266,0.168681,0.130283,0.180278,0.194464,0.188764,0.206148,0.161686,0.189904
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21443,32v3,LMJ.RY0402.199716,Cre14.g621800,P20,0.288520,0.297833,0.210684,0.281150,0.257214,0.282181,...,0.262114,0.205864,0.228469,0.206820,0.252099,0.246684,0.249552,0.249126,0.213400,0.249545
21444,32v3,LMJ.RY0402.056457,Cre09.g414650,P21,0.305180,0.323064,0.332706,0.301640,0.314474,0.340119,...,0.273042,0.258299,0.266638,0.268242,0.277853,0.257704,0.270768,0.297889,0.297905,0.262651
21445,32v3,LMJ.RY0402.208970,Cre07.g314800,P22,0.352598,0.366429,0.355321,0.367571,0.396334,0.386003,...,0.312835,0.313746,0.312711,0.287273,0.288777,0.288588,0.299556,0.323113,0.329710,0.307898
21446,32v3,LMJ.RY0402.193328,Cre17.g720261,P23,0.435735,0.446749,0.435430,0.453330,0.483821,0.460453,...,0.403267,0.361215,0.412177,0.406346,0.390563,0.376439,0.413339,0.407677,0.367324,0.404149


In [12]:
plates = ['32v1', '32v2','32v3']
phase2_32_20h_ML= phase2_df1[(phase2_df1['light_regime'] == '20h_ML') & (phase2_df1['plate'].isin(plates))].copy()
phase2_32_20h_ML[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols[:10]]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,y2_7,y2_8,y2_9,y2_10
12256,32v1,LMJ.RY0402.039093,Cre13.g588959,A02,0.328958,0.362116,0.393752,0.372117,0.353067,0.405465,0.382093,0.385411,0.396056,0.386739
12257,32v1,LMJ.RY0402.039746,Cre02.g080900,A03,0.276557,0.310534,0.300252,0.312464,0.316510,0.296851,0.304169,0.354912,0.328165,0.319438
12258,32v1,LMJ.RY0402.039953,Cre02.g095137,A04,0.386559,0.390963,0.425387,0.393451,0.385529,0.395380,0.393000,0.421855,0.398299,0.416593
12259,32v1,LMJ.RY0402.040368,Cre06.g278251,A05,0.305052,0.282208,0.268800,0.319962,0.285988,0.289722,0.308574,0.316284,0.344478,0.295235
12260,32v1,LMJ.RY0402.040792,Cre02.g098750,A06,0.237172,0.212957,0.229862,0.215166,0.204500,0.198787,0.270489,0.225967,0.232496,0.221643
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21443,32v3,LMJ.RY0402.199716,Cre14.g621800,P20,0.300777,0.303842,0.215373,0.293134,0.279863,0.296507,0.260814,0.224774,0.361449,0.285651
21444,32v3,LMJ.RY0402.056457,Cre09.g414650,P21,0.315325,0.329659,0.331942,0.318189,0.331144,0.347155,0.359735,0.323326,0.361956,0.344891
21445,32v3,LMJ.RY0402.208970,Cre07.g314800,P22,0.361226,0.370656,0.358248,0.380087,0.409866,0.391510,0.398512,0.371397,0.405084,0.407425
21446,32v3,LMJ.RY0402.193328,Cre17.g720261,P23,0.446211,0.460063,0.442745,0.467111,0.500331,0.471448,0.462747,0.463903,0.465502,0.482955


### plate 32 20_HL

In [13]:
# Define inputs
plates = ['32v1', '32v2', '32v3']
y2_cols = [f'y2_{i}' for i in range(1, 45)]

# Run normalization
phase2_32_20h_HL_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='20h_HL',
    plates=plates,
    y2_cols=y2_cols
)

# View a few columns
phase2_32_20h_HL_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_35,y2_36,y2_37,y2_38,y2_39,y2_40,y2_41,y2_42,y2_43,y2_44
11873,32v1,LMJ.RY0402.039093,Cre13.g588959,A02,0.210520,0.262727,0.209611,0.220226,0.232161,0.217410,...,0.126189,0.154638,0.165312,0.133385,0.158931,0.129923,0.111523,0.105005,0.126432,0.148726
11874,32v1,LMJ.RY0402.039746,Cre02.g080900,A03,0.132316,0.188372,0.155010,0.131422,0.133374,0.196017,...,0.074847,0.052238,0.029327,0.076208,0.112132,0.071806,0.076690,0.047802,0.156983,0.076885
11875,32v1,LMJ.RY0402.039953,Cre02.g095137,A04,0.219689,0.214836,0.196767,0.250358,0.240134,0.286552,...,0.147969,0.188517,0.113156,0.169510,0.157031,0.140712,0.139802,0.160218,0.160100,0.110718
11876,32v1,LMJ.RY0402.040368,Cre06.g278251,A05,0.190377,0.169846,0.161564,0.172012,0.121879,0.147018,...,0.057678,0.083684,0.043648,-0.017829,0.020502,0.058709,0.051593,0.028634,0.050620,0.063825
11877,32v1,LMJ.RY0402.040792,Cre02.g098750,A06,0.097865,0.114104,0.114230,0.098581,0.129543,0.090251,...,-0.036126,0.066299,0.047815,0.012297,0.017691,0.001833,0.024192,0.056428,0.044984,0.007886
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21060,32v3,LMJ.RY0402.199716,Cre14.g621800,P20,0.131002,0.150676,0.122701,0.104945,0.139163,0.229963,...,0.137320,0.039760,0.051490,0.088100,0.039589,0.044749,0.098074,0.101099,0.054006,0.086706
21061,32v3,LMJ.RY0402.056457,Cre09.g414650,P21,0.157654,0.184011,0.192018,0.201060,0.190937,0.102434,...,0.099273,0.099030,0.085525,0.115966,0.068982,0.096449,0.107245,0.127901,0.073197,0.087952
21062,32v3,LMJ.RY0402.208970,Cre07.g314800,P22,0.174419,0.198789,0.182175,0.158918,0.211351,0.199045,...,0.136406,0.121154,0.154138,0.160122,0.080333,0.101338,0.069064,0.134719,0.083709,0.125068
21063,32v3,LMJ.RY0402.193328,Cre17.g720261,P23,0.259248,0.291506,0.236702,0.229008,0.300614,0.269641,...,0.180812,0.173167,0.157586,0.208316,0.211248,0.166812,0.141156,0.180810,0.201677,0.189774


In [14]:
plates = ['32v1', '32v2','32v3']
phase2_32_20h_HL= phase2_df1[(phase2_df1['light_regime'] == '20h_HL') & (phase2_df1['plate'].isin(plates))].copy()
phase2_32_20h_HL[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols[:10]]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,y2_7,y2_8,y2_9,y2_10
11873,32v1,LMJ.RY0402.039093,Cre13.g588959,A02,0.194952,0.247895,0.193503,0.196964,0.217036,0.197219,0.202037,0.228075,0.127445,0.241193
11874,32v1,LMJ.RY0402.039746,Cre02.g080900,A03,0.112704,0.171467,0.136726,0.105732,0.113760,0.174634,0.171662,0.153645,0.136434,0.109463
11875,32v1,LMJ.RY0402.039953,Cre02.g095137,A04,0.206504,0.202089,0.181691,0.225025,0.223594,0.267681,0.257939,0.178387,0.137301,0.190893
11876,32v1,LMJ.RY0402.040368,Cre06.g278251,A05,0.178030,0.156143,0.143881,0.147130,0.108564,0.128174,0.128836,0.104669,0.112921,0.114593
11877,32v1,LMJ.RY0402.040792,Cre02.g098750,A06,0.073017,0.096578,0.092203,0.066138,0.111367,0.065858,0.017939,0.053192,0.041920,0.110379
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21060,32v3,LMJ.RY0402.199716,Cre14.g621800,P20,0.137649,0.161222,0.140738,0.129108,0.151815,0.247921,0.216561,0.189336,0.157762,0.104232
21061,32v3,LMJ.RY0402.056457,Cre09.g414650,P21,0.161173,0.196101,0.192384,0.213265,0.199553,0.127500,0.158739,0.146971,0.161396,0.147736
21062,32v3,LMJ.RY0402.208970,Cre07.g314800,P22,0.173033,0.205997,0.185587,0.174385,0.217647,0.219741,0.182115,0.165758,0.230611,0.178679
21063,32v3,LMJ.RY0402.193328,Cre17.g720261,P23,0.258336,0.301222,0.236917,0.239698,0.304916,0.288488,0.215001,0.270252,0.220519,0.282791


### 32 plate 2h-2h


In [15]:
# Define inputs
plates = ['32v1', '32v2']
y2_cols = [f'y2_{i}' for i in range(1, 49)]

# Run normalization
phase2_32_2h_2h_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='2h-2h',
    plates=plates,
    y2_cols=y2_cols
)

# View a few columns
phase2_32_2h_2h_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_39,y2_40,y2_41,y2_42,y2_43,y2_44,y2_45,y2_46,y2_47,y2_48
12639,32v1,LMJ.RY0402.039093,Cre13.g588959,A02,0.205219,0.175320,0.231087,0.217251,0.186917,0.160914,...,0.599387,0.621258,0.209373,0.155214,0.118449,0.146728,0.581482,0.617822,0.598872,0.601276
12640,32v1,LMJ.RY0402.039746,Cre02.g080900,A03,0.150126,0.146706,0.190581,0.162852,0.179625,0.080297,...,0.572335,0.537154,0.070759,0.072227,0.127631,0.129538,0.578228,0.562362,0.545823,0.587500
12641,32v1,LMJ.RY0402.039953,Cre02.g095137,A04,0.201709,0.120142,0.192320,0.222559,0.185540,0.188117,...,0.571353,0.604911,0.201137,0.205760,0.240452,0.205291,0.569671,0.597813,0.613906,0.580101
12642,32v1,LMJ.RY0402.040368,Cre06.g278251,A05,0.178123,0.103595,0.138644,0.157111,0.140499,0.124926,...,0.501988,0.532651,0.073538,0.064898,0.050177,0.087607,0.498211,0.516941,0.519456,0.483634
12643,32v1,LMJ.RY0402.040792,Cre02.g098750,A06,0.102557,0.081281,0.112830,0.008759,0.111709,0.063904,...,0.491418,0.465057,0.055056,0.038434,0.060362,0.045709,0.472206,0.454379,0.489737,0.448703
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16847,32v2,LMJ.RY0402.243149,Cre16.g688350,P20,0.362917,0.309502,0.329106,0.305796,0.329464,0.274399,...,0.677781,0.678124,0.209693,0.155932,0.168026,0.185619,0.651485,0.673798,0.677462,0.691807
16848,32v2,LMJ.RY0402.057143,Cre02.g095063,P21,0.218843,0.215343,0.236373,0.186354,0.202286,0.256737,...,0.623394,0.610935,0.199075,0.203350,0.233245,0.173822,0.614930,0.623982,0.617596,0.635542
16849,32v2,LMJ.RY0402.140370,Cre07.g325754,P22,0.264173,0.186494,0.281152,0.219236,0.191913,0.185837,...,0.592914,0.613720,0.195277,0.240808,0.169919,0.242216,0.592656,0.613167,0.599031,0.592395
16850,32v2,LMJ.RY0402.248337,Cre01.g009700,P23,,,,,,,...,,,,,,,,,,


In [16]:
plates = ['32v1', '32v2','32v3']
phase2_32_2h_2h= phase2_df1[(phase2_df1['light_regime'] == '2h-2h') & (phase2_df1['plate'].isin(plates))].copy()
phase2_32_2h_2h[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols[:10]]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,y2_7,y2_8,y2_9,y2_10
12639,32v1,LMJ.RY0402.039093,Cre13.g588959,A02,0.211410,0.183080,0.246538,0.244394,0.209742,0.178042,0.244158,0.241746,0.536163,0.599595
12640,32v1,LMJ.RY0402.039746,Cre02.g080900,A03,0.151867,0.155219,0.203340,0.183596,0.200369,0.079821,0.180953,0.204326,0.489890,0.539909
12641,32v1,LMJ.RY0402.039953,Cre02.g095137,A04,0.208432,0.118789,0.204662,0.249599,0.208479,0.207087,0.253689,0.297690,0.597114,0.577680
12642,32v1,LMJ.RY0402.040368,Cre06.g278251,A05,0.184207,0.104412,0.144644,0.175537,0.160118,0.134655,0.123056,0.163738,0.475588,0.501092
12643,32v1,LMJ.RY0402.040792,Cre02.g098750,A06,0.112458,0.077705,0.122838,0.021656,0.129974,0.067148,0.081719,0.069337,0.437474,0.527028
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16847,32v2,LMJ.RY0402.243149,Cre16.g688350,P20,0.355859,0.289867,0.301927,0.272887,0.288687,0.250397,0.272966,0.233381,0.645780,0.690969
16848,32v2,LMJ.RY0402.057143,Cre02.g095063,P21,0.210717,0.206330,0.219784,0.164735,0.176119,0.235396,0.152244,0.129355,0.533706,0.580052
16849,32v2,LMJ.RY0402.140370,Cre07.g325754,P22,0.252849,0.177099,0.255769,0.192203,0.166746,0.167650,0.198809,0.194440,0.555214,0.599836
16850,32v2,LMJ.RY0402.248337,Cre01.g009700,P23,,,,,,,,,,


### 32 plate 10min-10min

In [17]:
# Define inputs
plates = ['32v1', '32v2','32v3']
y2_cols = [f'y2_{i}' for i in range(1, 85)]

# Run normalization
phase2_32_10min_10min_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='10min-10min',
    plates=plates,
    y2_cols=y2_cols
)

# View a few columns
phase2_32_10min_10min_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_75,y2_76,y2_77,y2_78,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84
17235,32v2,LMJ.RY0402.251080,Cre14.g632750,A02,0.232868,0.606618,0.186690,0.587841,0.209249,0.594902,...,0.220511,0.583478,0.602956,0.212372,0.247298,0.583255,0.561304,0.102582,0.187277,0.622162
17236,32v2,LMJ.RY0402.234983,Cre02.g095126,A03,0.196455,0.577698,0.190138,0.576029,0.171518,0.567689,...,0.108659,0.574168,0.559740,0.130531,0.136797,0.552815,0.553625,0.155304,0.141742,0.576863
17237,32v2,LMJ.RY0402.193328,Cre17.g720261,A04,0.226141,0.645999,0.226859,0.626745,0.271977,0.618871,...,0.166874,0.613964,0.613460,0.172719,0.170745,0.618655,0.637996,0.173539,0.176455,0.624651
17238,32v2,LMJ.RY0402.146540,Cre10.g434500,A05,0.215155,0.582264,0.193211,0.583433,0.187876,0.574818,...,0.156160,0.586691,0.603594,0.145460,0.222462,0.610270,0.576432,0.188806,0.187924,0.597230
17239,32v2,LMJ.RY0402.234933,Cre01.g022900,A06,0.257832,0.640535,0.244456,0.627472,0.233001,0.620009,...,0.223794,0.625985,0.635424,0.248775,0.236141,0.634076,0.646950,0.245521,0.205458,0.630238
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31401,32v1,LMJ.RY0402.048478,Cre06.g270450,P20,0.314727,0.612476,0.263251,0.612843,0.273131,0.622767,...,0.211091,0.607013,0.567586,0.179295,0.239970,0.557028,0.601564,0.209889,0.192340,0.569228
31402,32v1,LMJ.RY0402.065081,Cre12.g493050,P21,0.235811,0.624847,0.195677,0.635632,0.216833,0.634830,...,0.108517,0.632321,0.603634,0.183582,0.184493,0.633718,0.653554,0.192459,0.191238,0.662745
31403,32v1,LMJ.RY0402.165706,Cre02.g098350,P22,0.197556,0.554492,0.200197,0.568938,0.238758,0.540544,...,0.142429,0.565481,0.584017,0.158574,0.175961,0.558879,0.595478,0.189167,0.152877,0.577270
31404,32v1,LMJ.RY0402.068059,Cre02.g098350,P23,0.154690,0.571350,0.277924,0.531359,0.178420,0.521354,...,0.192524,0.552127,0.586646,0.236652,0.201967,0.581617,0.576392,0.204449,0.175999,0.579394


In [18]:
plates = ['32v1', '32v2','32v3']
phase2_32_10min_10min= phase2_df1[(phase2_df1['light_regime'] == '10min-10min') & (phase2_df1['plate'].isin(plates))].copy()
phase2_32_10min_10min[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_75,y2_76,y2_77,y2_78,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84
17235,32v2,LMJ.RY0402.251080,Cre14.g632750,A02,0.239271,0.606479,0.194127,0.587358,0.222805,0.596291,...,0.252317,0.580131,0.606479,0.251131,0.283272,0.582984,0.556968,0.113904,0.220315,0.629747
17236,32v2,LMJ.RY0402.234983,Cre02.g095126,A03,0.197867,0.576329,0.196826,0.576545,0.176848,0.565719,...,0.119822,0.570147,0.554579,0.152682,0.158260,0.550994,0.550030,0.178591,0.168094,0.576705
17237,32v2,LMJ.RY0402.193328,Cre17.g720261,A04,0.232750,0.655117,0.232889,0.633144,0.294809,0.625673,...,0.192068,0.619413,0.618663,0.200231,0.197542,0.624964,0.647268,0.199989,0.208856,0.632365
17238,32v2,LMJ.RY0402.146540,Cre10.g434500,A05,0.220193,0.580988,0.199471,0.583246,0.196545,0.573102,...,0.178308,0.584678,0.608213,0.171261,0.255631,0.613762,0.574893,0.216148,0.220739,0.603099
17239,32v2,LMJ.RY0402.234933,Cre01.g022900,A06,0.270476,0.650266,0.252912,0.634921,0.248619,0.626757,...,0.258093,0.630951,0.645737,0.286175,0.269762,0.644700,0.659935,0.281973,0.237170,0.638421
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31401,32v1,LMJ.RY0402.048478,Cre06.g270450,P20,0.251046,0.615118,0.213801,0.605493,0.215804,0.612954,...,0.149685,0.599942,0.580485,0.124755,0.167171,0.566461,0.599521,0.149966,0.135350,0.579156
31402,32v1,LMJ.RY0402.065081,Cre12.g493050,P21,0.203030,0.620378,0.174619,0.619462,0.181658,0.618857,...,0.085417,0.613948,0.601112,0.127212,0.131324,0.612325,0.627385,0.136736,0.135190,0.638349
31403,32v1,LMJ.RY0402.165706,Cre02.g098350,P22,0.182754,0.573639,0.178345,0.580316,0.195983,0.558757,...,0.107206,0.577251,0.590752,0.111970,0.125806,0.569892,0.597064,0.135276,0.110593,0.585463
31404,32v1,LMJ.RY0402.068059,Cre02.g098350,P23,0.152155,0.587086,0.222435,0.547723,0.160724,0.545348,...,0.135228,0.566805,0.592587,0.156062,0.140524,0.586832,0.586089,0.146136,0.123071,0.587784


### plate 32 1min-1min

In [20]:
# Define inputs
plates = ['32v1', '32v3']
y2_cols = [f'y2_{i}' for i in range(1, 89)]

# Run normalization
phase2_32_1min_1min_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='1min-1min',
    plates=plates,
    y2_cols=y2_cols
)

# View a few columns
phase2_32_1min_1min_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
11490,32v1,LMJ.RY0402.039093,Cre13.g588959,A02,0.210945,0.540010,0.213075,0.527023,0.175127,0.531782,...,0.154852,0.483702,0.093452,0.497239,0.135738,0.442541,0.161249,0.486252,0.141248,0.471875
11491,32v1,LMJ.RY0402.039746,Cre02.g080900,A03,0.152069,0.485122,0.119838,0.451069,0.127372,0.515751,...,0.147486,0.477749,0.069552,0.476921,0.101003,0.439054,0.100291,0.461419,0.105872,0.458064
11492,32v1,LMJ.RY0402.039953,Cre02.g095137,A04,0.242085,0.543822,0.241183,0.521121,0.188100,0.532962,...,0.177956,0.512506,0.206405,0.481430,0.151997,0.506704,0.149585,0.500070,0.187277,0.489881
11493,32v1,LMJ.RY0402.040368,Cre06.g278251,A05,0.072558,0.485994,0.135090,0.468810,0.113304,0.469493,...,0.086394,0.422896,0.088781,0.450678,0.115967,0.416644,0.095433,0.452613,0.077468,0.448147
11494,32v1,LMJ.RY0402.040792,Cre02.g098750,A06,0.098593,0.435336,0.048337,0.422348,0.100291,0.422378,...,0.059721,0.360307,0.040875,0.378409,0.065307,0.324540,0.024890,0.340528,0.043740,0.351397
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20677,32v3,LMJ.RY0402.199716,Cre14.g621800,P20,0.154984,0.505537,0.107173,0.506780,0.160936,0.464202,...,0.093870,0.452315,0.151014,0.404510,0.111463,0.416644,0.084082,0.395230,0.130449,0.471205
20678,32v3,LMJ.RY0402.056457,Cre09.g414650,P21,0.153183,0.498792,0.129340,0.440861,0.145823,0.473838,...,0.134171,0.459472,0.127346,0.447808,0.138805,0.450486,0.105644,0.469306,0.100308,0.427846
20679,32v3,LMJ.RY0402.208970,Cre07.g314800,P22,0.187969,0.518364,0.194582,0.481425,0.203763,0.482940,...,0.114315,0.454079,0.066174,0.476259,0.089365,0.430380,0.125377,0.402831,0.127233,0.484938
20680,32v3,LMJ.RY0402.193328,Cre17.g720261,P23,0.248378,0.603067,0.263156,0.568113,0.256224,0.563058,...,0.198088,0.509536,0.198153,0.474203,0.202499,0.481349,0.171899,0.551048,0.215263,0.543186


In [21]:
plates = ['32v1', '32v2','32v3']
phase2_32_1min_1min= phase2_df1[(phase2_df1['light_regime'] == '1min-1min') & (phase2_df1['plate'].isin(plates))].copy()
phase2_32_1min_1min[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
11490,32v1,LMJ.RY0402.039093,Cre13.g588959,A02,0.202436,0.533086,0.199285,0.519535,0.156386,0.523333,...,0.136775,0.465928,0.075863,0.483230,0.121485,0.427227,0.146542,0.475057,0.124707,0.460690
11491,32v1,LMJ.RY0402.039746,Cre02.g080900,A03,0.141499,0.476958,0.103579,0.437662,0.107005,0.505921,...,0.128896,0.459810,0.044384,0.461497,0.083487,0.423334,0.081704,0.448480,0.082954,0.446646
11492,32v1,LMJ.RY0402.039953,Cre02.g095137,A04,0.233858,0.537572,0.229277,0.512299,0.169666,0.523894,...,0.161174,0.497308,0.194471,0.466288,0.134733,0.497406,0.136601,0.488689,0.173369,0.479915
11493,32v1,LMJ.RY0402.040368,Cre06.g278251,A05,0.067067,0.478227,0.117594,0.454138,0.090157,0.462422,...,0.063509,0.394472,0.069552,0.436979,0.098206,0.400083,0.077506,0.437094,0.056985,0.435151
11494,32v1,LMJ.RY0402.040792,Cre02.g098750,A06,0.090473,0.421974,0.021365,0.408502,0.078222,0.400880,...,0.034941,0.342549,0.014078,0.361587,0.049600,0.297388,0.018041,0.303321,0.028565,0.324647
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20677,32v3,LMJ.RY0402.199716,Cre14.g621800,P20,0.165112,0.512962,0.124486,0.519465,0.179494,0.473093,...,0.115742,0.474999,0.165513,0.423185,0.128832,0.433531,0.099595,0.418128,0.148103,0.482396
20678,32v3,LMJ.RY0402.056457,Cre09.g414650,P21,0.163805,0.508217,0.149226,0.452258,0.164287,0.481581,...,0.152842,0.480450,0.141307,0.463050,0.153077,0.463581,0.122845,0.481899,0.123473,0.444142
20679,32v3,LMJ.RY0402.208970,Cre07.g314800,P22,0.194924,0.524702,0.210748,0.494306,0.220651,0.494605,...,0.134775,0.476612,0.088303,0.491494,0.108984,0.448600,0.140439,0.424193,0.143721,0.496079
20680,32v3,LMJ.RY0402.193328,Cre17.g720261,P23,0.254538,0.603972,0.271884,0.575243,0.266362,0.569314,...,0.214812,0.525009,0.211095,0.490296,0.216499,0.493271,0.187439,0.555161,0.225941,0.548658


### plate 32 30s 30s

In [23]:
# Define inputs
plates = ['32v1','32v2', '32v3']
y2_cols = [f'y2_{i}' for i in range(1, 89)]

# Run normalization
phase2_32_30s_30s_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='30s-30s',
    plates=plates,
    y2_cols=y2_cols
)

# View a few columns
phase2_32_30s_30s_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
13022,32v1,LMJ.RY0402.039093,Cre13.g588959,A02,0.229205,0.517555,0.167096,0.532123,0.249256,0.508059,...,0.180955,0.495390,0.200347,0.467818,0.200522,0.460365,0.209561,0.452078,0.210119,0.476787
13023,32v1,LMJ.RY0402.039746,Cre02.g080900,A03,0.133043,0.488543,0.107797,0.504582,0.213215,0.461453,...,0.169459,0.468750,0.148825,0.468879,0.129254,0.456945,0.121799,0.429298,0.182448,0.443576
13024,32v1,LMJ.RY0402.039953,Cre02.g095137,A04,0.260197,0.545112,0.197932,0.459241,0.269379,0.527664,...,0.222772,0.531093,0.277612,0.510448,0.264312,0.526277,0.215913,0.526916,0.252726,0.509400
13025,32v1,LMJ.RY0402.040368,Cre06.g278251,A05,0.111468,0.433570,0.143170,0.434116,0.174265,0.452269,...,0.125316,0.379165,0.115002,0.412426,0.108260,0.435566,0.179087,0.438527,0.089163,0.407978
13026,32v1,LMJ.RY0402.040792,Cre02.g098750,A06,0.105442,0.438257,0.104689,0.448644,0.091029,0.432625,...,0.107206,0.343271,0.094101,0.366542,0.135395,0.431370,0.073669,0.408056,0.051685,0.370495
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21826,32v3,LMJ.RY0402.199716,Cre14.g621800,P20,0.199136,0.446629,0.060074,0.386942,0.156242,0.490891,...,0.053764,0.460158,0.182189,0.472902,0.160170,0.520248,0.153445,0.445634,-0.005877,0.449794
21827,32v3,LMJ.RY0402.056457,Cre09.g414650,P21,0.219411,0.532032,0.206900,0.470667,0.182144,0.449256,...,0.198228,0.470254,0.190596,0.509766,0.170579,0.482018,0.137138,0.463973,0.145261,0.538782
21828,32v3,LMJ.RY0402.208970,Cre07.g314800,P22,0.149390,0.532564,0.153032,0.454233,0.276810,0.502601,...,0.023389,0.455894,0.192147,0.478606,0.139946,0.364187,0.158022,0.458849,0.031955,0.532048
21829,32v3,LMJ.RY0402.193328,Cre17.g720261,P23,0.320683,0.637214,0.340951,0.572944,0.256814,0.585458,...,0.343305,0.587574,0.302050,0.543803,0.304132,0.600849,0.261274,0.530571,0.207139,0.531053


In [22]:
plates = ['32v1', '32v2','32v3']
phase2_32_1min_1min= phase2_df1[(phase2_df1['light_regime'] == '1min-1min') & (phase2_df1['plate'].isin(plates))].copy()
phase2_32_1min_1min[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
11490,32v1,LMJ.RY0402.039093,Cre13.g588959,A02,0.202436,0.533086,0.199285,0.519535,0.156386,0.523333,...,0.136775,0.465928,0.075863,0.483230,0.121485,0.427227,0.146542,0.475057,0.124707,0.460690
11491,32v1,LMJ.RY0402.039746,Cre02.g080900,A03,0.141499,0.476958,0.103579,0.437662,0.107005,0.505921,...,0.128896,0.459810,0.044384,0.461497,0.083487,0.423334,0.081704,0.448480,0.082954,0.446646
11492,32v1,LMJ.RY0402.039953,Cre02.g095137,A04,0.233858,0.537572,0.229277,0.512299,0.169666,0.523894,...,0.161174,0.497308,0.194471,0.466288,0.134733,0.497406,0.136601,0.488689,0.173369,0.479915
11493,32v1,LMJ.RY0402.040368,Cre06.g278251,A05,0.067067,0.478227,0.117594,0.454138,0.090157,0.462422,...,0.063509,0.394472,0.069552,0.436979,0.098206,0.400083,0.077506,0.437094,0.056985,0.435151
11494,32v1,LMJ.RY0402.040792,Cre02.g098750,A06,0.090473,0.421974,0.021365,0.408502,0.078222,0.400880,...,0.034941,0.342549,0.014078,0.361587,0.049600,0.297388,0.018041,0.303321,0.028565,0.324647
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20677,32v3,LMJ.RY0402.199716,Cre14.g621800,P20,0.165112,0.512962,0.124486,0.519465,0.179494,0.473093,...,0.115742,0.474999,0.165513,0.423185,0.128832,0.433531,0.099595,0.418128,0.148103,0.482396
20678,32v3,LMJ.RY0402.056457,Cre09.g414650,P21,0.163805,0.508217,0.149226,0.452258,0.164287,0.481581,...,0.152842,0.480450,0.141307,0.463050,0.153077,0.463581,0.122845,0.481899,0.123473,0.444142
20679,32v3,LMJ.RY0402.208970,Cre07.g314800,P22,0.194924,0.524702,0.210748,0.494306,0.220651,0.494605,...,0.134775,0.476612,0.088303,0.491494,0.108984,0.448600,0.140439,0.424193,0.143721,0.496079
20680,32v3,LMJ.RY0402.193328,Cre17.g720261,P23,0.254538,0.603972,0.271884,0.575243,0.266362,0.569314,...,0.214812,0.525009,0.211095,0.490296,0.216499,0.493271,0.187439,0.555161,0.225941,0.548658


## plate 32 5min-5min

In [24]:
# Define inputs
plates = ['32v1', '32v3']
y2_cols = [f'y2_{i}' for i in range(1, 89)]

# Run normalization
phase2_32_5min_5min_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='5min-5min',
    plates=plates,
    y2_cols=y2_cols
)

# View a few columns
phase2_32_5min_5min_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
13405,32v1,LMJ.RY0402.039093,Cre13.g588959,A02,0.160770,0.552276,0.207059,0.559862,0.209375,0.538199,...,0.134221,0.531431,0.178901,0.504964,0.089067,0.536683,0.113587,0.533614,0.103476,0.538609
13406,32v1,LMJ.RY0402.039746,Cre02.g080900,A03,0.065904,0.512611,0.127044,0.445046,0.133832,0.479356,...,0.120598,0.493876,0.088498,0.475771,0.091151,0.497396,0.078781,0.482458,0.072083,0.430744
13407,32v1,LMJ.RY0402.039953,Cre02.g095137,A04,0.203800,0.582047,0.217652,0.545422,0.204956,0.561044,...,0.121408,0.552555,0.157628,0.529094,0.124956,0.543431,0.177294,0.587206,0.194339,0.541934
13408,32v1,LMJ.RY0402.040368,Cre06.g278251,A05,0.093009,0.481676,0.088249,0.457340,0.136640,0.508244,...,0.155634,0.490286,0.116102,0.497567,0.133587,0.483962,0.072439,0.474800,0.121092,0.513005
13409,32v1,LMJ.RY0402.040792,Cre02.g098750,A06,0.068923,0.443791,0.120496,0.412259,0.087138,0.397994,...,0.038900,0.398078,0.044496,0.357614,0.017609,0.376166,0.074682,0.412246,0.011923,0.407920
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22975,32v3,LMJ.RY0402.199716,Cre14.g621800,P20,0.145996,0.522512,0.121247,0.467027,0.093870,0.452512,...,0.100026,0.458118,0.060029,0.458052,0.110182,0.470175,0.085147,0.455879,0.105264,0.489359
22976,32v3,LMJ.RY0402.056457,Cre09.g414650,P21,0.141560,0.561910,0.151925,0.510563,0.149250,0.495172,...,0.129314,0.501007,0.066538,0.494460,0.072921,0.468422,0.107544,0.474360,0.129666,0.510294
22977,32v3,LMJ.RY0402.208970,Cre07.g314800,P22,0.223211,0.551270,0.171679,0.540401,0.227851,0.517672,...,0.117961,0.497231,0.085888,0.492196,0.154429,0.518777,0.140318,0.522873,0.177225,0.553929
22978,32v3,LMJ.RY0402.193328,Cre17.g720261,P23,0.285019,0.607838,0.239555,0.594360,0.244690,0.605253,...,0.197913,0.587465,0.159656,0.609090,0.170605,0.577168,0.246693,0.572918,0.235122,0.596795


In [25]:
plates = ['32v1', '32v2','32v3']
phase2_32_5min_5min= phase2_df1[(phase2_df1['light_regime'] == '5min-5min') & (phase2_df1['plate'].isin(plates))].copy()
phase2_32_5min_5min[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
13405,32v1,LMJ.RY0402.039093,Cre13.g588959,A02,0.157087,0.553888,0.198954,0.564746,0.199906,0.539723,...,0.127103,0.526312,0.171743,0.506698,0.087849,0.532555,0.108891,0.533787,0.103334,0.536463
13406,32v1,LMJ.RY0402.039746,Cre02.g080900,A03,0.064784,0.515743,0.125836,0.461769,0.129919,0.486931,...,0.115525,0.487931,0.087765,0.478022,0.089186,0.495418,0.078291,0.482975,0.075634,0.441132
13407,32v1,LMJ.RY0402.039953,Cre02.g095137,A04,0.197985,0.579780,0.211696,0.552507,0.195341,0.562225,...,0.115983,0.544704,0.151149,0.528358,0.121010,0.537090,0.171661,0.586327,0.188476,0.540477
13408,32v1,LMJ.RY0402.040368,Cre06.g278251,A05,0.095177,0.489898,0.088815,0.476895,0.132580,0.513579,...,0.146661,0.484582,0.113050,0.499777,0.129926,0.480248,0.072693,0.475027,0.118319,0.513316
13409,32v1,LMJ.RY0402.040792,Cre02.g098750,A06,0.065698,0.452641,0.119852,0.431484,0.083521,0.410655,...,0.034740,0.403854,0.050378,0.374936,0.015058,0.390355,0.075126,0.414087,0.019238,0.404985
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22975,32v3,LMJ.RY0402.199716,Cre14.g621800,P20,0.145105,0.520550,0.122194,0.454454,0.093474,0.445213,...,0.104768,0.459696,0.052915,0.450971,0.111581,0.472892,0.086206,0.453567,0.105811,0.488518
22976,32v3,LMJ.RY0402.056457,Cre09.g414650,P21,0.139358,0.558987,0.154956,0.500170,0.154636,0.490793,...,0.135730,0.507046,0.059524,0.491786,0.069632,0.471593,0.112463,0.474477,0.131850,0.511012
22977,32v3,LMJ.RY0402.208970,Cre07.g314800,P22,0.229930,0.549721,0.178237,0.534362,0.239731,0.514594,...,0.123198,0.503296,0.084725,0.490280,0.158739,0.522367,0.145684,0.523438,0.182771,0.555706
22978,32v3,LMJ.RY0402.193328,Cre17.g720261,P23,0.292549,0.607873,0.246607,0.590569,0.255856,0.606151,...,0.211146,0.594359,0.166162,0.612608,0.176469,0.585367,0.251654,0.576591,0.241659,0.599379


### plate 32 1min-5min

In [26]:
# Define inputs
plates = ['32v2', '32v3']
y2_cols = [f'y2_{i}' for i in range(1, 89)]

# Run normalization
phase2_32_1min_5min_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='1min-5min',
    plates=plates,
    y2_cols=y2_cols
)

# View a few columns
phase2_32_1min_5min_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
17618,32v2,LMJ.RY0402.251080,Cre14.g632750,A02,0.278507,0.623752,0.194488,0.628008,0.206126,0.642532,...,0.182998,0.582765,0.157928,0.604462,0.140931,0.601876,0.135742,0.614715,0.197523,0.617532
17619,32v2,LMJ.RY0402.234983,Cre02.g095126,A03,0.180019,0.575651,0.165166,0.556418,0.145851,0.559392,...,0.132305,0.536128,0.090629,0.553278,0.085826,0.561988,0.063466,0.550699,0.067037,0.561185
17620,32v2,LMJ.RY0402.193328,Cre17.g720261,A04,0.279769,0.637475,0.248450,0.618283,0.228901,0.618999,...,0.219313,0.606997,0.204834,0.620483,0.195344,0.619967,0.191422,0.628903,0.211066,0.612008
17621,32v2,LMJ.RY0402.146540,Cre10.g434500,A05,0.171350,0.596627,0.163187,0.560922,0.161014,0.577225,...,0.142671,0.595421,0.138456,0.597549,0.145576,0.595384,0.075504,0.586782,0.123869,0.583074
17622,32v2,LMJ.RY0402.234933,Cre01.g022900,A06,0.226301,0.640939,0.220749,0.617707,0.224822,0.607093,...,0.182193,0.619727,0.126830,0.615340,0.182147,0.609758,0.143329,0.605451,0.161513,0.597860
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22592,32v3,LMJ.RY0402.199716,Cre14.g621800,P20,0.127625,0.507706,0.084319,0.496444,0.104901,0.520642,...,0.019369,0.406961,0.047411,0.433830,0.024273,0.486087,-0.011064,0.487723,0.050732,0.510604
22593,32v3,LMJ.RY0402.056457,Cre09.g414650,P21,0.182785,0.548232,0.177114,0.536920,0.084998,0.563383,...,0.133886,0.530650,0.110371,0.548928,0.108246,0.535348,0.104300,0.528277,0.115200,0.506251
22594,32v3,LMJ.RY0402.208970,Cre07.g314800,P22,0.184894,0.534300,0.168450,0.550929,0.143542,0.562610,...,0.137538,0.509020,0.143364,0.562947,0.102678,0.503616,0.114767,0.569470,0.138148,0.563726
22595,32v3,LMJ.RY0402.193328,Cre17.g720261,P23,0.287704,0.644320,0.329556,0.626608,0.217971,0.627182,...,0.192094,0.621192,0.171826,0.627062,0.181885,0.623140,0.200070,0.637451,0.207991,0.616931


In [27]:
plates = ['32v1','32v2','32v3']
phase2_32_1min_5min= phase2_df1[(phase2_df1['light_regime'] == '1min-5min') & (phase2_df1['plate'].isin(plates))].copy()
phase2_32_1min_5min[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
17618,32v2,LMJ.RY0402.251080,Cre14.g632750,A02,0.281609,0.628723,0.200140,0.632898,0.210985,0.644089,...,0.184690,0.584851,0.167680,0.604294,0.147836,0.604254,0.143470,0.618303,0.201853,0.619457
17619,32v2,LMJ.RY0402.234983,Cre02.g095126,A03,0.189921,0.579102,0.168849,0.566333,0.151694,0.559568,...,0.135157,0.538429,0.100786,0.552237,0.096562,0.564227,0.073918,0.552378,0.072826,0.564349
17620,32v2,LMJ.RY0402.193328,Cre17.g720261,A04,0.283714,0.641334,0.249982,0.623500,0.234366,0.621006,...,0.219888,0.610079,0.213683,0.617983,0.203678,0.623076,0.200694,0.631061,0.215367,0.613994
17621,32v2,LMJ.RY0402.146540,Cre10.g434500,A05,0.182236,0.600747,0.168623,0.568364,0.165014,0.579879,...,0.144345,0.597197,0.148262,0.598678,0.153561,0.599411,0.085403,0.590677,0.131425,0.584382
17622,32v2,LMJ.RY0402.234933,Cre01.g022900,A06,0.232478,0.644564,0.224993,0.623244,0.230084,0.608873,...,0.183917,0.621344,0.136163,0.613512,0.185526,0.612717,0.153538,0.607127,0.170482,0.598708
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22592,32v3,LMJ.RY0402.199716,Cre14.g621800,P20,0.116878,0.510121,0.088771,0.493896,0.094530,0.522221,...,0.018108,0.403631,0.041044,0.435558,0.016610,0.492219,-0.029987,0.486824,0.043558,0.510200
22593,32v3,LMJ.RY0402.056457,Cre09.g414650,P21,0.172623,0.546531,0.172283,0.529701,0.073480,0.562878,...,0.130708,0.526894,0.098587,0.550535,0.099264,0.533008,0.096102,0.521689,0.108169,0.505262
22594,32v3,LMJ.RY0402.208970,Cre07.g314800,P22,0.175687,0.534060,0.164864,0.540546,0.138326,0.562232,...,0.135343,0.510959,0.134242,0.564394,0.092177,0.508287,0.104549,0.564561,0.127666,0.560302
22595,32v3,LMJ.RY0402.193328,Cre17.g720261,P23,0.286821,0.640831,0.325809,0.621106,0.211867,0.625740,...,0.189959,0.619686,0.162532,0.627781,0.178358,0.620282,0.189898,0.633567,0.205285,0.614813


In [28]:
phase2_32_quantile1= pd.concat([
    phase2_32_20h_ML_normalized,
    phase2_32_20h_HL_normalized,
    phase2_32_2h_2h_normalized,
    phase2_32_10min_10min_normalized,
    phase2_32_1min_1min_normalized,
    phase2_32_30s_30s_normalized,
    phase2_32_5min_5min_normalized,
    phase2_32_1min_5min_normalized
], ignore_index=True)

In [31]:
phase2_32_quantile1.to_csv('phase2_32_quantile1.csv',index=False)

In [30]:
phase2_32_quantile1.shape

(7660, 726)

In [29]:
plates = ['32v1', '32v2','32v3']
data=phase2_df1[phase2_df1['plate'].isin(plates)]
data.shape

(7660, 726)