In [32]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

import statsmodels.formula.api as smf
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, matthews_corrcoef
from kneed import KneeLocator

from matplotlib.lines import Line2D
from matplotlib.patches import Patch

In [33]:
# Load the phase-2 measurement table used by every cell below.
# NOTE(review): the variable is called phase2_df1 but the file is phase2_df2.csv — confirm this is intended.
phase2_df1 = pd.read_csv(
    'phase2_df2.csv',
    low_memory=False,
)

In [5]:
from scipy import interpolate
from scipy.stats import rankdata

def normalize_quantiles(A, ties=True):
    """
    Quantile-normalize the columns of a 2-D array onto one shared distribution.

    Every column is sorted; a column that contains NaN is linearly interpolated
    onto a grid of ``n_rows`` evenly spaced quantile positions in [0, 1]. The
    row-wise mean of these sorted columns is the reference distribution. Each
    observed value is then replaced by the reference value at its own quantile
    position ``rank / (n_observed - 1)``. NaN entries stay NaN.

    NOTE(review): the structure appears to follow limma's ``normalizeQuantiles``
    (R) — confirm if exact parity with that implementation is required.

    Parameters:
        A: array-like of shape (n_rows, n_cols); converted to float64.
        ties: if True (default), tied values share their average rank and
            therefore receive the same normalized value. If False, ordinal
            ranks from ``np.argsort`` are used, so ties are split arbitrarily.

    Returns:
        A new float64 array with the same shape as ``A``. A single-column or
        empty input is returned as an unchanged copy.

    Raises:
        ValueError: if ``A`` is not two-dimensional.

    Edge cases:
        * A column that is entirely NaN stays NaN and does not contribute to
          the reference distribution.
        * A column with exactly one observation does not contribute to the
          reference either (unless the matrix has a single row); its value is
          mapped to the reference value at quantile 0.5, because the usual
          position ``(rank - 1) / (n_observed - 1)`` is undefined.
        * If no column can contribute to the reference, the result is all NaN.
    """
    A = np.asarray(A, dtype=np.float64)
    if A.ndim != 2:
        raise ValueError(f"normalize_quantiles expects a 2-D array, got shape {A.shape}")
    n_rows, n_cols = A.shape
    if n_cols == 1 or A.size == 0:
        return A.copy()

    grid = np.linspace(0, 1, n_rows)
    observed = ~np.isnan(A)
    nobs = observed.sum(axis=0)

    # Sorted columns on the common quantile grid; columns that cannot provide
    # a distribution (fewer than two observations) are left as NaN so that
    # nanmean ignores them.
    S = np.full((n_rows, n_cols), np.nan)
    for j in range(n_cols):
        sorted_x = np.sort(A[observed[:, j], j])
        if nobs[j] == n_rows:
            S[:, j] = sorted_x
        elif nobs[j] >= 2:
            stretch = interpolate.interp1d(
                np.linspace(0, 1, nobs[j]), sorted_x,
                bounds_error=False, fill_value="extrapolate",
            )
            S[:, j] = stretch(grid)

    A_out = np.full_like(A, np.nan)
    has_distribution = (nobs == n_rows) | (nobs >= 2)
    if not has_distribution.any():
        # No reference distribution can be built, so nothing can be normalized.
        return A_out

    m = np.nanmean(S, axis=1)

    if n_rows > 1:
        reference_at = interpolate.interp1d(
            grid, m, bounds_error=False, fill_value="extrapolate"
        )
    else:
        # A one-row reference is a single number; interpolation is meaningless.
        def reference_at(q):
            return np.full(np.shape(q), m[0])

    for j in range(n_cols):
        obs = observed[:, j]
        if nobs[j] == 0:
            continue

        if nobs[j] == 1:
            # (rank - 1) / (nobs - 1) would be 0 / 0; use the mid quantile.
            quant_pos = np.array([0.5])
        elif ties:
            r = rankdata(A[obs, j], method='average')
            quant_pos = (r - 1) / (nobs[j] - 1)
        else:
            order = np.argsort(np.argsort(A[obs, j]))  # ordinal ranks 0 .. nobs-1
            if nobs[j] == n_rows:
                # Complete column: rank k is exactly the k-th reference value.
                A_out[obs, j] = m[order]
                continue
            # Column with missing values: rescale ranks to [0, 1] so the
            # column spans the whole reference distribution. Indexing m[order]
            # directly would only ever use its lowest nobs entries.
            quant_pos = order / (nobs[j] - 1)

        A_out[obs, j] = reference_at(quant_pos)

    return A_out

In [7]:

# Keep only the rows that belong to the three plate-31 replicates.
plates = ['31v1', '31v2', '31v3']
df_30v = phase2_df1.loc[phase2_df1['plate'].isin(plates)]

# Number of rows for every (plate, light regime, mutant, gene annotation) combination.
group_counts = (
    df_30v
    .groupby(['plate', 'light_regime', 'mutant_ID', 'mutated_genes'])
    .size()
    .rename('count')
    .reset_index()
)

# Per light regime and plate: how many combinations occur with 1, 2, ... rows,
# ordered by regime, plate and row count for easier reading.
summary = (
    group_counts
    .groupby(['light_regime', 'plate', 'count'])
    .size()
    .rename('n_mutants')
    .reset_index()
    .sort_values(by=['light_regime', 'plate', 'count'])
)

summary

Unnamed: 0,light_regime,plate,count,n_mutants
0,10min-10min,31v2,1,359
1,10min-10min,31v2,21,1
2,10min-10min,31v3,1,359
3,10min-10min,31v3,21,1
4,1min-1min,31v1,1,359
5,1min-1min,31v1,20,1
6,1min-1min,31v2,1,359
7,1min-1min,31v2,21,1
8,1min-5min,31v1,1,359
9,1min-5min,31v1,20,1


## 31 plate 20h_ML

In [11]:
def quantile_normalize_plate_group(
    df,
    light_regime,
    plates,
    y2_cols,
    wt_n_replicates=21
):
    """
    Quantile-normalize time series data across plates for a given light regime.

    For every column in ``y2_cols`` a matrix with one column per plate is
    built and passed to ``normalize_quantiles(..., ties=True)``. The matrix
    rows are ``wt_n_replicates`` WT slots followed by one slot for each unique
    non-WT (mutant_ID, mutated_genes) pair found on any selected plate, sorted
    by those two keys. A slot a plate cannot fill holds NaN.

    Parameters:
        df: pandas DataFrame (source data) with the columns 'light_regime',
            'plate', 'mutant_ID', 'mutated_genes' and every name in y2_cols.
        light_regime: str (e.g., '20h_ML')
        plates: list of str (e.g., ['31v1', '31v2', '31v3'])
        y2_cols: list of y2 column names (e.g., ['y2_1', ..., 'y2_44'])
        wt_n_replicates: int (number of WT slots per plate)

    Returns:
        A copy of the rows of ``df`` for the selected light regime and plates,
        with the y2 columns replaced by their normalized values. ``df`` itself
        is not modified.

    Notes:
        * The row-to-slot alignment is the same for every time point, so it is
          computed once instead of once per time point and plate.
        * Rows that do not occupy a slot keep their original values: WT rows
          beyond the first ``wt_n_replicates`` of a plate, repeated rows of the
          same mutant key on a plate (only the first is used), and rows whose
          mutant_ID or mutated_genes is NaN (they never match a key).
        * A plate column with fewer than two non-NaN values at a time point
          (for example a plate with no rows for this light regime) is left out
          of that time point's normalization and keeps its values. A time
          point with fewer than two usable plates is skipped.
    """
    key_cols = ['mutant_ID', 'mutated_genes']
    plates = list(plates)

    selected = (df['light_regime'] == light_regime) & (df['plate'].isin(plates))
    df_subset = df[selected]
    df_normalized = df_subset.copy()

    # Get all unique mutant_ID + mutated_genes combos (excluding WT)
    mutant_keys = (
        df_subset.loc[df_subset['mutant_ID'] != 'WT', key_cols]
        .drop_duplicates()
        .sort_values(key_cols)
    )

    n_slots = wt_n_replicates + len(mutant_keys)
    if not plates or n_slots == 0:
        lengths = [n_slots] * len(plates)
        for timepoint in y2_cols:
            print(f" Skipping {timepoint} due to mismatch or empty data: {lengths}")
        return df_normalized

    # Positional row numbers within df_subset. Positions (not index labels)
    # are used so duplicate index labels cannot cause cross-writes.
    layout = df_subset[key_cols + ['plate']].copy()
    layout['_row'] = np.arange(len(layout))

    # slot_matrix[s, p] is the df_subset row feeding slot s of plate p,
    # or -1 when the plate has nothing for that slot.
    slot_columns = []
    for plate in plates:
        on_plate = layout[layout['plate'] == plate]

        # ---- WT slots ----
        wt_rows = (
            on_plate[on_plate['mutant_ID'] == 'WT']
            .sort_values(key_cols)
            .head(wt_n_replicates)
        )
        wt_slots = np.full(wt_n_replicates, -1, dtype=int)
        wt_slots[:len(wt_rows)] = wt_rows['_row'].to_numpy()

        # ---- Mutant slots ----
        # First row per key on this plate. Rows with a NaN key are dropped so
        # the merge cannot pair them up (an equality test on NaN never matches).
        first_rows = on_plate.dropna(subset=key_cols).drop_duplicates(subset=key_cols)
        aligned = mutant_keys.merge(
            first_rows[key_cols + ['_row']], on=key_cols, how='left'
        )
        mutant_slots = aligned['_row'].fillna(-1).astype(int).to_numpy()

        slot_columns.append(np.concatenate([wt_slots, mutant_slots]))

    slot_matrix = np.column_stack(slot_columns)

    populated = (slot_matrix >= 0).any(axis=0)
    if len(plates) > 1:
        for plate, has_rows in zip(plates, populated):
            if not has_rows:
                print(f" Plate {plate} has no rows for light regime {light_regime}; "
                      "it is excluded from normalization")
        if populated.sum() < 2:
            print(" Fewer than two plates have rows; nothing to normalize")
            return df_normalized

    for timepoint in y2_cols:
        raw = df_subset[timepoint].to_numpy(dtype=float)

        # Appending one NaN lets the -1 "empty slot" marker index a NaN value.
        matrix = np.append(raw, np.nan)[slot_matrix]

        # An (almost) empty column carries no distribution to normalize.
        usable = (~np.isnan(matrix)).sum(axis=0) >= 2
        if usable.sum() < 2:
            if len(plates) > 1:
                print(f" Skipping {timepoint}: fewer than two plates have data")
            continue

        # Quantile normalize
        normalized_matrix = normalize_quantiles(matrix[:, usable], ties=True)

        # Write results back
        updated = raw.copy()
        for out_col, plate_col in enumerate(np.flatnonzero(usable)):
            slots = slot_matrix[:, plate_col]
            filled = slots >= 0
            updated[slots[filled]] = normalized_matrix[filled, out_col]
        df_normalized[timepoint] = updated

    return df_normalized


In [12]:
# 20h_ML: all three plate-31 replicates, columns y2_1 .. y2_44.
plates_31 = ['31v1', '31v2', '31v3']
y2_columns = [f'y2_{step}' for step in range(1, 45)]

phase2_31_20h_ML_normalized = quantile_normalize_plate_group(
    phase2_df1, '20h_ML', plates_31, y2_columns, wt_n_replicates=21
)

# Show the identifying columns next to the normalized y2 values.
phase2_31_20h_ML_normalized.loc[
    :, ['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_columns
]


Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_35,y2_36,y2_37,y2_38,y2_39,y2_40,y2_41,y2_42,y2_43,y2_44
9958,31v1,LMJ.RY0402.047650,Cre10.g434650,A02,0.468812,0.549695,0.491279,0.484353,0.475267,0.530712,...,0.457412,0.437931,0.506543,0.454292,0.442339,0.472602,0.466885,0.455352,0.479094,0.484941
9959,31v1,LMJ.RY0402.058045,Cre10.g437300,A03,0.376208,0.406261,0.430477,0.313921,0.396214,0.474309,...,0.417934,0.355485,0.352756,0.365502,0.333255,0.353806,0.339080,0.326757,0.331020,0.312195
9960,31v1,LMJ.RY0402.052609,Cre10.g439350,A04,0.625772,0.662063,0.663038,0.665593,0.679083,0.674148,...,0.671539,0.673847,0.669835,0.663147,0.665873,0.669929,0.668022,0.678910,0.670073,0.678834
9961,31v1,LMJ.RY0402.236960,Cre11.g467675,A05,0.410320,0.481862,0.489836,0.474828,0.459355,0.544632,...,0.461363,0.442982,0.450544,0.450213,0.411845,0.458424,0.429498,0.437721,0.430850,0.442261
9962,31v1,LMJ.RY0402.049580,"Cre17.g728700,Cre12.g496100,Cre12.g496150 & Cr...",A06,0.336180,0.338776,0.375958,0.328332,0.337545,0.332571,...,0.292273,0.309876,0.331581,0.292447,0.305695,0.310849,0.288201,0.292915,0.284700,0.311546
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18379,31v3,LMJ.RY0402.146100,Cre13.g584901,P20,0.417021,0.437748,0.459386,0.481583,0.444897,0.469971,...,0.434872,0.448692,0.443751,0.431403,0.420319,0.418902,0.439706,0.435963,0.418925,0.387726
18380,31v3,LMJ.RY0402.094443,Cre10.g441850,P21,0.388118,0.360805,0.408366,0.437534,0.423386,0.431661,...,0.396089,0.386851,0.405891,0.426485,0.409368,0.399916,0.371983,0.394022,0.416252,0.377072
18381,31v3,LMJ.RY0402.220921,Cre11.g476050,P22,0.397686,0.427128,0.430995,0.460916,0.407569,0.396525,...,0.405234,0.399311,0.446946,0.449328,0.402951,0.387757,0.407469,0.396726,0.381969,0.417411
18382,31v3,LMJ.RY0402.057187,Cre16.g678050,P23,0.355117,0.386010,0.366560,0.361443,0.394274,0.413960,...,0.338272,0.329681,0.293006,0.326969,0.328134,0.321131,0.310675,0.300621,0.306859,0.319716


In [10]:
# Rows of phase2_df1 (as loaded) for light regime '20h_ML' on the plate-31 replicates.
plates = ['31v1', '31v2', '31v3']
phase2_31_20h_ML = phase2_df1.loc[
    phase2_df1['light_regime'].eq('20h_ML') & phase2_df1['plate'].isin(plates)
].copy()
phase2_31_20h_ML.loc[:, ['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_columns]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_35,y2_36,y2_37,y2_38,y2_39,y2_40,y2_41,y2_42,y2_43,y2_44
9958,31v1,LMJ.RY0402.047650,Cre10.g434650,A02,0.449257,0.506394,0.472331,0.472022,0.466124,0.505023,...,0.448474,0.437939,0.472576,0.456931,0.442587,0.472372,0.453772,0.453276,0.459875,0.472678
9959,31v1,LMJ.RY0402.058045,Cre10.g437300,A03,0.365389,0.394504,0.414607,0.308445,0.394179,0.462885,...,0.415354,0.364256,0.355441,0.377032,0.339298,0.370396,0.345573,0.338282,0.336992,0.323112
9960,31v1,LMJ.RY0402.052609,Cre10.g439350,A04,0.609689,0.663990,0.647972,0.670877,0.676651,0.679586,...,0.676347,0.685151,0.679814,0.675606,0.674434,0.672508,0.684652,0.691461,0.688581,0.678000
9961,31v1,LMJ.RY0402.236960,Cre11.g467675,A05,0.395750,0.462183,0.470840,0.461421,0.448495,0.511851,...,0.451162,0.440918,0.446825,0.455180,0.418218,0.463333,0.430081,0.442013,0.428636,0.438809
9962,31v1,LMJ.RY0402.049580,"Cre17.g728700,Cre12.g496100,Cre12.g496150 & Cr...",A06,0.326382,0.333585,0.366908,0.322873,0.334471,0.333576,...,0.295898,0.319719,0.335568,0.298858,0.313297,0.327452,0.297869,0.306572,0.290940,0.323044
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18379,31v3,LMJ.RY0402.146100,Cre13.g584901,P20,0.415537,0.431391,0.453878,0.470883,0.437444,0.467190,...,0.426115,0.441382,0.433348,0.416186,0.408916,0.402378,0.428318,0.421154,0.405858,0.373663
18380,31v3,LMJ.RY0402.094443,Cre10.g441850,P21,0.386049,0.357293,0.403901,0.427682,0.414545,0.426936,...,0.381790,0.378833,0.398139,0.412279,0.399266,0.383930,0.354613,0.378185,0.404224,0.360358
18381,31v3,LMJ.RY0402.220921,Cre11.g476050,P22,0.395659,0.422036,0.426888,0.452465,0.399727,0.391448,...,0.393340,0.393079,0.438894,0.431296,0.390728,0.373640,0.391777,0.381441,0.366665,0.403438
18382,31v3,LMJ.RY0402.057187,Cre16.g678050,P23,0.351617,0.382764,0.364827,0.350516,0.383191,0.410381,...,0.326784,0.320409,0.280735,0.310173,0.316747,0.306316,0.297061,0.285890,0.296316,0.305973


## 31 plate 2h-2h

In [15]:
# 2h-2h: plates 31v1 and 31v3, columns y2_1 .. y2_48.
plates_31 = ['31v1', '31v3']
y2_columns = [f'y2_{step}' for step in range(1, 49)]

phase2_31_2h_2h_normalized = quantile_normalize_plate_group(
    phase2_df1, '2h-2h', plates_31, y2_columns, wt_n_replicates=21
)

# Show the identifying columns next to the normalized y2 values.
phase2_31_2h_2h_normalized.loc[
    :, ['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_columns
]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_39,y2_40,y2_41,y2_42,y2_43,y2_44,y2_45,y2_46,y2_47,y2_48
10341,31v1,LMJ.RY0402.047650,Cre10.g434650,A02,0.261356,0.368206,0.278322,0.334568,0.373670,0.292783,...,0.690206,0.686310,0.239522,0.240773,0.312451,0.267683,0.698586,0.701882,0.680140,0.681464
10342,31v1,LMJ.RY0402.058045,Cre10.g437300,A03,0.221180,0.217973,0.174999,0.230143,0.178551,0.251728,...,0.630418,0.595174,0.151844,0.205179,0.273962,0.212373,0.600220,0.602431,0.601665,0.608819
10343,31v1,LMJ.RY0402.052609,Cre10.g439350,A04,0.473487,0.528568,0.475939,0.528750,0.506077,0.498417,...,0.766710,0.753773,0.420071,0.493133,0.492671,0.488712,0.746756,0.760903,0.749454,0.752925
10344,31v1,LMJ.RY0402.236960,Cre11.g467675,A05,0.262813,0.225915,0.192792,0.249477,0.247125,0.276227,...,0.686135,0.665764,0.222137,0.253813,0.294342,0.247853,0.681727,0.688552,0.671729,0.685311
10345,31v1,LMJ.RY0402.049580,"Cre17.g728700,Cre12.g496100,Cre12.g496150 & Cr...",A06,0.185630,0.214437,0.173040,0.226448,0.200386,0.180158,...,0.587585,0.568821,0.168332,0.134265,0.177499,0.148136,0.554554,0.561102,0.561276,0.563324
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18762,31v3,LMJ.RY0402.146100,Cre13.g584901,P20,0.242456,0.261783,0.216187,0.258612,0.248956,0.259655,...,0.633934,0.624520,0.209977,0.259612,0.224981,0.246967,0.645182,0.619325,0.641836,0.634388
18763,31v3,LMJ.RY0402.094443,Cre10.g441850,P21,0.180029,0.172611,0.124969,0.158928,0.161029,0.274021,...,0.620112,0.611043,0.185740,0.167761,0.196006,0.220949,0.630687,0.615734,0.642368,0.624283
18764,31v3,LMJ.RY0402.220921,Cre11.g476050,P22,0.181475,0.227392,0.220433,0.201958,0.243504,0.241093,...,0.598234,0.584602,0.153478,0.183175,0.222871,0.112529,0.625273,0.630462,0.623569,0.604763
18765,31v3,LMJ.RY0402.057187,Cre16.g678050,P23,0.218309,0.200947,0.270507,0.199863,0.173884,0.177428,...,0.549177,0.578030,0.178436,0.186384,0.162338,0.139787,0.571655,0.595477,0.591221,0.574421


In [14]:

# Rows of phase2_df1 (as loaded) for light regime '2h-2h'.
# NOTE(review): all three replicates are listed here, while the normalized
# 2h-2h table was built from 31v1 and 31v3 only.
plates = ['31v1', '31v2', '31v3']
phase2_31_2h_2h = phase2_df1.loc[
    phase2_df1['light_regime'].eq('2h-2h') & phase2_df1['plate'].isin(plates)
].copy()
phase2_31_2h_2h.loc[:, ['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_columns]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_35,y2_36,y2_37,y2_38,y2_39,y2_40,y2_41,y2_42,y2_43,y2_44
10341,31v1,LMJ.RY0402.047650,Cre10.g434650,A02,0.265284,0.337608,0.296833,0.334255,0.338882,0.298534,...,0.263768,0.278662,0.703524,0.673826,0.688002,0.691833,0.244315,0.246168,0.269143,0.254216
10342,31v1,LMJ.RY0402.058045,Cre10.g437300,A03,0.226415,0.227517,0.194254,0.237077,0.185025,0.256351,...,0.231470,0.245257,0.633540,0.638182,0.636732,0.606838,0.161019,0.214890,0.259585,0.210393
10343,31v1,LMJ.RY0402.052609,Cre10.g439350,A04,0.431015,0.495661,0.483352,0.490876,0.457430,0.489414,...,0.494368,0.503617,0.752983,0.762520,0.767891,0.759418,0.406029,0.482647,0.495075,0.463857
10344,31v1,LMJ.RY0402.236960,Cre11.g467675,A05,0.266800,0.235242,0.214238,0.255597,0.259276,0.282088,...,0.282047,0.231723,0.701748,0.691217,0.685125,0.671847,0.226384,0.257475,0.263878,0.239701
10345,31v1,LMJ.RY0402.049580,"Cre17.g728700,Cre12.g496100,Cre12.g496150 & Cr...",A06,0.192247,0.224556,0.191742,0.232683,0.209450,0.184807,...,0.189433,0.176071,0.589177,0.577237,0.595048,0.584226,0.176340,0.145746,0.180246,0.147855
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18762,31v3,LMJ.RY0402.146100,Cre13.g584901,P20,0.238271,0.254672,0.199219,0.249599,0.235888,0.252586,...,0.293086,0.169423,0.615094,0.625681,0.627314,0.615744,0.201043,0.255078,0.223232,0.254836
18763,31v3,LMJ.RY0402.094443,Cre10.g441850,P21,0.173587,0.161543,0.110442,0.154860,0.154155,0.268290,...,0.170634,0.207041,0.629176,0.635738,0.613827,0.600787,0.177046,0.158290,0.191573,0.222869
18764,31v3,LMJ.RY0402.220921,Cre11.g476050,P22,0.174503,0.218238,0.203663,0.194952,0.231521,0.238567,...,0.192386,0.098714,0.624956,0.592141,0.592086,0.571468,0.144342,0.172512,0.220151,0.111781
18765,31v3,LMJ.RY0402.057187,Cre16.g678050,P23,0.212946,0.191623,0.250463,0.192490,0.166892,0.173558,...,0.090739,0.206546,0.591828,0.583627,0.537657,0.565047,0.168348,0.176573,0.159023,0.140886


### 31 plate 10min-10min

In [16]:
# 10min-10min: plates 31v2 and 31v3, columns y2_1 .. y2_84.
plates_31 = ['31v2', '31v3']
y2_columns = [f'y2_{step}' for step in range(1, 85)]

phase2_31_10min_10min_normalized = quantile_normalize_plate_group(
    phase2_df1, '10min-10min', plates_31, y2_columns, wt_n_replicates=21
)

# Show the identifying columns next to the normalized y2 values.
phase2_31_10min_10min_normalized.loc[
    :, ['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_columns
]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_75,y2_76,y2_77,y2_78,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84
14937,31v2,LMJ.RY0402.219500,"Cre04.g216850,Cre13.g587850",A02,0.292745,0.699866,0.302623,0.703471,0.256233,0.695686,...,0.276669,0.718761,0.693126,0.256962,0.249510,0.690973,0.682209,0.267804,0.266616,0.691188
14938,31v2,LMJ.RY0402.130430,Cre06.g303200,A03,0.208335,0.622957,0.177139,0.610751,0.242415,0.625890,...,0.211592,0.617886,0.606656,0.181946,0.156189,0.625657,0.622574,0.192183,0.181397,0.616637
14939,31v2,LMJ.RY0402.057187,Cre16.g678050,A04,0.184244,0.555310,0.195251,0.586835,0.161297,0.578452,...,0.164994,0.570282,0.581816,0.131931,0.149104,0.551941,0.563744,0.104330,0.123639,0.588047
14940,31v2,LMJ.RY0402.200883,"Cre09.g404503,Cre09.g404500 & Cre09.g404503",A05,0.141518,0.550839,0.137806,0.545806,0.129804,0.525499,...,0.105163,0.526967,0.513763,0.119870,0.147556,0.537259,0.534640,0.112140,0.111687,0.530085
14941,31v2,LMJ.RY0402.127527,Cre08.g384320,A06,0.161118,0.538929,0.181880,0.571846,0.198310,0.541113,...,0.118183,0.543079,0.535678,0.132599,0.130518,0.528008,0.549705,0.122817,0.155954,0.527157
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19528,31v3,LMJ.RY0402.146100,Cre13.g584901,P20,0.246666,0.592663,0.229208,0.625299,0.240077,0.597260,...,0.193285,0.604062,0.596525,0.174023,0.153417,0.588547,0.612130,0.195145,0.206763,0.594275
19529,31v3,LMJ.RY0402.094443,Cre10.g441850,P21,0.144655,0.582132,0.181880,0.571846,0.110348,0.582406,...,0.190485,0.591908,0.578702,0.163138,0.167883,0.599512,0.595990,0.142825,0.196839,0.594847
19530,31v3,LMJ.RY0402.220921,Cre11.g476050,P22,0.179970,0.589130,0.180471,0.569786,0.164761,0.577670,...,0.145314,0.574684,0.556199,0.156744,0.130190,0.556623,0.579592,0.143288,0.124941,0.585070
19531,31v3,LMJ.RY0402.057187,Cre16.g678050,P23,0.156152,0.548557,0.196837,0.545026,0.201099,0.506340,...,0.171533,0.536229,0.523467,0.211801,0.109516,0.553640,0.557906,0.137736,0.126636,0.551839


In [35]:

# Rows of phase2_df1 (as loaded) for light regime '10min-10min'.
# NOTE(review): all three replicates are listed here, while the normalized
# 10min-10min table was built from 31v2 and 31v3 only.
plates = ['31v1', '31v2', '31v3']
phase2_31_10min_10min = phase2_df1.loc[
    phase2_df1['light_regime'].eq('10min-10min') & phase2_df1['plate'].isin(plates)
].copy()
phase2_31_10min_10min.loc[:, ['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_columns]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
14937,31v2,LMJ.RY0402.219500,"Cre04.g216850,Cre13.g587850",A02,0.296166,0.703343,0.294304,0.706410,0.246232,0.695932,...,0.234798,0.685314,0.679124,0.248173,0.253157,0.690889,,,,
14938,31v2,LMJ.RY0402.130430,Cre06.g303200,A03,0.209731,0.625152,0.175300,0.614023,0.234785,0.629520,...,0.148272,0.626118,0.621342,0.180567,0.174092,0.617063,,,,
14939,31v2,LMJ.RY0402.057187,Cre16.g678050,A04,0.186192,0.561888,0.194260,0.590998,0.156009,0.584857,...,0.141949,0.552657,0.562713,0.093522,0.114392,0.588266,,,,
14940,31v2,LMJ.RY0402.200883,"Cre09.g404503,Cre09.g404500 & Cre09.g404503",A05,0.148123,0.557159,0.138932,0.552599,0.125219,0.533860,...,0.139784,0.539045,0.533599,0.100768,0.103342,0.535377,,,,
14941,31v2,LMJ.RY0402.127527,Cre08.g384320,A06,0.165218,0.546890,0.180581,0.575228,0.189517,0.548453,...,0.121811,0.532916,0.547931,0.111263,0.147144,0.533787,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19528,31v3,LMJ.RY0402.146100,Cre13.g584901,P20,0.243644,0.587211,0.231696,0.620270,0.248198,0.592297,...,0.161301,0.587382,0.611727,0.205708,0.216974,0.593298,,,,
19529,31v3,LMJ.RY0402.094443,Cre10.g441850,P21,0.138590,0.573846,0.183247,0.568384,0.114893,0.577237,...,0.174477,0.598753,0.594484,0.155315,0.205793,0.593904,,,,
19530,31v3,LMJ.RY0402.220921,Cre11.g476050,P22,0.177325,0.582901,0.181392,0.566307,0.170957,0.571795,...,0.138866,0.556955,0.580821,0.155477,0.135001,0.584748,,,,
19531,31v3,LMJ.RY0402.057187,Cre16.g678050,P23,0.151993,0.542265,0.198112,0.538256,0.210418,0.500029,...,0.116766,0.553298,0.560338,0.149455,0.135980,0.550241,,,,


### 31 plate 1min-1min

In [18]:
# 1min-1min: plates 31v1 and 31v2, columns y2_1 .. y2_88.
plates_31 = ['31v1', '31v2']
y2_columns = [f'y2_{step}' for step in range(1, 89)]

phase2_31_1min_1min_normalized = quantile_normalize_plate_group(
    phase2_df1, '1min-1min', plates_31, y2_columns, wt_n_replicates=21
)

# Show the identifying columns next to the normalized y2 values.
phase2_31_1min_1min_normalized.loc[
    :, ['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_columns
]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
9192,31v1,LMJ.RY0402.047650,Cre10.g434650,A02,0.308586,0.643473,0.296351,0.605680,0.321367,0.587743,...,0.286327,0.570555,0.281301,0.591981,0.269195,0.598292,0.244228,0.582695,0.275786,0.588302
9193,31v1,LMJ.RY0402.058045,Cre10.g437300,A03,0.217737,0.556163,0.238684,0.550662,0.254151,0.544815,...,0.193704,0.527959,0.209027,0.522060,0.191411,0.519311,0.187993,0.520203,0.202246,0.511691
9194,31v1,LMJ.RY0402.052609,Cre10.g439350,A04,0.385289,0.711595,0.439171,0.704605,0.474512,0.712511,...,0.379590,0.667929,0.392456,0.708737,0.382031,0.685692,0.365544,0.667217,0.397956,0.692328
9195,31v1,LMJ.RY0402.236960,Cre11.g467675,A05,0.256903,0.628014,0.285498,0.615460,0.264098,0.585042,...,0.248346,0.589389,0.286561,0.569424,0.264987,0.594756,0.243050,0.614622,0.289311,0.571836
9196,31v1,LMJ.RY0402.049580,"Cre17.g728700,Cre12.g496100,Cre12.g496150 & Cr...",A06,0.196436,0.535884,0.166726,0.494714,0.199209,0.537890,...,0.136943,0.494945,0.129384,0.495412,0.113638,0.480614,0.119596,0.450347,0.143072,0.464541
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14166,31v2,LMJ.RY0402.159750,"Cre07.g339104,Cre06.g303200",P20,0.197890,0.524444,0.203625,0.509892,0.197160,0.542772,...,0.162825,0.477525,0.171609,0.521885,0.145566,0.485318,0.152657,0.510075,0.198203,0.462110
14167,31v2,LMJ.RY0402.121674,Cre16.g685250,P21,0.111861,0.499992,0.191569,0.511806,0.164592,0.510700,...,0.155118,0.472460,0.142191,0.468855,0.186141,0.437839,0.096183,0.472188,0.119425,0.455996
14168,31v2,LMJ.RY0402.191617,Cre09.g401960,P22,0.133225,0.504297,0.181246,0.517499,0.125550,0.525334,...,0.145527,0.473436,0.155062,0.499202,0.180782,0.486266,0.165284,0.473701,0.171812,0.494265
14169,31v2,LMJ.RY0402.076124,Cre14.g627576,P23,0.218516,0.523523,0.223032,0.519801,0.190474,0.539583,...,0.117487,0.475487,0.194547,0.474637,0.190438,0.535173,0.178594,0.523614,0.144175,0.517323


In [19]:
# Rows of phase2_df1 (as loaded) for light regime '1min-1min'.
# NOTE(review): all three replicates are listed here, while the normalized
# 1min-1min table was built from 31v1 and 31v2 only.
plates = ['31v1', '31v2', '31v3']
phase2_31_1min_1min = phase2_df1.loc[
    phase2_df1['light_regime'].eq('1min-1min') & phase2_df1['plate'].isin(plates)
].copy()
phase2_31_1min_1min.loc[:, ['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_columns]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
9192,31v1,LMJ.RY0402.047650,Cre10.g434650,A02,0.297222,0.632506,0.290178,0.596409,0.301106,0.588135,...,0.291314,0.565083,0.275257,0.591086,0.267297,0.586581,0.245557,0.582835,0.275291,0.585080
9193,31v1,LMJ.RY0402.058045,Cre10.g437300,A03,0.212011,0.554228,0.240911,0.546810,0.245031,0.545318,...,0.199589,0.528926,0.207129,0.521747,0.190323,0.513418,0.186127,0.523109,0.201961,0.510583
9194,31v1,LMJ.RY0402.052609,Cre10.g439350,A04,0.361194,0.706154,0.423301,0.692948,0.458870,0.707596,...,0.406745,0.674025,0.387942,0.709159,0.396793,0.672182,0.388604,0.670857,0.390931,0.689400
9195,31v1,LMJ.RY0402.236960,Cre11.g467675,A05,0.251507,0.622612,0.279887,0.603072,0.254427,0.583964,...,0.250334,0.581057,0.281521,0.564942,0.263433,0.583923,0.244442,0.606598,0.287352,0.567640
9196,31v1,LMJ.RY0402.049580,"Cre17.g728700,Cre12.g496100,Cre12.g496150 & Cr...",A06,0.189157,0.533646,0.167210,0.490566,0.192894,0.538176,...,0.142786,0.498179,0.128584,0.497276,0.109211,0.475532,0.114028,0.452609,0.145870,0.463144
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14166,31v2,LMJ.RY0402.159750,"Cre07.g339104,Cre06.g303200",P20,0.204900,0.525601,0.203777,0.512381,0.203209,0.542151,...,0.152975,0.476226,0.174096,0.522331,0.145009,0.489730,0.152570,0.505604,0.198021,0.464441
14167,31v2,LMJ.RY0402.121674,Cre16.g685250,P21,0.130531,0.503977,0.192465,0.514095,0.173013,0.510297,...,0.144298,0.471008,0.141874,0.469119,0.186533,0.453285,0.106200,0.467242,0.119551,0.457983
14168,31v2,LMJ.RY0402.191617,Cre09.g401960,P22,0.140749,0.507250,0.181491,0.520108,0.133897,0.526087,...,0.137913,0.472482,0.155719,0.497289,0.181381,0.490543,0.165154,0.469679,0.172433,0.492059
14169,31v2,LMJ.RY0402.076124,Cre14.g627576,P23,0.224439,0.524351,0.221535,0.522297,0.196889,0.539032,...,0.107532,0.474075,0.195580,0.473320,0.191047,0.540505,0.178430,0.520347,0.141861,0.519318


### 31 plate 30s-30s

In [20]:
# 30s-30s: all three plate-31 replicates, columns y2_1 .. y2_88.
plates_31 = ['31v1', '31v2', '31v3']
y2_columns = [f'y2_{step}' for step in range(1, 89)]

phase2_31_30s_30s_normalized = quantile_normalize_plate_group(
    phase2_df1, '30s-30s', plates_31, y2_columns, wt_n_replicates=21
)

# Show the identifying columns next to the normalized y2 values.
phase2_31_30s_30s_normalized.loc[
    :, ['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_columns
]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
10724,31v1,LMJ.RY0402.047650,Cre10.g434650,A02,0.324678,0.616798,0.303911,0.578215,0.313574,0.586808,...,0.257191,0.595159,0.322531,0.567012,0.275982,0.574549,0.275071,0.609721,0.300527,0.531340
10725,31v1,LMJ.RY0402.058045,Cre10.g437300,A03,0.232990,0.533018,0.214188,0.531322,0.201160,0.502036,...,0.226351,0.495375,0.219140,0.508310,0.205517,0.492663,0.229054,0.493561,0.223823,0.505335
10726,31v1,LMJ.RY0402.052609,Cre10.g439350,A04,0.366197,0.723858,0.434909,0.695375,0.434654,0.716040,...,0.360527,0.658394,0.402406,0.664645,0.388592,0.693370,0.404852,0.680745,0.409806,0.652579
10727,31v1,LMJ.RY0402.236960,Cre11.g467675,A05,0.296797,0.637676,0.288310,0.571909,0.297113,0.595306,...,0.313919,0.568864,0.227494,0.562292,0.264671,0.577337,0.272925,0.589403,0.302459,0.570970
10728,31v1,LMJ.RY0402.049580,"Cre17.g728700,Cre12.g496100,Cre12.g496150 & Cr...",A06,0.159881,0.516752,0.148454,0.505290,0.162149,0.501256,...,0.117736,0.483503,0.115242,0.469378,0.169482,0.449932,0.143922,0.451708,0.183216,0.482225
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19145,31v3,LMJ.RY0402.146100,Cre13.g584901,P20,0.264098,0.593419,0.281945,0.567050,0.300925,0.567632,...,0.265541,0.540467,0.281490,0.542930,0.264782,0.556206,0.221729,0.532586,0.280068,0.568158
19146,31v3,LMJ.RY0402.094443,Cre10.g441850,P21,0.269298,0.581237,0.213752,0.531329,0.255429,0.525656,...,0.196060,0.494101,0.224004,0.506558,0.173943,0.524131,0.205298,0.496253,0.195196,0.505064
19147,31v3,LMJ.RY0402.220921,Cre11.g476050,P22,0.245965,0.549162,0.252514,0.530931,0.252480,0.537421,...,0.205295,0.460978,0.255788,0.522032,0.187527,0.487616,0.229936,0.523023,0.230163,0.499411
19148,31v3,LMJ.RY0402.057187,Cre16.g678050,P23,0.233047,0.524966,0.181383,0.441907,0.204023,0.543455,...,0.189940,0.458266,0.139876,0.491608,0.221994,0.456017,0.236093,0.441952,0.173208,0.467545


In [21]:
# Rows of phase2_df1 (as loaded) for light regime '30s-30s' on the plate-31 replicates.
plates = ['31v1', '31v2', '31v3']
phase2_31_30s_30s = phase2_df1.loc[
    phase2_df1['light_regime'].eq('30s-30s') & phase2_df1['plate'].isin(plates)
].copy()
phase2_31_30s_30s.loc[:, ['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_columns]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
10724,31v1,LMJ.RY0402.047650,Cre10.g434650,A02,0.340101,0.624521,0.319202,0.589191,0.325900,0.593094,...,0.280107,0.604463,0.339402,0.575791,0.287580,0.583398,0.290804,0.612356,0.303712,0.537946
10725,31v1,LMJ.RY0402.058045,Cre10.g437300,A03,0.245580,0.538954,0.230285,0.538902,0.215305,0.510216,...,0.250098,0.512381,0.237173,0.519294,0.219599,0.502637,0.240192,0.502086,0.232780,0.515128
10726,31v1,LMJ.RY0402.052609,Cre10.g439350,A04,0.372884,0.715632,0.435912,0.695397,0.448710,0.711555,...,0.391742,0.682582,0.420854,0.670449,0.394247,0.697293,0.409957,0.686785,0.412994,0.664765
10727,31v1,LMJ.RY0402.236960,Cre11.g467675,A05,0.307090,0.639393,0.302675,0.582400,0.314604,0.598882,...,0.327958,0.583291,0.243361,0.572811,0.277776,0.585215,0.287615,0.595944,0.304731,0.573273
10728,31v1,LMJ.RY0402.049580,"Cre17.g728700,Cre12.g496100,Cre12.g496150 & Cr...",A06,0.171394,0.525099,0.156687,0.512275,0.175810,0.509492,...,0.136873,0.501909,0.126053,0.478130,0.184945,0.459943,0.151393,0.457950,0.190827,0.487720
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19145,31v3,LMJ.RY0402.146100,Cre13.g584901,P20,0.248393,0.585628,0.263755,0.554717,0.276996,0.555127,...,0.243756,0.528055,0.259224,0.531886,0.252728,0.542031,0.205867,0.518041,0.263981,0.556120
19146,31v3,LMJ.RY0402.094443,Cre10.g441850,P21,0.253142,0.573149,0.194389,0.519744,0.235867,0.515228,...,0.179677,0.486639,0.206822,0.496534,0.159185,0.512784,0.190680,0.483209,0.180154,0.497295
19147,31v3,LMJ.RY0402.220921,Cre11.g476050,P22,0.229353,0.542276,0.232177,0.519558,0.232755,0.527006,...,0.189456,0.452758,0.237552,0.513622,0.172539,0.480109,0.215143,0.508191,0.214046,0.492546
19148,31v3,LMJ.RY0402.057187,Cre16.g678050,P23,0.216392,0.518995,0.164954,0.440085,0.186415,0.532557,...,0.172915,0.449647,0.121775,0.479824,0.207537,0.447602,0.219226,0.428973,0.160815,0.462624


### 31 plate 5min-5min

In [22]:
# 5min-5min: plates 31v1 and 31v3, columns y2_1 .. y2_88.
plates_31 = ['31v1', '31v3']
y2_columns = [f'y2_{step}' for step in range(1, 89)]

phase2_31_5min_5min_normalized = quantile_normalize_plate_group(
    phase2_df1, '5min-5min', plates_31, y2_columns, wt_n_replicates=21
)

# Show the identifying columns next to the normalized y2 values.
phase2_31_5min_5min_normalized.loc[
    :, ['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_columns
]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
19916,31v3,LMJ.RY0402.051628,"Cre13.g589400,Cre13.g589350",A02,0.168781,0.573132,0.168886,0.542946,0.195597,0.540642,...,0.152758,0.520334,0.163197,0.512665,0.113180,0.506288,0.076096,0.510914,0.119192,0.518575
19917,31v3,LMJ.RY0402.189077,Cre08.g384320,A03,0.165223,0.581321,0.175408,0.551533,0.149610,0.557919,...,0.170881,0.524841,0.154685,0.502682,0.119138,0.524662,0.115544,0.512736,0.106690,0.537902
19918,31v3,LMJ.RY0402.251390,Cre13.g581050,A04,0.096359,0.521706,0.125370,0.510695,0.118621,0.527240,...,0.086874,0.513822,0.104685,0.484424,0.092243,0.488431,0.083027,0.497378,0.072405,0.505924
19919,31v3,LMJ.RY0402.214268,Cre09.g400849,A05,0.143767,0.552335,0.175634,0.534584,0.130396,0.538468,...,0.082485,0.501074,0.103757,0.496105,0.142085,0.497985,0.100999,0.501864,0.115070,0.503942
19920,31v3,LMJ.RY0402.068861,Cre16.g664400,A06,0.192676,0.579689,0.169817,0.535220,0.183148,0.553747,...,0.093537,0.541783,0.078462,0.520139,0.099664,0.534764,0.084426,0.549709,0.122786,0.506387
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33316,31v1,WT,"Cre09.g392000,Cre02.g106400,Cre09.g406500",P20,0.107306,0.641791,0.158111,0.656809,0.178028,0.675283,...,0.156388,0.639364,0.122295,0.574718,0.168916,0.607310,0.143153,0.615652,0.191901,0.603091
33317,31v1,WT,"Cre09.g392000,Cre02.g106400,Cre09.g406500",P21,0.166142,0.700077,0.212349,0.693774,0.209813,0.649228,...,0.109915,0.616478,0.198422,0.641535,0.157873,0.616238,0.191603,0.654628,0.184010,0.589471
33318,31v1,WT,"Cre09.g392000,Cre02.g106400,Cre09.g406500",P22,0.200641,0.663300,0.255223,0.649520,0.199451,0.666278,...,0.346815,0.645892,0.232855,0.676992,0.300799,0.673833,0.197358,0.676826,0.295837,0.658881
33319,31v1,WT,"Cre09.g392000,Cre02.g106400,Cre09.g406500",P23,0.435552,0.726941,0.251830,0.707420,0.411341,0.695753,...,0.372843,0.694108,0.361740,0.568604,0.193589,0.687023,0.107347,0.685606,0.371393,0.693856


In [23]:
# Pull the plate-31 rows recorded under the 5min-5min light regime straight
# from phase2_df1 (no normalization applied) and show the identifying columns
# next to the y2 measurement columns.
plates = ['31v1', '31v2', '31v3']
phase2_31_5min_5min = phase2_df1.loc[
    phase2_df1['plate'].isin(plates)
    & phase2_df1['light_regime'].eq('5min-5min')
].copy()  # independent copy, detached from phase2_df1
phase2_31_5min_5min.loc[:, ['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_columns]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
19916,31v3,LMJ.RY0402.051628,"Cre13.g589400,Cre13.g589350",A02,0.170020,0.551608,0.167928,0.521125,0.198649,0.516698,...,0.175399,0.497414,0.193649,0.492378,0.126064,0.487796,0.087839,0.491424,0.136282,0.500689
19917,31v3,LMJ.RY0402.189077,Cre08.g384320,A03,0.166106,0.561030,0.176075,0.529370,0.144194,0.536482,...,0.199629,0.502381,0.182870,0.481053,0.133858,0.507305,0.131472,0.492992,0.122296,0.520588
19918,31v3,LMJ.RY0402.251390,Cre13.g581050,A04,0.090341,0.498844,0.113039,0.485456,0.105333,0.503604,...,0.100444,0.490755,0.124465,0.461022,0.104353,0.465363,0.095079,0.475349,0.079788,0.484800
19919,31v3,LMJ.RY0402.214268,Cre09.g400849,A05,0.140220,0.527643,0.176138,0.511496,0.120150,0.515912,...,0.098775,0.474602,0.123285,0.476204,0.160286,0.476906,0.113627,0.480109,0.133038,0.483801
19920,31v3,LMJ.RY0402.068861,Cre16.g664400,A06,0.199817,0.559092,0.168792,0.511891,0.183606,0.531326,...,0.108909,0.522229,0.095596,0.501297,0.112879,0.517740,0.095940,0.537705,0.140348,0.485541
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33316,31v1,WT,"Cre09.g392000,Cre02.g106400,Cre09.g406500",P20,0.114882,0.642890,0.161466,0.640121,0.178455,0.647446,...,0.132628,0.617657,0.103537,0.578313,0.143823,0.595500,0.122170,0.602134,0.156245,0.595240
33317,31v1,WT,"Cre09.g392000,Cre02.g106400,Cre09.g406500",P21,0.165238,0.667861,0.204486,0.656585,0.204285,0.635737,...,0.092509,0.605062,0.156500,0.613618,0.135537,0.602057,0.153630,0.623426,0.150972,0.587903
33318,31v1,WT,"Cre09.g392000,Cre02.g106400,Cre09.g406500",P22,0.190437,0.652577,0.235079,0.634652,0.195951,0.641358,...,0.223930,0.621995,0.178882,0.630143,0.199353,0.628897,0.157966,0.633107,0.201696,0.617803
33319,31v1,WT,"Cre09.g392000,Cre02.g106400,Cre09.g406500",P23,0.316905,0.695669,0.233575,0.667459,0.288337,0.652882,...,0.248447,0.655899,0.218367,0.574468,0.160804,0.634085,0.091396,0.644213,0.252259,0.651414


### 31 plate 1min-5min

In [24]:
# Names of the y2 measurement columns: y2_1 through y2_88.
y2_columns = [f'y2_{i}' for i in range(1, 89)]
plates_31 = ['31v1', '31v2', '31v3']

# Run the plate-group normalization helper on the three plate-31 replicates
# for the 1min-5min light regime.
phase2_31_1min_5min_normalized = quantile_normalize_plate_group(
    df=phase2_df1,
    plates=plates_31,
    light_regime='1min-5min',
    y2_cols=y2_columns,
    # NOTE(review): presumably the number of WT replicate wells the helper
    # expects -- confirm against quantile_normalize_plate_group.
    wt_n_replicates=21,
)

# Show the identifying columns followed by the normalized y2 values.
phase2_31_1min_5min_normalized.loc[:, ['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_columns]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
11107,31v1,LMJ.RY0402.047650,Cre10.g434650,A02,0.252183,0.645919,0.268310,0.663719,0.248233,0.639735,...,0.212747,0.653300,0.224428,0.635668,0.186035,0.644074,0.233667,0.638588,0.221422,0.637173
11108,31v1,LMJ.RY0402.058045,Cre10.g437300,A03,0.176205,0.589437,0.214570,0.576316,0.204894,0.578054,...,0.122505,0.571239,0.118810,0.568784,0.121103,0.570325,0.135039,0.544438,0.147057,0.565718
11109,31v1,LMJ.RY0402.052609,Cre10.g439350,A04,0.292848,0.758531,0.345402,0.748776,0.361220,0.750889,...,0.243570,0.737329,0.214382,0.719730,0.227698,0.717978,0.246277,0.715808,0.230433,0.706222
11110,31v1,LMJ.RY0402.236960,Cre11.g467675,A05,0.295608,0.665498,0.265629,0.641086,0.240533,0.656578,...,0.231448,0.641786,0.207521,0.655417,0.196478,0.646840,0.234912,0.657691,0.238301,0.650707
11111,31v1,LMJ.RY0402.049580,"Cre17.g728700,Cre12.g496100,Cre12.g496150 & Cr...",A06,0.164751,0.559698,0.157848,0.536364,0.122682,0.543228,...,0.106808,0.578347,0.121262,0.572644,0.071861,0.572095,0.113038,0.556482,0.122128,0.576607
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19911,31v3,LMJ.RY0402.146100,Cre13.g584901,P20,0.285711,0.621288,0.217005,0.623273,0.224108,0.642647,...,0.195165,0.617065,0.210877,0.590963,0.176909,0.625362,0.188560,0.603517,0.161119,0.622835
19912,31v3,LMJ.RY0402.094443,Cre10.g441850,P21,0.260045,0.650552,0.163168,0.597689,0.181847,0.594837,...,0.149391,0.599487,0.149277,0.570136,0.172705,0.584541,0.168803,0.582767,0.162340,0.604090
19913,31v3,LMJ.RY0402.220921,Cre11.g476050,P22,0.253722,0.569415,0.215220,0.586833,0.246713,0.589162,...,0.169275,0.569438,0.122181,0.567854,0.089794,0.582490,0.124137,0.573111,0.139197,0.577431
19914,31v3,LMJ.RY0402.057187,Cre16.g678050,P23,0.164347,0.560840,0.177921,0.570404,0.172319,0.529284,...,0.118239,0.539091,0.063069,0.555446,0.118451,0.535428,0.158192,0.559595,0.086413,0.555432


In [25]:
# Pull the plate-31 rows recorded under the 1min-5min light regime straight
# from phase2_df1 (no normalization applied) and show the identifying columns
# next to the y2 measurement columns.
plates = ['31v1', '31v2', '31v3']
phase2_31_1min_5min = phase2_df1.loc[
    phase2_df1['plate'].isin(plates)
    & phase2_df1['light_regime'].eq('1min-5min')
].copy()  # independent copy, detached from phase2_df1
phase2_31_1min_5min.loc[:, ['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_columns]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
11107,31v1,LMJ.RY0402.047650,Cre10.g434650,A02,0.254733,0.647636,0.260723,0.659328,0.238083,0.646309,...,0.206995,0.648472,0.224843,0.633546,0.182104,0.640442,0.225787,0.638474,0.211076,0.638637
11108,31v1,LMJ.RY0402.058045,Cre10.g437300,A03,0.175979,0.595781,0.205897,0.575714,0.197716,0.584076,...,0.118419,0.569496,0.121313,0.566632,0.116988,0.568209,0.132212,0.546808,0.138396,0.564400
11109,31v1,LMJ.RY0402.052609,Cre10.g439350,A04,0.294824,0.765134,0.328679,0.752537,0.353286,0.758182,...,0.237817,0.731274,0.213715,0.723031,0.224074,0.724772,0.239195,0.719918,0.220672,0.703725
11110,31v1,LMJ.RY0402.236960,Cre11.g467675,A05,0.298301,0.666853,0.258395,0.643267,0.228807,0.662682,...,0.229232,0.640513,0.209036,0.649930,0.191371,0.644448,0.227216,0.653199,0.230063,0.649159
11111,31v1,LMJ.RY0402.049580,"Cre17.g728700,Cre12.g496100,Cre12.g496150 & Cr...",A06,0.160368,0.565876,0.150195,0.538117,0.111529,0.548003,...,0.102436,0.576758,0.125075,0.571621,0.060669,0.569743,0.111893,0.559230,0.116362,0.574358
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19911,31v3,LMJ.RY0402.146100,Cre13.g584901,P20,0.291991,0.624114,0.235305,0.627511,0.241152,0.645275,...,0.210813,0.625622,0.217942,0.598047,0.186081,0.631486,0.206204,0.608447,0.175896,0.627724
19912,31v3,LMJ.RY0402.094443,Cre10.g441850,P21,0.267076,0.651052,0.180864,0.600807,0.199716,0.599398,...,0.160587,0.609468,0.156039,0.576964,0.181249,0.592567,0.183449,0.587174,0.177658,0.607393
19913,31v3,LMJ.RY0402.220921,Cre11.g476050,P22,0.258944,0.565873,0.233549,0.588560,0.263721,0.593849,...,0.185144,0.580837,0.128791,0.573464,0.099307,0.589329,0.138857,0.577188,0.154125,0.581833
19914,31v3,LMJ.RY0402.057187,Cre16.g678050,P23,0.173350,0.557380,0.197421,0.575179,0.189402,0.537384,...,0.128729,0.547961,0.069335,0.562549,0.127121,0.541142,0.171703,0.563624,0.101958,0.559281


In [26]:
# Stack the seven per-light-regime normalized plate-31 frames row-wise into a
# single table; ignore_index=True discards the original row labels in favour
# of a fresh 0..n-1 index.
phase2_31_quantile1 = pd.concat(
    objs=[
        phase2_31_20h_ML_normalized,
        phase2_31_2h_2h_normalized,
        phase2_31_10min_10min_normalized,
        phase2_31_1min_1min_normalized,
        phase2_31_30s_30s_normalized,
        phase2_31_5min_5min_normalized,
        phase2_31_1min_5min_normalized,
    ],
    ignore_index=True,
)

In [42]:
# Persist the combined normalized table to CSV in the working directory,
# leaving the row index out of the file.
phase2_31_quantile1.to_csv(
    path_or_buf='phase2_31_quantile1.csv',
    index=False,
)

In [27]:
# (rows, columns) of the combined normalized plate-31 table.
phase2_31_quantile1.shape

(6511, 726)

In [29]:
# Cross-check: the raw plate-31 rows (every light regime except 20h_HL) are
# expected to have the same shape as the concatenated normalized table
# phase2_31_quantile1.
plates = ['31v1', '31v2', '31v3']
data = phase2_df1[
    (phase2_df1['plate'].isin(plates))
    & (phase2_df1['light_regime'] != '20h_HL')
]
# Fail loudly on a mismatch instead of relying on eyeballing two printed shapes.
assert data.shape == phase2_31_quantile1.shape, (
    f"raw subset shape {data.shape} != normalized shape {phase2_31_quantile1.shape}"
)
data.shape

(6511, 726)