In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

import statsmodels.formula.api as smf
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, matthews_corrcoef
from kneed import KneeLocator

from matplotlib.lines import Line2D
from matplotlib.patches import Patch

In [4]:
phase2_df1=pd.read_csv('phase2_df1.csv',low_memory=False)

In [7]:
from scipy import interpolate
from scipy.stats import rankdata

def normalize_quantiles(A, ties=True):
    A = np.asarray(A, dtype=np.float64)
    n_rows, n_cols = A.shape
    if n_cols == 1:
        return A.copy()

    i = np.linspace(0, 1, n_rows)
    S = np.full((n_rows, n_cols), np.nan)
    nobs = np.zeros(n_cols, dtype=int)
    sort_idx = []

    for j in range(n_cols):
        col = A[:, j]
        not_nan = ~np.isnan(col)
        x = col[not_nan]
        nobs[j] = len(x)
        sort_order = np.argsort(x)
        sorted_x = x[sort_order]

        if nobs[j] < n_rows:
            f = interpolate.interp1d(np.linspace(0, 1, nobs[j]), sorted_x,
                                     bounds_error=False, fill_value="extrapolate")
            S[:, j] = f(i)
        else:
            S[:, j] = sorted_x

        sort_idx.append(np.argsort(np.argsort(col[not_nan])))

    m = np.nanmean(S, axis=1)
    A_out = np.full_like(A, np.nan)

    for j in range(n_cols):
        col = A[:, j]
        not_nan = ~np.isnan(col)

        if ties:
            r = rankdata(col[not_nan], method='average')
            quant_pos = (r - 1) / (nobs[j] - 1)
            f = interpolate.interp1d(i, m, bounds_error=False, fill_value="extrapolate")
            A_out[not_nan, j] = f(quant_pos)
        else:
            ranks = sort_idx[j]
            A_out[not_nan, j] = m[ranks.astype(int)]

    return A_out

In [9]:
# Step 1: Filter data for the three plates
plates = ['33v1', '33v2', '33v3']
df_30v =phase2_df1[phase2_df1['plate'].isin(plates)]

# Step 2: Count rows per (plate, mutant_ID, mutated_genes, light_regime)
group_counts = (
    df_30v.groupby(['plate', 'light_regime', 'mutant_ID', 'mutated_genes'])
    .size()
    .reset_index(name='count')
)

# Step 3: For each plate and light_regime, count how many mutants had 1, 2, ... rows
summary = (
    group_counts.groupby(['light_regime','plate', 'count'])
    .size()
    .reset_index(name='n_mutants')
)

# Optional: Sort for easier reading
summary = summary.sort_values(by=['light_regime','plate', 'count'])

# Show result
summary

Unnamed: 0,light_regime,plate,count,n_mutants
0,10min-10min,33v1,1,375
1,10min-10min,33v1,8,1
2,10min-10min,33v2,1,375
3,10min-10min,33v2,8,1
4,1min-1min,33v1,1,375
5,1min-1min,33v1,8,1
6,1min-1min,33v2,1,375
7,1min-1min,33v2,8,1
8,1min-5min,33v2,1,375
9,1min-5min,33v2,8,1


In [11]:
def quantile_normalize_light_regime(df, light_regime, plates, y2_cols, tie_handling=True):

    # Filter data
    subset_df = df[(df['light_regime'] == light_regime) & (df['plate'].isin(plates))].copy()
    df_normalized = subset_df.copy()

    for timepoint in y2_cols:
        position_values = []
        valid_plate_indices = {}

        for plate in plates:
            plate_df = subset_df[subset_df['plate'] == plate].copy()

            wt_rows = plate_df[plate_df['mutant_ID'] == 'WT'].copy()
            non_wt_rows = plate_df[plate_df['mutant_ID'] != 'WT'].copy()

            wt_rows = wt_rows.sort_values(['mutant_ID', 'mutated_genes', 'well_id'])
            non_wt_rows = non_wt_rows.sort_values(['mutant_ID', 'mutated_genes'])

            sorted_df = pd.concat([wt_rows, non_wt_rows], axis=0)
            # print('plate',plate ,wt_rows[timepoint].values)
            values = sorted_df[timepoint].values
            index = sorted_df.index.values

            position_values.append(values)
            valid_plate_indices[plate] = index

        # Validate shape
        lengths = [len(v) for v in position_values]
        if len(set(lengths)) != 1:
            raise ValueError(f"Length mismatch at {timepoint}: {lengths}")

        matrix = np.column_stack(position_values)
        normalized_matrix = normalize_quantiles(matrix, ties=tie_handling)

        # Write back
        for col_idx, plate in enumerate(plates):
            indices = valid_plate_indices[plate]
            df_normalized.loc[indices, timepoint] = normalized_matrix[:, col_idx]

    return df_normalized

## plate 33 20h hl

In [12]:

plates = ['33v1', '33v2', '33v3']
y2_cols = [f'y2_{i}' for i in range(1, 45)]

# Run normalization
phase2_33_20h_ML_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='20h_ML',
    plates=plates,
    y2_cols=y2_cols
)

# View a few columns
phase2_33_20h_ML_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_35,y2_36,y2_37,y2_38,y2_39,y2_40,y2_41,y2_42,y2_43,y2_44
23746,33v1,LMJ.RY0402.177454,Cre03.g205428,A02,0.382983,0.373643,0.384257,0.401608,0.392631,0.431667,...,0.409621,0.343396,0.343977,0.369560,0.410916,0.347885,0.359663,0.360728,0.350947,0.337814
23747,33v1,LMJ.RY0402.227435,Cre12.g538850,A03,0.393178,0.446761,0.437981,0.471161,0.466986,0.475505,...,0.359517,0.360529,0.396791,0.352423,0.401342,0.372623,0.384482,0.394345,0.382006,0.357206
23748,33v1,LMJ.RY0402.172194,"Cre16.g651450,Cre04.g217220",A04,,,,,,,...,,,,,,,,,,
23749,33v1,LMJ.RY0402.168114,Cre01.g024800,A05,0.369037,0.415936,0.432143,0.452620,0.463865,0.441303,...,0.378400,0.389764,0.411083,0.373201,0.430877,0.388885,0.386882,0.384779,0.386944,0.383992
23750,33v1,LMJ.RY0402.184490,Cre02.g076200,A06,0.311156,0.293332,0.281838,0.307152,0.348711,0.314874,...,0.233851,0.243583,0.247007,0.266884,0.304823,0.256428,0.256020,0.257295,0.282699,0.270525
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29103,33v3,LMJ.RY0402.177485,Cre04.g215150,P20,0.333716,0.399820,0.351710,0.374661,0.431202,0.403939,...,0.384579,0.373802,0.403885,0.362416,0.422927,0.363873,0.380812,0.336258,0.356564,0.330097
29104,33v3,LMJ.RY0402.093284,"Cre11.g467768,Cre14.g617150",P21,0.376131,0.450101,0.423277,0.423985,0.438222,0.459133,...,0.456635,0.462426,0.388864,0.429639,0.429677,0.428597,0.425299,0.436889,0.445283,0.430533
29105,33v3,LMJ.RY0402.046384,Cre01.g012650,P22,0.251782,0.333033,0.288624,0.273217,0.324869,0.253553,...,0.329597,0.292421,0.278639,0.303001,0.319492,0.177276,0.327876,0.306751,0.267904,0.291986
29106,33v3,LMJ.RY0402.168539,Cre16.g693600,P23,0.400542,0.504248,0.504430,0.416342,0.423259,0.440750,...,0.357735,0.384193,0.358541,0.391750,0.438084,0.359718,0.372252,0.393129,0.387424,0.306803


In [14]:
plates = ['33v1', '33v2', '33v3']
phase2_33_20h_ML= phase2_df1[(phase2_df1['light_regime'] == '20h_ML') & (phase2_df1['plate'].isin(plates))].copy()
phase2_33_20h_ML[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_35,y2_36,y2_37,y2_38,y2_39,y2_40,y2_41,y2_42,y2_43,y2_44
23746,33v1,LMJ.RY0402.177454,Cre03.g205428,A02,0.359225,0.346641,0.363445,0.370666,0.362318,0.406222,...,0.380669,0.308802,0.311536,0.328504,0.376005,0.313771,0.321492,0.324990,0.322061,0.303893
23747,33v1,LMJ.RY0402.227435,Cre12.g538850,A03,0.367160,0.420072,0.416421,0.438070,0.436410,0.443252,...,0.336560,0.326273,0.364270,0.311774,0.366034,0.338718,0.344892,0.352770,0.348021,0.322961
23748,33v1,LMJ.RY0402.172194,"Cre16.g651450,Cre04.g217220",A04,,,,,,,...,,,,,,,,,,
23749,33v1,LMJ.RY0402.168114,Cre01.g024800,A05,0.346882,0.386835,0.411510,0.417034,0.433215,0.414765,...,0.349704,0.353589,0.373545,0.332124,0.393959,0.354070,0.346918,0.341118,0.353827,0.349107
23750,33v1,LMJ.RY0402.184490,Cre02.g076200,A06,0.293332,0.271139,0.261855,0.266404,0.321489,0.286444,...,0.202927,0.210222,0.218069,0.225701,0.275473,0.228519,0.204756,0.216628,0.251089,0.239311
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29103,33v3,LMJ.RY0402.177485,Cre04.g215150,P20,0.357637,0.427820,0.382627,0.410334,0.455450,0.435434,...,0.406310,0.404659,0.431102,0.390034,0.454653,0.390363,0.412852,0.360142,0.382663,0.353818
29104,33v3,LMJ.RY0402.093284,"Cre11.g467768,Cre14.g617150",P21,0.399169,0.476812,0.454743,0.456020,0.460913,0.484103,...,0.479412,0.493686,0.414672,0.458516,0.458391,0.455688,0.455986,0.458633,0.478832,0.450390
29105,33v3,LMJ.RY0402.046384,Cre01.g012650,P22,0.278512,0.364834,0.313476,0.328661,0.348412,0.287724,...,0.350319,0.328979,0.305810,0.332369,0.356792,0.211603,0.357999,0.331505,0.306726,0.315755
29106,33v3,LMJ.RY0402.168539,Cre16.g693600,P23,0.426155,0.535512,0.526733,0.448822,0.447813,0.469046,...,0.371789,0.415376,0.380965,0.422279,0.467254,0.386653,0.400277,0.420993,0.415307,0.331457


## plate 33 20h ML

In [15]:

plates = ['33v1', '33v2', '33v3']
y2_cols = [f'y2_{i}' for i in range(1, 45)]

# Run normalization
phase2_33_20h_HL_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='20h_HL',
    plates=plates,
    y2_cols=y2_cols
)
phase2_33_20h_HL_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_35,y2_36,y2_37,y2_38,y2_39,y2_40,y2_41,y2_42,y2_43,y2_44
23363,33v1,LMJ.RY0402.177454,Cre03.g205428,A02,0.167211,0.153964,0.170546,0.127902,0.153137,0.256982,...,0.114071,0.159084,0.156047,0.182255,0.172745,0.111064,0.132235,0.135120,0.195101,0.130655
23364,33v1,LMJ.RY0402.227435,Cre12.g538850,A03,0.215218,0.190881,0.280253,0.264262,0.264573,0.294651,...,0.141758,0.190331,0.158758,0.124760,0.084204,0.183971,0.147322,0.157343,0.134718,0.097212
23365,33v1,LMJ.RY0402.172194,"Cre16.g651450,Cre04.g217220",A04,,,,,,,...,,,,,,,,,,
23366,33v1,LMJ.RY0402.168114,Cre01.g024800,A05,0.232531,0.175099,0.186088,0.235773,0.196952,0.205056,...,0.136320,0.137570,0.123010,0.146572,0.098042,0.135802,0.139076,0.162470,0.133605,0.110724
23367,33v1,LMJ.RY0402.184490,Cre02.g076200,A06,0.152084,0.178659,0.129200,0.185240,0.158707,0.157844,...,0.091090,0.092444,0.077328,0.089021,0.129247,0.057231,0.060486,0.086716,0.050397,0.050244
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28720,33v3,LMJ.RY0402.177485,Cre04.g215150,P20,0.193929,0.194063,0.223388,0.196135,0.265423,0.203999,...,0.058415,0.137840,0.182513,0.126950,0.136126,0.102423,0.147036,0.147902,0.143443,0.108255
28721,33v3,LMJ.RY0402.093284,"Cre11.g467768,Cre14.g617150",P21,0.256464,0.197318,0.251512,0.270950,0.290334,0.236075,...,0.191015,0.209541,0.225453,0.167812,0.132087,0.170458,0.230621,0.153403,0.170036,0.198834
28722,33v3,LMJ.RY0402.046384,Cre01.g012650,P22,0.116644,0.112410,0.121732,0.187541,0.196367,0.124416,...,0.114980,0.029587,0.118895,0.124976,0.077161,0.130564,0.072112,0.037704,-0.019560,-0.000076
28723,33v3,LMJ.RY0402.168539,Cre16.g693600,P23,0.233205,0.229675,0.090067,0.271642,0.213958,0.150188,...,-0.004295,0.165329,0.055689,0.024624,0.051633,0.014750,0.049141,0.102222,0.095536,-0.045002


In [16]:
plates = ['33v1', '33v2', '33v3']
phase2_33_20h_HL= phase2_df1[(phase2_df1['light_regime'] == '20h_HL') & (phase2_df1['plate'].isin(plates))].copy()
phase2_33_20h_HL[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_35,y2_36,y2_37,y2_38,y2_39,y2_40,y2_41,y2_42,y2_43,y2_44
23363,33v1,LMJ.RY0402.177454,Cre03.g205428,A02,0.150376,0.139316,0.153947,0.106686,0.127222,0.236058,...,0.112404,0.148885,0.144511,0.168380,0.161571,0.100749,0.119699,0.116899,0.182727,0.123788
23364,33v1,LMJ.RY0402.227435,Cre12.g538850,A03,0.196431,0.177461,0.265365,0.243435,0.241967,0.268673,...,0.139683,0.181140,0.147735,0.113243,0.077333,0.175573,0.138246,0.140315,0.122530,0.089275
23365,33v1,LMJ.RY0402.172194,"Cre16.g651450,Cre04.g217220",A04,,,,,,,...,,,,,,,,,,
23366,33v1,LMJ.RY0402.168114,Cre01.g024800,A05,0.212197,0.162335,0.167037,0.213647,0.174096,0.179443,...,0.134432,0.130809,0.108168,0.135116,0.091057,0.124378,0.128819,0.146431,0.121531,0.102198
23367,33v1,LMJ.RY0402.184490,Cre02.g076200,A06,0.135149,0.167355,0.112223,0.161108,0.133633,0.135749,...,0.089514,0.084264,0.062398,0.077900,0.122600,0.049776,0.051848,0.067699,0.041095,0.046658
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28720,33v3,LMJ.RY0402.177485,Cre04.g215150,P20,0.189012,0.193594,0.222513,0.205392,0.272277,0.203867,...,0.038706,0.115404,0.172494,0.117270,0.118385,0.086051,0.125260,0.134010,0.129053,0.096339
28721,33v3,LMJ.RY0402.093284,"Cre11.g467768,Cre14.g617150",P21,0.257976,0.196405,0.255381,0.283540,0.299385,0.237911,...,0.165390,0.201217,0.228478,0.160569,0.113200,0.154716,0.209840,0.138927,0.155056,0.186619
28722,33v3,LMJ.RY0402.046384,Cre01.g012650,P22,0.105337,0.116691,0.117845,0.197934,0.200687,0.122624,...,0.093313,0.020121,0.109202,0.114914,0.060147,0.115447,0.050335,0.026226,-0.017933,0.009134
28723,33v3,LMJ.RY0402.168539,Cre16.g693600,P23,0.233813,0.229751,0.084584,0.284829,0.220329,0.148090,...,-0.042242,0.145383,0.047487,0.021367,0.037541,0.003618,0.026773,0.089011,0.085255,-0.035297


## plate 33 2h-2h

In [17]:
plates = ['33v1', '33v2', '33v3']
y2_cols = [f'y2_{i}' for i in range(1, 49)]

# Run normalization
phase2_33_2h_2h_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='2h-2h',
    plates=plates,
    y2_cols=y2_cols
)
phase2_33_2h_2h_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_39,y2_40,y2_41,y2_42,y2_43,y2_44,y2_45,y2_46,y2_47,y2_48
24129,33v1,LMJ.RY0402.177454,Cre03.g205428,A02,0.084408,0.231682,0.148808,0.247820,0.233878,0.230847,...,0.517767,0.558517,0.102536,0.136106,0.150273,0.152221,0.546009,0.533287,0.508907,0.566962
24130,33v1,LMJ.RY0402.227435,Cre12.g538850,A03,0.213718,0.247560,0.231876,0.221442,0.249747,0.290370,...,0.650043,0.647407,0.167783,0.208717,0.205719,0.213711,0.638519,0.626664,0.623343,0.630950
24131,33v1,LMJ.RY0402.172194,"Cre16.g651450,Cre04.g217220",A04,,,,,,,...,,,,,,,,,,
24132,33v1,LMJ.RY0402.168114,Cre01.g024800,A05,0.182948,0.174692,0.178658,0.233466,0.227678,0.225951,...,0.558580,0.562216,0.133898,0.157355,0.171309,0.137456,0.558311,0.579202,0.556789,0.566687
24133,33v1,LMJ.RY0402.184490,Cre02.g076200,A06,0.142451,0.154537,0.125794,0.167998,0.131832,0.198302,...,0.547278,0.559646,0.102795,0.037773,0.156369,0.087049,0.554300,0.575384,0.552238,0.547575
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29486,33v3,LMJ.RY0402.177485,Cre04.g215150,P20,0.206002,0.186203,0.137653,0.194361,0.221967,0.204041,...,0.593930,0.609322,0.196680,0.219614,0.205366,0.238526,0.582908,0.571034,0.595880,0.616505
29487,33v3,LMJ.RY0402.093284,"Cre11.g467768,Cre14.g617150",P21,0.203714,0.307274,0.354503,0.326736,0.276464,0.349445,...,0.657596,0.633379,0.256999,0.243147,0.180318,0.253973,0.659504,0.648742,0.643783,0.640505
29488,33v3,LMJ.RY0402.046384,Cre01.g012650,P22,0.043463,0.220071,0.184950,0.135843,0.217727,0.157601,...,0.531960,0.544037,0.109544,0.120427,0.113249,0.127560,0.551272,0.556018,0.547455,0.489143
29489,33v3,LMJ.RY0402.168539,Cre16.g693600,P23,0.370429,0.250710,0.301611,0.230449,0.237110,0.310205,...,0.654246,0.662889,0.184767,0.005223,0.127398,0.095788,0.644589,0.607840,0.686710,0.659598


In [19]:
plates = ['33v1', '33v2', '33v3']
phase2_33_2h_2h= phase2_df1[(phase2_df1['light_regime'] == '2h-2h') & (phase2_df1['plate'].isin(plates))].copy()
phase2_33_2h_2h[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_39,y2_40,y2_41,y2_42,y2_43,y2_44,y2_45,y2_46,y2_47,y2_48
24129,33v1,LMJ.RY0402.177454,Cre03.g205428,A02,0.084305,0.223953,0.140513,0.237316,0.232525,0.229677,...,0.518910,0.551171,0.092873,0.132519,0.132255,0.157177,0.540747,0.528087,0.506919,0.563038
24130,33v1,LMJ.RY0402.227435,Cre12.g538850,A03,0.209848,0.243495,0.229587,0.212858,0.248887,0.289452,...,0.645278,0.645458,0.157863,0.204531,0.188133,0.215647,0.637104,0.624573,0.618291,0.625974
24131,33v1,LMJ.RY0402.172194,"Cre16.g651450,Cre04.g217220",A04,,,,,,,...,,,,,,,,,,
24132,33v1,LMJ.RY0402.168114,Cre01.g024800,A05,0.176366,0.165441,0.179017,0.225336,0.225552,0.224205,...,0.553903,0.553942,0.122524,0.152601,0.151698,0.142510,0.557128,0.577579,0.549363,0.562787
24133,33v1,LMJ.RY0402.184490,Cre02.g076200,A06,0.133814,0.146284,0.117333,0.156617,0.126678,0.196472,...,0.543576,0.552383,0.092933,0.038388,0.139225,0.088934,0.552409,0.571894,0.545498,0.543772
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29486,33v3,LMJ.RY0402.177485,Cre04.g215150,P20,0.217940,0.205466,0.154182,0.213557,0.226803,0.213843,...,0.593795,0.607913,0.198178,0.219929,0.213063,0.231762,0.583015,0.576394,0.597603,0.620310
29487,33v3,LMJ.RY0402.093284,"Cre11.g467768,Cre14.g617150",P21,0.215576,0.318377,0.354503,0.339888,0.280217,0.344240,...,0.653741,0.637156,0.257708,0.240382,0.188921,0.245011,0.657229,0.649750,0.645573,0.646416
29488,33v3,LMJ.RY0402.046384,Cre01.g012650,P22,0.058956,0.240986,0.194864,0.157906,0.221518,0.168240,...,0.532058,0.545931,0.110584,0.123474,0.131026,0.126875,0.554417,0.557157,0.551409,0.486899
29489,33v3,LMJ.RY0402.168539,Cre16.g693600,P23,0.376656,0.269240,0.306560,0.247528,0.240844,0.315605,...,0.651371,0.661830,0.184614,-0.003239,0.141344,0.097466,0.643540,0.610272,0.687774,0.661421


## plate 33 10min-10min

In [20]:
plates = ['33v1', '33v2']
y2_cols = [f'y2_{i}' for i in range(1, 85)]

# Run normalization
phase2_33_10min_10min_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='10min-10min',
    plates=plates,
    y2_cols=y2_cols
)
phase2_33_10min_10min_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_75,y2_76,y2_77,y2_78,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84
24512,33v1,LMJ.RY0402.177454,Cre03.g205428,A02,0.198677,0.536579,0.147720,0.568481,0.120998,0.546400,...,0.115173,0.516082,0.537824,0.191885,0.114264,0.540000,0.525771,0.114657,0.139893,0.518483
24513,33v1,LMJ.RY0402.227435,Cre12.g538850,A03,0.225482,0.634133,0.198214,0.642996,0.201584,0.623308,...,0.172563,0.584558,0.572187,0.168729,0.160214,0.586576,0.593720,0.163300,0.162120,0.589010
24514,33v1,LMJ.RY0402.172194,"Cre16.g651450,Cre04.g217220",A04,,,,,,,...,,,,,,,,,,
24515,33v1,LMJ.RY0402.168114,Cre01.g024800,A05,0.225006,0.573435,0.178379,0.571858,0.182659,0.579151,...,0.114132,0.531785,0.522200,0.095942,0.102751,0.532994,0.537544,0.105741,0.090271,0.538769
24516,33v1,LMJ.RY0402.184490,Cre02.g076200,A06,0.164167,0.538143,0.163646,0.521753,0.128199,0.538717,...,0.098745,0.512808,0.516205,0.084114,0.099823,0.514094,0.504453,0.102330,0.097302,0.508519
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27571,33v2,LMJ.RY0402.181834,Cre02.g078939,P20,0.236320,0.566801,0.164784,0.551795,0.140930,0.590319,...,0.146780,0.558797,0.548120,0.169082,0.101931,0.582665,0.566750,-0.006657,0.090271,0.549074
27572,33v2,LMJ.RY0402.088794,Cre12.g526850,P21,0.226727,0.563871,0.152827,0.602707,0.200727,0.586598,...,0.096900,0.611044,0.602208,0.146015,0.125264,0.573610,0.605298,0.103093,0.162677,0.576903
27573,33v2,LMJ.RY0402.218008,Cre08.g374250,P22,0.237836,0.552627,0.073897,0.541676,0.172664,0.559829,...,0.175485,0.521394,0.511183,0.102019,0.114406,0.514683,0.505914,0.072017,-0.043730,0.474971
27574,33v2,LMJ.RY0402.118852,"Cre08.g372100,Cre04.g214501",P23,0.240428,0.624774,0.136470,0.627129,0.276243,0.634087,...,0.119228,0.567982,0.551587,0.112120,0.102751,0.585524,0.582688,0.137463,0.126755,0.536614


In [21]:
plates = ['33v1', '33v2', '33v3']
phase2_33_10min_10min= phase2_df1[(phase2_df1['light_regime'] == '10min-10min') & (phase2_df1['plate'].isin(plates))].copy()
phase2_33_10min_10min[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_75,y2_76,y2_77,y2_78,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84
24512,33v1,LMJ.RY0402.177454,Cre03.g205428,A02,0.188638,0.536475,0.145299,0.567457,0.115726,0.544620,...,0.110005,0.514448,0.541739,0.183801,0.105034,0.541743,0.524112,0.104114,0.129292,0.518546
24513,33v1,LMJ.RY0402.227435,Cre12.g538850,A03,0.212474,0.636216,0.193180,0.641434,0.192738,0.623804,...,0.167534,0.584159,0.572536,0.158852,0.152580,0.590038,0.594407,0.155474,0.153229,0.590714
24514,33v1,LMJ.RY0402.172194,"Cre16.g651450,Cre04.g217220",A04,,,,,,,...,,,,,,,,,,
24515,33v1,LMJ.RY0402.168114,Cre01.g024800,A05,0.212442,0.575951,0.172799,0.571651,0.174675,0.575641,...,0.109437,0.529570,0.524971,0.091714,0.095925,0.535475,0.536681,0.094696,0.082466,0.541464
24516,33v1,LMJ.RY0402.184490,Cre02.g076200,A06,0.154852,0.538233,0.158192,0.523982,0.123625,0.537121,...,0.093982,0.509881,0.517743,0.078739,0.092970,0.516193,0.504985,0.091445,0.089926,0.508870
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27571,33v2,LMJ.RY0402.181834,Cre02.g078939,P20,0.248157,0.564367,0.170348,0.551906,0.144217,0.591434,...,0.153255,0.560771,0.545166,0.178701,0.108714,0.579657,0.567337,0.021844,0.098138,0.548124
27572,33v2,LMJ.RY0402.088794,Cre12.g526850,P21,0.239637,0.561796,0.155073,0.605066,0.209545,0.589501,...,0.101334,0.613778,0.601397,0.153440,0.134593,0.573097,0.605552,0.113960,0.171665,0.573791
27573,33v2,LMJ.RY0402.218008,Cre08.g374250,P22,0.250273,0.552186,0.081082,0.538356,0.178630,0.563975,...,0.181433,0.522335,0.509416,0.106640,0.123645,0.512554,0.505373,0.082205,-0.039612,0.469952
27574,33v2,LMJ.RY0402.118852,"Cre08.g372100,Cre04.g214501",P23,0.253732,0.625735,0.136871,0.629199,0.286822,0.633789,...,0.124535,0.570815,0.549715,0.118283,0.109575,0.581955,0.583533,0.143672,0.134567,0.534426


## plate 33 1min-1min

In [22]:
plates = ['33v1', '33v2']
y2_cols = [f'y2_{i}' for i in range(1, 89)]

# Run normalization
phase2_33_1min_1min_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='1min-1min',
    plates=plates,
    y2_cols=y2_cols
)
phase2_33_1min_1min_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
22980,33v1,LMJ.RY0402.177454,Cre03.g205428,A02,0.149300,0.537505,0.165141,0.526214,0.140654,0.519187,...,0.058842,0.485204,0.132867,0.488464,0.159586,0.490145,0.171616,0.450941,0.126555,0.484376
22981,33v1,LMJ.RY0402.227435,Cre12.g538850,A03,0.239766,0.583217,0.245381,0.567411,0.234438,0.554140,...,0.235513,0.582051,0.217694,0.533061,0.211657,0.526347,0.217965,0.520051,0.234838,0.533112
22982,33v1,LMJ.RY0402.172194,"Cre16.g651450,Cre04.g217220",A04,,,,,,,...,,,,,,,,,,
22983,33v1,LMJ.RY0402.168114,Cre01.g024800,A05,0.186334,0.553950,0.174683,0.521817,0.163127,0.509435,...,0.096800,0.494375,0.138939,0.481940,0.150364,0.519140,0.103605,0.493013,0.151423,0.494491
22984,33v1,LMJ.RY0402.184490,Cre02.g076200,A06,0.130784,0.518080,0.169035,0.523632,0.139073,0.474596,...,0.132123,0.435612,0.092976,0.475670,0.114962,0.442514,0.108727,0.430431,0.055787,0.430325
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25656,33v2,LMJ.RY0402.181834,Cre02.g078939,P20,0.188097,0.553853,0.182207,0.479982,0.187686,0.545401,...,0.131193,0.504568,0.205576,0.526443,0.141939,0.487779,0.205263,0.520259,0.210755,0.487279
25657,33v2,LMJ.RY0402.088794,Cre12.g526850,P21,0.197133,0.528736,0.216110,0.506965,0.212793,0.530612,...,0.214043,0.522901,0.192263,0.517396,0.182329,0.529630,0.231207,0.490126,0.222143,0.529837
25658,33v2,LMJ.RY0402.218008,Cre08.g374250,P22,0.164218,0.500123,0.151179,0.513480,0.086967,0.484519,...,0.087489,0.466036,0.119931,0.451159,0.071903,0.470718,0.103503,0.455745,0.085601,0.430230
25659,33v2,LMJ.RY0402.118852,"Cre08.g372100,Cre04.g214501",P23,0.270771,0.517474,0.229384,0.487291,0.264441,0.495536,...,0.082169,0.469424,0.189369,0.509257,0.169007,0.519077,0.141990,0.494964,0.161680,0.451279


In [28]:
plates = ['33v1', '33v2', '33v3']
phase2_33_1min_1min= phase2_df1[(phase2_df1['light_regime'] == '1min-1min') & (phase2_df1['plate'].isin(plates))].copy()
phase2_33_1min_1min[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88,y2_89
22980,33v1,LMJ.RY0402.177454,Cre03.g205428,A02,0.156407,0.550651,0.183130,0.528431,0.159135,0.527057,...,0.491784,0.139105,0.493003,0.163406,0.496298,0.178537,0.461385,0.132571,0.493216,
22981,33v1,LMJ.RY0402.227435,Cre12.g538850,A03,0.246817,0.587317,0.256333,0.567975,0.246202,0.557925,...,0.583613,0.220424,0.538117,0.214142,0.529889,0.222776,0.526620,0.241768,0.538201,
22982,33v1,LMJ.RY0402.172194,"Cre16.g651450,Cre04.g217220",A04,,,,,,,...,,,,,,,,,,
22983,33v1,LMJ.RY0402.168114,Cre01.g024800,A05,0.194227,0.562793,0.191641,0.525094,0.178403,0.520690,...,0.500997,0.145711,0.490225,0.157769,0.523754,0.115240,0.500070,0.160120,0.502885,
22984,33v1,LMJ.RY0402.184490,Cre02.g076200,A06,0.139156,0.530218,0.185255,0.526141,0.157731,0.488581,...,0.446739,0.095210,0.484994,0.127500,0.452607,0.120646,0.439234,0.062165,0.441506,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25656,33v2,LMJ.RY0402.181834,Cre02.g078939,P20,0.180434,0.545107,0.166606,0.475098,0.173911,0.539207,...,0.497730,0.201560,0.522533,0.133572,0.482142,0.198769,0.513980,0.204759,0.478947,
25657,33v2,LMJ.RY0402.088794,Cre12.g526850,P21,0.190417,0.515623,0.204184,0.503194,0.201263,0.524727,...,0.518261,0.189344,0.512687,0.178606,0.524130,0.226255,0.484478,0.217394,0.524895,
25658,33v2,LMJ.RY0402.218008,Cre08.g374250,P22,0.156081,0.485873,0.134070,0.511095,0.072031,0.473161,...,0.458846,0.110693,0.440476,0.067785,0.462209,0.091986,0.445026,0.076176,0.419122,
25659,33v2,LMJ.RY0402.118852,"Cre08.g372100,Cre04.g214501",P23,0.267310,0.504983,0.218240,0.481934,0.253031,0.486453,...,0.463261,0.185661,0.505663,0.164310,0.514394,0.135052,0.488400,0.153544,0.440432,


## plate 33 30s-30s

In [24]:
plates = ['33v2', '33v3']
y2_cols = [f'y2_{i}' for i in range(1, 89)]

# Run normalization
phase2_33_30s_30s_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='30s-30s',
    plates=plates,
    y2_cols=y2_cols
)
phase2_33_30s_30s_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
26810,33v2,LMJ.RY0402.065793,Cre13.g588600,A02,0.177066,0.513614,0.237480,0.495210,0.186713,0.460235,...,0.160376,0.453803,0.207809,0.485753,0.205571,0.478664,0.196303,0.458238,0.236409,0.397408
26811,33v2,LMJ.RY0402.044988,Cre16.g664150,A03,0.324283,0.596032,0.364651,0.632236,0.348759,0.635915,...,0.251376,0.505907,0.249178,0.553145,0.328515,0.548017,0.335974,0.588636,0.301546,0.559624
26812,33v2,LMJ.RY0402.168539,Cre16.g693600,A04,0.352262,0.604649,0.332983,0.606152,0.325460,0.567643,...,0.265167,0.499160,0.250433,0.481803,0.232381,0.494476,0.183099,0.534623,0.214063,0.526436
26813,33v2,LMJ.RY0402.098695,Cre07.g334750,A05,0.230394,0.546382,0.189237,0.528216,0.186182,0.488065,...,0.108439,0.482193,0.126775,0.463881,0.124183,0.490822,0.125427,0.485848,0.183624,0.476499
26814,33v2,LMJ.RY0402.238883,Cre06.g256300,A06,0.255918,0.571456,0.225770,0.560801,0.267009,0.520470,...,0.257858,0.550327,0.265631,0.516871,0.280116,0.549539,0.266364,0.520361,0.239423,0.531863
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29869,33v3,LMJ.RY0402.177485,Cre04.g215150,P20,0.262930,0.542120,0.234298,0.537313,0.258877,0.486090,...,0.230199,0.523218,0.214078,0.528250,0.239517,0.513613,0.263684,0.515313,0.222945,0.532124
29870,33v3,LMJ.RY0402.093284,"Cre11.g467768,Cre14.g617150",P21,0.297006,0.559114,0.240760,0.525613,0.305421,0.558659,...,0.285633,0.520208,0.252141,0.511596,0.306269,0.538211,0.229318,0.538070,0.190005,0.495087
29871,33v3,LMJ.RY0402.046384,Cre01.g012650,P22,0.156604,0.449343,0.139692,0.445026,0.121799,0.447676,...,0.129267,0.438419,0.181648,0.427360,0.146264,0.428047,0.143105,0.411513,0.158233,0.453908
29872,33v3,LMJ.RY0402.168539,Cre16.g693600,P23,0.251124,0.622209,0.316391,0.601336,0.267204,0.557320,...,0.246025,0.559863,0.105868,0.549145,0.332249,0.532221,0.223400,0.530918,0.177644,0.482760


In [27]:
plates = ['33v1', '33v2', '33v3']
phase2_33_30s_30s= phase2_df1[(phase2_df1['light_regime'] == '30s-30s') & (phase2_df1['plate'].isin(plates))].copy()
phase2_33_30s_30s[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88,y2_89
26810,33v2,LMJ.RY0402.065793,Cre13.g588600,A02,0.158679,0.499843,0.208023,0.476413,0.162757,0.445955,...,0.432914,0.183804,0.467467,0.179664,0.460811,0.175086,0.440578,0.206574,0.380716,
26811,33v2,LMJ.RY0402.044988,Cre16.g664150,A03,0.311914,0.588254,0.337161,0.622330,0.326368,0.630473,...,0.485984,0.223440,0.537587,0.302018,0.533307,0.319822,0.574088,0.272913,0.538503,
26812,33v2,LMJ.RY0402.168539,Cre16.g693600,A04,0.336951,0.599042,0.312070,0.598330,0.300390,0.557345,...,0.479185,0.225225,0.464133,0.210294,0.477264,0.165353,0.516951,0.183464,0.505172,
26813,33v2,LMJ.RY0402.098695,Cre07.g334750,A05,0.208571,0.533561,0.159766,0.510540,0.162574,0.474913,...,0.461493,0.106174,0.448405,0.105007,0.473390,0.106412,0.465558,0.154139,0.454252,
26814,33v2,LMJ.RY0402.238883,Cre06.g256300,A06,0.234355,0.561184,0.195883,0.543101,0.239541,0.505931,...,0.530857,0.240881,0.501664,0.257687,0.534950,0.247916,0.501773,0.210345,0.511649,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29869,33v3,LMJ.RY0402.177485,Cre04.g215150,P20,0.283285,0.554911,0.263001,0.554876,0.287083,0.499357,...,0.542054,0.237383,0.545449,0.263456,0.529368,0.281462,0.532828,0.254210,0.552558,
29870,33v3,LMJ.RY0402.093284,"Cre11.g467768,Cre14.g617150",P21,0.317214,0.570328,0.269589,0.544267,0.329729,0.571453,...,0.538188,0.277219,0.528496,0.332686,0.554252,0.251382,0.556550,0.218927,0.516161,
29871,33v3,LMJ.RY0402.046384,Cre01.g012650,P22,0.175968,0.463788,0.163501,0.460886,0.144529,0.462965,...,0.457424,0.207805,0.439898,0.169518,0.443017,0.162992,0.426032,0.186504,0.478073,
29872,33v3,LMJ.RY0402.168539,Cre16.g693600,P23,0.273042,0.629811,0.341522,0.611717,0.294690,0.569831,...,0.578390,0.132563,0.564007,0.358695,0.547606,0.246213,0.549436,0.206895,0.505133,


## plate 33 5min-5min

In [26]:
plates = ['33v1','33v2', '33v3']
y2_cols = [f'y2_{i}' for i in range(1, 89)]

# Run normalization
phase2_33_5min_5min_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='5min-5min',
    plates=plates,
    y2_cols=y2_cols
)
y2_cols = [f'y2_{i}' for i in range(1, 90)]
phase2_33_5min_5min_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88,y2_89
24895,33v1,LMJ.RY0402.177454,Cre03.g205428,A02,0.191734,0.555354,0.208050,0.554118,0.206554,0.528107,...,0.532035,0.108590,0.520437,0.191104,0.529771,0.151872,0.485406,0.141589,0.529487,
24896,33v1,LMJ.RY0402.227435,Cre12.g538850,A03,0.226532,0.628241,0.220938,0.618068,0.195701,0.600843,...,0.582163,0.162915,0.574421,0.183780,0.578783,0.205185,0.587627,0.189635,0.575679,
24897,33v1,LMJ.RY0402.172194,"Cre16.g651450,Cre04.g217220",A04,,,,,,,...,,,,,,,,,,
24898,33v1,LMJ.RY0402.168114,Cre01.g024800,A05,0.175791,0.577409,0.190942,0.556003,0.157611,0.558796,...,0.528631,0.130861,0.513891,0.132276,0.522144,0.133121,0.518434,0.108924,0.515616,
24899,33v1,LMJ.RY0402.184490,Cre02.g076200,A06,0.143525,0.526702,0.144356,0.489746,0.121033,0.521414,...,0.460251,0.054441,0.478895,0.066586,0.465794,0.091287,0.480906,0.095124,0.499448,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30635,33v3,LMJ.RY0402.177485,Cre04.g215150,P20,0.167704,0.597442,0.174101,0.561755,0.213493,0.521207,...,0.548987,0.146384,0.537252,0.196126,0.556336,0.134918,0.586054,0.134517,0.517546,
30636,33v3,LMJ.RY0402.093284,"Cre11.g467768,Cre14.g617150",P21,0.268338,0.641214,0.296530,0.589109,0.232222,0.615680,...,0.551874,0.239458,0.579285,0.249764,0.564445,0.183603,0.578533,0.181104,0.543820,
30637,33v3,LMJ.RY0402.046384,Cre01.g012650,P22,0.157299,0.460023,0.162730,0.490231,0.086422,0.489709,...,0.442828,0.080280,0.443146,0.107427,0.434389,0.087371,0.461247,0.079928,0.427756,
30638,33v3,LMJ.RY0402.168539,Cre16.g693600,P23,0.283894,0.646135,0.326163,0.625604,0.264710,0.596620,...,0.559527,0.183189,0.539456,0.140982,0.606318,0.184159,0.580131,0.149075,0.590553,


In [29]:
plates = ['33v1', '33v2', '33v3']
phase2_33_5min_5min= phase2_df1[(phase2_df1['light_regime'] == '5min-5min') & (phase2_df1['plate'].isin(plates))].copy()
phase2_33_5min_5min[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88,y2_89
24895,33v1,LMJ.RY0402.177454,Cre03.g205428,A02,0.175110,0.546731,0.194263,0.547397,0.188704,0.520713,...,0.517259,0.093018,0.507631,0.178261,0.516857,0.134914,0.472300,0.122711,0.519822,
24896,33v1,LMJ.RY0402.227435,Cre12.g538850,A03,0.205802,0.621277,0.207745,0.610929,0.179138,0.592029,...,0.573657,0.146833,0.562582,0.170572,0.568883,0.185903,0.576034,0.170140,0.566954,
24897,33v1,LMJ.RY0402.172194,"Cre16.g651450,Cre04.g217220",A04,,,,,,,...,,,,,,,,,,
24898,33v1,LMJ.RY0402.168114,Cre01.g024800,A05,0.157272,0.569406,0.178827,0.548628,0.143008,0.548657,...,0.514457,0.112757,0.501239,0.120711,0.511508,0.116730,0.505867,0.090972,0.504496,
24899,33v1,LMJ.RY0402.184490,Cre02.g076200,A06,0.125849,0.517042,0.131088,0.485661,0.102215,0.515213,...,0.447594,0.036707,0.466440,0.060706,0.453218,0.075065,0.468579,0.076307,0.490020,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30635,33v3,LMJ.RY0402.177485,Cre04.g215150,P20,0.196759,0.607772,0.191063,0.570310,0.231258,0.531548,...,0.560213,0.168431,0.551704,0.207753,0.562517,0.159759,0.597868,0.158623,0.526340,
30636,33v3,LMJ.RY0402.093284,"Cre11.g467768,Cre14.g617150",P21,0.292088,0.644284,0.312481,0.600187,0.248288,0.625190,...,0.563242,0.254731,0.595036,0.261970,0.571118,0.208548,0.590865,0.202689,0.555375,
30637,33v3,LMJ.RY0402.046384,Cre01.g012650,P22,0.186202,0.481583,0.180601,0.497674,0.102089,0.504676,...,0.459803,0.097867,0.463277,0.122478,0.445639,0.112229,0.477067,0.100182,0.434193,
30638,33v3,LMJ.RY0402.168539,Cre16.g693600,P23,0.307329,0.650622,0.337946,0.634997,0.276772,0.605238,...,0.570669,0.201061,0.554695,0.154263,0.615885,0.209405,0.592266,0.170899,0.599959,


## plate 33 1min-5min

In [30]:
plates = ['33v2', '33v3']
y2_cols = [f'y2_{i}' for i in range(1, 89)]

# Run normalization
phase2_33_1min_5min_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='1min-5min',
    plates=plates,
    y2_cols=y2_cols
)
y2_cols = [f'y2_{i}' for i in range(1, 90)]
phase2_33_1min_5min_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88,y2_89
27576,33v2,LMJ.RY0402.065793,Cre13.g588600,A02,0.168683,0.534523,0.123931,0.519982,0.166812,0.540557,...,0.527180,0.103648,0.544425,0.106173,0.574787,0.090652,0.545323,0.080484,0.544618,
27577,33v2,LMJ.RY0402.044988,Cre16.g664150,A03,0.326148,0.678548,0.322875,0.659054,0.249563,0.657803,...,0.658957,0.255957,0.653236,0.212260,0.673080,0.228115,0.672160,0.252490,0.651288,
27578,33v2,LMJ.RY0402.168539,Cre16.g693600,A04,0.358549,0.662256,0.276447,0.652270,0.231854,0.662864,...,0.655625,0.248416,0.624494,0.199962,0.642328,0.195610,0.651008,0.190307,0.639916,
27579,33v2,LMJ.RY0402.098695,Cre07.g334750,A05,0.209126,0.592234,0.166436,0.581633,0.172778,0.581847,...,0.586414,0.141316,0.594496,0.176804,0.611664,0.180171,0.605856,0.127990,0.589423,
27580,33v2,LMJ.RY0402.238883,Cre06.g256300,A06,0.279858,0.663471,0.195221,0.635471,0.275938,0.631481,...,0.614121,0.197183,0.647862,0.173603,0.585354,0.186743,0.627816,0.203557,0.611444,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30252,33v3,LMJ.RY0402.177485,Cre04.g215150,P20,0.206879,0.570953,0.206430,0.619871,0.180396,0.565156,...,0.610893,0.114621,0.580644,0.114801,0.569285,0.094541,0.569951,0.133866,0.569651,
30253,33v3,LMJ.RY0402.093284,"Cre11.g467768,Cre14.g617150",P21,0.286111,0.634347,0.195435,0.609112,0.160155,0.630649,...,0.561559,0.216430,0.586179,0.151975,0.584798,0.119560,0.576543,0.142242,0.584939,
30254,33v3,LMJ.RY0402.046384,Cre01.g012650,P22,0.128706,0.521509,0.044666,0.535103,0.054871,0.485588,...,0.464735,0.006315,0.470149,0.009116,0.472023,0.013592,0.523498,0.039258,0.527683,
30255,33v3,LMJ.RY0402.168539,Cre16.g693600,P23,0.341251,0.668649,0.345289,0.659876,0.303158,0.631046,...,0.622472,0.189985,0.568573,0.208761,0.634769,0.232446,0.587468,0.155869,0.632010,


In [31]:
plates = ['33v1', '33v2', '33v3']
phase2_33_1min_5min= phase2_df1[(phase2_df1['light_regime'] == '1min-5min') & (phase2_df1['plate'].isin(plates))].copy()
phase2_33_1min_5min[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88,y2_89
27576,33v2,LMJ.RY0402.065793,Cre13.g588600,A02,0.156066,0.524207,0.113721,0.521178,0.165777,0.534620,...,0.523950,0.096576,0.541663,0.097527,0.568083,0.079127,0.538404,0.069869,0.533476,
27577,33v2,LMJ.RY0402.044988,Cre16.g664150,A03,0.322984,0.680460,0.319106,0.659125,0.250325,0.657463,...,0.657559,0.252669,0.654159,0.204689,0.668527,0.219442,0.673895,0.248676,0.648343,
27578,33v2,LMJ.RY0402.168539,Cre16.g693600,A04,0.355798,0.661282,0.269653,0.651807,0.233677,0.660257,...,0.653923,0.249002,0.624590,0.194874,0.637392,0.185712,0.650361,0.183585,0.635914,
27579,33v2,LMJ.RY0402.098695,Cre07.g334750,A05,0.199523,0.589184,0.156886,0.581503,0.169008,0.578480,...,0.582064,0.136018,0.593086,0.169737,0.607260,0.170430,0.602646,0.120288,0.582720,
27580,33v2,LMJ.RY0402.238883,Cre06.g256300,A06,0.273892,0.663177,0.186855,0.634839,0.278910,0.628357,...,0.611422,0.189270,0.648865,0.166259,0.581563,0.176378,0.626607,0.197844,0.607186,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30252,33v3,LMJ.RY0402.177485,Cre04.g215150,P20,0.217207,0.574698,0.212783,0.621136,0.183623,0.571291,...,0.614474,0.121790,0.583156,0.122823,0.577722,0.104137,0.576323,0.141779,0.577478,
30253,33v3,LMJ.RY0402.093284,"Cre11.g467768,Cre14.g617150",P21,0.290928,0.636712,0.204212,0.610464,0.160003,0.633594,...,0.567993,0.226104,0.586683,0.159431,0.588869,0.129991,0.580223,0.150063,0.592623,
30254,33v3,LMJ.RY0402.046384,Cre01.g012650,P22,0.140043,0.524256,0.044065,0.537655,0.032451,0.485026,...,0.477255,0.011163,0.462258,0.001852,0.472043,0.022647,0.531127,0.048932,0.535142,
30255,33v3,LMJ.RY0402.168539,Cre16.g693600,P23,0.347807,0.667018,0.349508,0.659417,0.303536,0.633757,...,0.624189,0.197619,0.570200,0.215930,0.639446,0.242531,0.590516,0.163893,0.637216,


In [33]:
phase2_33_quantile1= pd.concat([
    phase2_33_20h_ML_normalized,
    phase2_33_20h_HL_normalized,
    phase2_33_2h_2h_normalized,
    phase2_33_10min_10min_normalized,
    phase2_33_1min_1min_normalized,
    phase2_33_30s_30s_normalized,
    phase2_33_5min_5min_normalized,
    phase2_33_1min_5min_normalized
], ignore_index=True)

In [36]:
phase2_33_quantile1.to_csv('phase2_33_quantile1.csv', index= False)

In [34]:
phase2_33_quantile1.shape

(7660, 818)

In [35]:
plates = ['33v1','33v2','33v3']
phase2_df1[phase2_df1['plate'].isin(plates)].shape

(7660, 818)