In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

import statsmodels.formula.api as smf
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, matthews_corrcoef


from matplotlib.lines import Line2D
from matplotlib.patches import Patch

In [3]:
# CSV export to analyse; the path is relative to the notebook's working directory.
file_path = 'database_2025-05-18.csv'  
# low_memory=False makes pandas read the file in one pass rather than in chunks,
# so column dtypes are inferred from the whole column.
df = pd.read_csv(file_path, low_memory=False)

In [4]:
# Keep only rows whose 'plate' value is one of the labels below (exact match).
phase2_plates = ['99','99v1','99v3']
phase2_df= df[df['plate'].isin(phase2_plates)]

In [5]:
# drop last data point
y2_cols = [f'y2_{i}' for i in range(1, 91)]
def drop_last_valid(row):
    """Return ``row`` with its last non-null y2 measurement replaced by NaN.

    Only the columns named in the module-level ``y2_cols`` list (y2_1 .. y2_90)
    are inspected; every other entry of the row is returned unchanged.

    Parameters
    ----------
    row : pandas.Series
        One record containing at least the ``y2_cols`` labels.

    Returns
    -------
    pandas.Series
        A modified copy of ``row`` when a non-null y2 value exists; ``row``
        itself, untouched, when every y2 value is null.
    """
    valid = row[y2_cols].last_valid_index()
    if valid is None:
        # No measurement at all in this row: nothing to drop.
        return row
    # Work on a copy: DataFrame.apply does not support functions that mutate
    # the object they are handed.
    trimmed = row.copy()
    trimmed[valid] = np.nan
    return trimmed

# axis=1 calls drop_last_valid once per row; the result is bound to a new name,
# so phase2_df is not reassigned here.
phase2_df1 = phase2_df.apply(drop_last_valid, axis=1)

In [6]:
# Relabel plate '99' as '99v2'. Series.replace with a scalar matches whole values,
# so '99v1' and '99v3' are left as they are.
phase2_df1['plate'] = phase2_df1['plate'].replace('99', '99v2')

In [8]:
## drop phase1 data in 99
phase2_df1 = phase2_df1.copy()

y2_cols = [f'y2_{i}' for i in range(1, 91)] 

# Find the first column with null per row.
# idxmax returns the first label where isnull() is True; for a row with no null
# at all it returns the first label ('y2_1'), the same answer as for a row whose
# y2_1 is null.
phase2_df1['first_null_y2'] = phase2_df1[y2_cols].isnull().idxmax(axis=1)

# Drop rows where first null is y2_41 or y2_81
# NOTE(review): this comment used to say y2_42 / y2_82 while the code tests
# y2_41 / y2_81 — presumably the one-column shift comes from the last valid
# point having been blanked earlier; confirm which pair is intended.
phase2_df1= phase2_df1[~phase2_df1['first_null_y2'].isin(['y2_41', 'y2_81'])]

In [10]:
# Rows of plate '99v2' recorded under the '10min-10min' light regime.
mask = (phase2_df1['light_regime'] == '10min-10min') & (phase2_df1['plate'] == '99v2')

# Give those rows a distinct plate label per start_date (exact match on the value);
# masked rows with any other start_date keep the label '99v2'.
phase2_df1.loc[mask & (phase2_df1['start_date'] == '2024-05-25'), 'plate'] = '99v2_2'
phase2_df1.loc[mask & (phase2_df1['start_date'] == '2024-11-22'), 'plate'] = '99v2_1'

In [11]:
phase2_df1.shape

(8809, 727)

In [13]:

plates = ['99v1', '99v2', '99v3']
df_30v = phase2_df1.loc[phase2_df1['plate'].isin(plates)]

# Number of rows for every (plate, light_regime, mutant_ID) combination.
rows_per_mutant = df_30v.groupby(['plate', 'light_regime', 'mutant_ID']).size()
group_counts = rows_per_mutant.reset_index(name='count')

# For each light regime and plate: how many mutant IDs share a given row count.
mutants_per_count = group_counts.groupby(['light_regime', 'plate', 'count']).size()
summary = mutants_per_count.reset_index(name='n_mutants')

summary = summary.sort_values(by=['light_regime', 'plate', 'count'])

# Show result
summary

Unnamed: 0,light_regime,plate,count,n_mutants
0,10min-10min,99v1,383,1
1,10min-10min,99v3,382,1
2,1min-1min,99v1,383,1
3,1min-1min,99v2,383,1
4,1min-1min,99v3,382,1
5,1min-5min,99v1,383,1
6,1min-5min,99v2,383,1
7,1min-5min,99v3,382,1
8,20h_HL,99v1,383,1
9,20h_HL,99v3,382,1


In [16]:
# Every column of the raw frame whose name starts with 'y2_'
y2_cols = [name for name in df.columns if name.startswith('y2_')]

# Independent copy, so the helper columns added below stay out of phase2_df1
subset = phannotated = phase2_df1.copy()
del phannotated

# One null mask over the y2_ columns feeds both helper columns
y2_is_null = subset[y2_cols].isnull()
subset['first_null_y2'] = y2_is_null.idxmax(axis=1)
subset['has_null'] = y2_is_null.any(axis=1)

# Rows without any null would otherwise be reported as 'y2_1' by idxmax
subset = subset.loc[subset['has_null']]

# Row count per (plate, light_regime, first null column), largest count first
# within each plate / light regime
first_null_counts = (
    subset.groupby(['plate', 'light_regime', 'first_null_y2'])
    .size()
    .reset_index(name='count')
)
summary = first_null_counts.sort_values(
    ['plate', 'light_regime', 'count'], ascending=[True, True, False]
)

summary

Unnamed: 0,plate,light_regime,first_null_y2,count
1,99v1,10min-10min,y2_85,382
0,99v1,10min-10min,y2_1,1
2,99v1,1min-1min,y2_89,383
4,99v1,1min-5min,y2_89,381
3,99v1,1min-5min,y2_1,2
5,99v1,20h_HL,y2_45,383
7,99v1,20h_ML,y2_45,380
6,99v1,20h_ML,y2_1,3
9,99v1,2h-2h,y2_49,381
8,99v1,2h-2h,y2_1,2


## 99 plate 20h ML

In [17]:
from scipy import interpolate
from scipy.stats import rankdata

def normalize_quantiles(A, ties=True):
    """Quantile-normalize the columns of a 2-D array, tolerating NaNs.

    Each column is sorted with its NaNs removed and, when it has fewer
    observations than the matrix has rows, linearly interpolated onto a common
    grid of ``n_rows`` evenly spaced quantile positions in [0, 1]. The row-wise
    mean of these sorted columns is the reference distribution. Every observed
    value is then replaced by the reference value at its own quantile position
    ``rank / (n_observed - 1)`` within its column.

    Parameters
    ----------
    A : array-like, shape (n_rows, n_cols)
        Values to normalize; converted to float64. NaN marks a missing value.
    ties : bool, default True
        If True, positions come from average ranks, so tied values in a column
        receive the same normalized value. If False, ordinal ranks (double
        argsort) are used and tied values receive neighbouring quantiles.

    Returns
    -------
    numpy.ndarray
        New float64 array with the shape of ``A``; NaN wherever ``A`` is NaN.

    Raises
    ------
    ValueError
        If ``A`` is not two-dimensional.

    Notes
    -----
    * A single-column (or zero-row) input is returned as an unchanged copy.
    * A column without any observation is ignored when building the reference
      and stays all-NaN.
    * A column with exactly one observation has no defined rank spread; by
      convention here its value is mapped to the middle (0.5) of the reference.
    * NOTE(review): the procedure looks like a port of limma's
      ``normalizeQuantiles`` — confirm against that reference if exact parity
      is required.
    """
    A = np.asarray(A, dtype=np.float64)
    if A.ndim != 2:
        raise ValueError(f"A must be 2-D (rows x columns), got {A.ndim} dimension(s)")
    n_rows, n_cols = A.shape
    if n_cols == 1 or n_rows == 0:
        return A.copy()

    grid = np.linspace(0, 1, n_rows)      # common quantile positions
    observed = ~np.isnan(A)
    nobs = observed.sum(axis=0)           # observations per column

    # Sorted values of every column, resampled onto the common grid.
    S = np.full((n_rows, n_cols), np.nan)
    for j in range(n_cols):
        if nobs[j] == 0:
            continue                      # nothing to contribute
        sorted_x = np.sort(A[observed[:, j], j])
        if nobs[j] < n_rows:
            # np.interp also copes with a single observation (constant column).
            S[:, j] = np.interp(grid, np.linspace(0, 1, nobs[j]), sorted_x)
        else:
            S[:, j] = sorted_x

    A_out = np.full_like(A, np.nan)
    if not nobs.any():
        return A_out                      # every entry is missing

    # Reference distribution; all-NaN columns of S are skipped by nanmean.
    m = np.nanmean(S, axis=1)

    for j in range(n_cols):
        count = nobs[j]
        if count == 0:
            continue
        rows = observed[:, j]
        x = A[rows, j]

        if count == 1:
            A_out[rows, j] = np.interp(0.5, grid, m)
            continue

        if ties:
            quant_pos = (rankdata(x, method='average') - 1) / (count - 1)
        else:
            order = np.argsort(np.argsort(x))
            if count == n_rows:
                # Complete column: ranks index the reference exactly.
                A_out[rows, j] = m[order]
                continue
            # Incomplete column: spread the ranks over the whole reference
            # instead of using only its lowest ``count`` entries.
            quant_pos = order / (count - 1)

        A_out[rows, j] = np.interp(quant_pos, grid, m)

    return A_out

In [73]:
def quantile_normalize_light_regime(df, light_regime, plates, y2_cols, tie_handling=True):
    """Quantile-normalize y2 timepoints across plates for one light regime.

    Rows of ``df`` whose ``light_regime`` equals ``light_regime`` and whose
    ``plate`` is in ``plates`` are selected. For every column in ``y2_cols`` a
    matrix with one column per plate is built, passed to
    ``normalize_quantiles`` and the result is written back to the rows it came
    from.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'light_regime', 'plate', 'mutant_ID',
        'well_id', 'mutated_genes' and every name in ``y2_cols``.
    light_regime : str
        Value of the 'light_regime' column to select.
    plates : sequence of str
        Plate labels to normalize against each other; one matrix column each,
        in this order.
    y2_cols : iterable of str
        Timepoint columns to normalize, each one independently.
    tie_handling : bool, default True
        Forwarded to ``normalize_quantiles`` as ``ties``.

    Returns
    -------
    pandas.DataFrame
        Copy of the selected rows with ``y2_cols`` replaced by normalized
        values; all other columns are unchanged. ``df`` is not modified.

    Raises
    ------
    ValueError
        If the plates do not all contribute the same number of rows, or if the
        selected rows carry duplicate index labels (the write-back is
        label-based and would hit the wrong rows).
    """
    selected = (df['light_regime'] == light_regime) & (df['plate'].isin(plates))
    subset_df = df[selected]
    if not subset_df.index.is_unique:
        raise ValueError("df index must be unique among the selected rows")
    df_normalized = subset_df.copy()

    # The row order inside each plate does not depend on the timepoint, so it
    # is determined once: WT rows first (by well), then the remaining rows.
    plate_indices = []
    for plate in plates:
        plate_df = subset_df[subset_df['plate'] == plate]
        wt_rows = plate_df[plate_df['mutant_ID'] == 'WT'].sort_values(['mutant_ID', 'well_id'])
        non_wt_rows = plate_df[plate_df['mutant_ID'] != 'WT'].sort_values(['mutant_ID', 'mutated_genes'])
        plate_indices.append(pd.concat([wt_rows, non_wt_rows], axis=0).index.values)

    lengths = [len(index) for index in plate_indices]
    same_length = len(set(lengths)) == 1

    for timepoint in y2_cols:
        # Validate shape (reported against the first timepoint that is reached)
        if not same_length:
            raise ValueError(f"Length mismatch at {timepoint}: {lengths}")

        matrix = np.column_stack(
            [subset_df.loc[index, timepoint].values for index in plate_indices]
        )
        normalized_matrix = normalize_quantiles(matrix, ties=tie_handling)

        # Write back
        for col_idx, indices in enumerate(plate_indices):
            df_normalized.loc[indices, timepoint] = normalized_matrix[:, col_idx]

    return df_normalized

In [83]:
# Define inputs
plates = ['99v1', '99v2', '99v3']
# range(1, 45) -> columns y2_1 .. y2_44
y2_cols = [f'y2_{i}' for i in range(1, 45)]

# Run normalization (tie_handling is left at its default, True)
phase2_99_20h_ML_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='20h_ML',
    plates=plates,
    y2_cols=y2_cols
)

# View a few columns
phase2_99_20h_ML_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_35,y2_36,y2_37,y2_38,y2_39,y2_40,y2_41,y2_42,y2_43,y2_44
4596,99v2,WT,,A02,0.395946,0.458715,0.431259,0.441383,0.462359,0.463824,...,0.434465,0.441521,0.433813,0.424129,0.459732,0.412107,0.435436,0.425556,0.435235,0.462957
4597,99v2,WT,,A03,0.400521,0.437912,0.436556,0.421168,0.439059,0.482322,...,0.436272,0.422604,0.423329,0.406950,0.420465,0.426990,0.417690,0.422986,0.442066,0.426282
4598,99v2,WT,,A04,0.383845,0.424522,0.434359,0.451196,0.430484,0.449514,...,0.430937,0.411337,0.421027,0.418433,0.418183,0.417599,0.416310,0.404369,0.399143,0.417182
4599,99v2,WT,,A05,0.399664,0.412371,0.435721,0.441674,0.423477,0.444598,...,0.391300,0.412572,0.394101,0.389790,0.381372,0.397775,0.399523,0.390447,0.398786,0.399561
4600,99v2,WT,,A06,0.400250,0.446534,0.442323,0.435889,0.428917,0.428473,...,0.403972,0.392939,0.398829,0.402337,0.393212,0.390589,0.401409,0.399117,0.394485,0.401988
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65392,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P20,0.383382,0.444426,0.457106,0.410935,0.427216,0.447526,...,0.330216,0.419351,0.420114,0.382113,0.376643,0.388826,0.360239,0.391127,0.383291,0.386648
65393,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P21,0.384473,0.406081,0.441028,0.412007,0.401302,0.457748,...,0.397199,0.379543,0.357246,0.389105,0.387824,0.391758,0.371967,0.368862,0.362684,0.369061
65394,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P22,0.380537,0.433729,0.400139,0.414827,0.432052,0.419632,...,0.405836,0.394230,0.398572,0.393796,0.375626,0.358379,0.339321,0.415138,0.380802,0.431625
65395,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P23,0.431699,0.446807,0.444888,0.452344,0.461880,0.449620,...,0.422453,0.396658,0.406811,0.409140,0.421503,0.435083,0.409350,0.396158,0.397842,0.422163


In [52]:
# Un-normalized rows for the same light regime and plates (presumably for
# comparison with the normalized frame).
# NOTE: y2_cols is not assigned in this cell; it is whatever a previously run
# cell left behind.
plates = ['99v1', '99v2', '99v3']
phase2_99_20h_ML= phase2_df1[(phase2_df1['light_regime'] == '20h_ML') & (phase2_df1['plate'].isin(plates))].copy()
phase2_99_20h_ML[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_35,y2_36,y2_37,y2_38,y2_39,y2_40,y2_41,y2_42,y2_43,y2_44
4596,99v2,WT,,A02,0.403452,0.458381,0.446355,0.451893,0.462545,0.473654,...,0.437306,0.438739,0.436745,0.425182,0.460132,0.416820,0.431699,0.426660,0.434186,0.455165
4597,99v2,WT,,A03,0.406571,0.444557,0.450163,0.437717,0.446181,0.485868,...,0.439028,0.426156,0.427898,0.409835,0.429220,0.426878,0.418236,0.425053,0.437955,0.431473
4598,99v2,WT,,A04,0.394895,0.434208,0.448605,0.458544,0.439302,0.463217,...,0.435019,0.416140,0.426639,0.419435,0.427098,0.421326,0.416890,0.410006,0.406576,0.424699
4599,99v2,WT,,A05,0.406142,0.425590,0.449469,0.451979,0.435090,0.459654,...,0.404971,0.417715,0.404941,0.396745,0.398079,0.405243,0.402744,0.398921,0.406331,0.409915
4600,99v2,WT,,A06,0.406358,0.450139,0.454591,0.447797,0.438275,0.447140,...,0.414056,0.401087,0.407818,0.406559,0.407936,0.399093,0.403970,0.406643,0.402226,0.412067
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65392,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P20,0.364720,0.433444,0.446292,0.399993,0.415975,0.439088,...,0.330747,0.419245,0.418152,0.380421,0.371389,0.387180,0.354626,0.386047,0.376109,0.385652
65393,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P21,0.366708,0.391424,0.429494,0.400389,0.384282,0.451481,...,0.397373,0.379729,0.358661,0.386191,0.381462,0.389817,0.365854,0.361340,0.356809,0.370830
65394,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P22,0.360958,0.420659,0.389780,0.402575,0.421138,0.411756,...,0.406826,0.394486,0.396328,0.391756,0.370473,0.359580,0.341186,0.409142,0.373238,0.431071
65395,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P23,0.418334,0.435465,0.434098,0.442750,0.452277,0.441625,...,0.423901,0.396018,0.404833,0.407971,0.413785,0.436093,0.402584,0.390933,0.389488,0.419402


## 99 plate 20h_HL

In [84]:
# Define inputs
# Only two plates are normalized against each other here ('99v2' is not listed).
plates = ['99v1', '99v3']
# range(1, 45) -> columns y2_1 .. y2_44
y2_cols = [f'y2_{i}' for i in range(1, 45)]

# Run normalization (tie_handling is left at its default, True)
phase2_99_20h_HL_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='20h_HL',
    plates=plates,
    y2_cols=y2_cols
)

# View a few columns
phase2_99_20h_HL_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_35,y2_36,y2_37,y2_38,y2_39,y2_40,y2_41,y2_42,y2_43,y2_44
22980,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A02,0.227733,0.156949,0.275488,0.208355,0.217091,0.288140,...,0.171750,0.153272,0.168553,0.157615,0.142822,0.175983,0.167369,0.174141,0.183342,0.105016
22981,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A03,0.293961,0.208287,0.232045,0.273618,0.288352,0.217896,...,0.207750,0.150628,0.183110,0.113795,0.201841,0.154640,0.168062,0.108103,0.143275,0.131343
22982,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A04,0.196529,0.223045,0.263849,0.244969,0.140907,0.243491,...,0.131401,0.170996,0.150280,0.115324,0.121275,0.122425,0.126396,0.158191,0.143594,0.152415
22983,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A05,0.220471,0.180099,0.195316,0.215613,0.231598,0.290431,...,0.146328,0.183538,0.126604,0.144003,0.135325,0.125197,0.157508,0.111979,0.139288,0.140445
22984,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A06,0.245143,0.216371,0.229295,0.255868,0.256799,0.217103,...,0.154904,0.157866,0.133043,0.190131,0.147941,0.117884,0.150204,0.155876,0.151835,0.184703
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29103,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P20,0.177426,0.197113,0.143058,0.230338,0.185146,0.228542,...,0.154045,0.124297,0.088880,0.135597,0.080778,0.150357,0.141249,0.077163,0.165975,0.043349
29104,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P21,0.233342,0.180099,0.170652,0.234194,0.248434,0.174348,...,0.151294,0.149218,0.189918,0.089892,0.102106,0.166587,0.134158,0.158536,0.086216,0.140992
29105,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P22,0.153990,0.207864,0.212530,0.187158,0.213635,0.343544,...,0.161659,0.125137,0.128797,0.203046,0.172544,0.202567,0.082322,0.182243,0.160612,0.138841
29106,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P23,0.238947,0.269594,0.241434,0.261594,0.293087,0.288140,...,0.145192,0.217737,0.150280,0.197185,0.201841,0.200498,0.162786,0.152079,0.220176,0.187207


In [54]:
# Un-normalized rows for '20h_HL'.
# NOTE: this plate list also names '99v2', whereas the 20h_HL normalization cell
# uses only '99v1' and '99v3'; y2_cols is not assigned here and is reused from
# a previously run cell.
plates = ['99v1', '99v2', '99v3']
phase2_99_20h_HL= phase2_df1[(phase2_df1['light_regime'] == '20h_HL') & (phase2_df1['plate'].isin(plates))].copy()
phase2_99_20h_HL[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_35,y2_36,y2_37,y2_38,y2_39,y2_40,y2_41,y2_42,y2_43,y2_44
22980,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A02,0.223692,0.150451,0.279403,0.215679,0.231034,0.299706,...,0.203820,0.181967,0.195582,0.188456,0.169538,0.201735,0.191844,0.196477,0.210617,0.131806
22981,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A03,0.291092,0.206437,0.237117,0.280124,0.304270,0.233735,...,0.226963,0.179368,0.212361,0.141774,0.217817,0.177966,0.191977,0.124145,0.166607,0.157615
22982,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A04,0.190895,0.219992,0.272971,0.253991,0.144646,0.262105,...,0.156738,0.200098,0.176370,0.143704,0.144855,0.147119,0.151827,0.179914,0.166955,0.178890
22983,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A05,0.217283,0.176363,0.195978,0.222856,0.246436,0.303580,...,0.173130,0.214410,0.151630,0.172509,0.160044,0.149385,0.184198,0.129462,0.162519,0.167788
22984,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A06,0.241095,0.214038,0.234044,0.264630,0.273593,0.232244,...,0.183726,0.187214,0.158162,0.218776,0.175351,0.142223,0.178639,0.177912,0.175340,0.203963
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29103,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P20,0.182887,0.198695,0.147168,0.220024,0.174564,0.211684,...,0.125380,0.096428,0.069056,0.107113,0.056401,0.126640,0.113232,0.062233,0.138134,0.026668
29104,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P21,0.236611,0.183836,0.173101,0.223039,0.233664,0.162534,...,0.123246,0.120737,0.162598,0.066142,0.079219,0.140911,0.107941,0.137136,0.065446,0.113914
29105,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P22,0.163181,0.209844,0.209850,0.182413,0.199352,0.333134,...,0.134179,0.097590,0.103789,0.179072,0.149210,0.180913,0.061176,0.161209,0.135025,0.111902
29106,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P23,0.243034,0.271881,0.234719,0.252187,0.276447,0.276574,...,0.118371,0.197862,0.124189,0.171443,0.185864,0.178001,0.137160,0.131326,0.207128,0.166710


## 99 2h-2h

In [85]:
# Define inputs
plates = ['99v1', '99v2', '99v3']
# range(1, 49) -> columns y2_1 .. y2_48
y2_cols = [f'y2_{i}' for i in range(1, 49)]

# Run normalization (tie_handling is left at its default, True)
phase2_99_2h_2h_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='2h-2h',
    plates=plates,
    y2_cols=y2_cols
)

# View a few columns (only the first 10 timepoints are displayed)
phase2_99_2h_2h_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols[:10]]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,y2_7,y2_8,y2_9,y2_10
103170,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A02,,,,,,,,,,
103171,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A03,0.158425,0.289999,0.384687,0.178672,0.402788,0.323224,0.148167,0.388080,0.675147,0.662015
103172,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A04,0.291121,0.280075,0.133536,0.239350,0.323653,0.334633,0.307456,0.337102,0.630935,0.690169
103173,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A05,,,,,,,,,,
103174,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A06,0.230304,0.225471,0.284468,0.322598,0.294424,0.290643,0.284608,0.254281,0.598083,0.613276
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111208,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P20,0.233438,0.234873,0.241525,0.255709,0.277541,0.251533,0.148167,0.218046,0.568509,0.556291
111209,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P21,0.250182,0.250185,0.133536,0.227341,0.320908,0.300992,0.233526,0.254354,0.615140,0.583884
111210,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P22,0.254549,0.296875,0.296869,0.246711,0.402788,0.235917,0.265327,0.369644,0.590992,0.617263
111211,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P23,0.223592,0.259748,0.273237,0.265633,0.286395,0.297310,0.195880,0.228453,0.601497,0.599078


In [56]:
# Un-normalized rows for '2h-2h', first 10 timepoints displayed.
# NOTE: y2_cols is not assigned in this cell; it is reused from a previously
# run cell.
plates = ['99v1', '99v2', '99v3']
phase2_99_2h_2h= phase2_df1[(phase2_df1['light_regime'] == '2h-2h') & (phase2_df1['plate'].isin(plates))].copy()
phase2_99_2h_2h[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols[:10]]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,y2_7,y2_8,y2_9,y2_10
103170,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A02,,,,,,,,,,
103171,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A03,0.128414,0.287258,0.450798,0.138225,0.474914,0.318630,0.121447,0.437765,0.690907,0.670776
103172,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A04,0.275160,0.276654,0.088176,0.218042,0.322693,0.343155,0.305980,0.340596,0.635754,0.695876
103173,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A05,,,,,,,,,,
103174,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A06,0.206655,0.214014,0.274581,0.318951,0.286749,0.275571,0.277564,0.245363,0.600197,0.618032
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111208,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P20,0.228812,0.232599,0.245646,0.263600,0.297140,0.254598,0.098762,0.211163,0.559227,0.526474
111209,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P21,0.250658,0.252904,0.097357,0.224036,0.343323,0.316160,0.225369,0.257107,0.620514,0.569550
111210,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P22,0.256325,0.303709,0.312282,0.252933,0.389179,0.233858,0.265225,0.366266,0.588438,0.617212
111211,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P23,0.215780,0.265076,0.285530,0.276992,0.305228,0.314181,0.178173,0.225083,0.600893,0.592223


## 99 10min-10min

In [87]:
# Four plate labels: '99v2_1' and '99v2_2' are the two start-date-specific
# labels assigned to the 10min-10min rows of '99v2' earlier in the notebook.
plates = ['99v1', '99v2_1','99v2_2', '99v3']
# range(1, 85) -> columns y2_1 .. y2_84
y2_cols = [f'y2_{i}' for i in range(1, 85)]

# Run normalization (tie_handling is left at its default, True)
phase2_99_10min_10min_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='10min-10min',
    plates=plates,
    y2_cols=y2_cols
)

# View a few columns
phase2_99_10min_10min_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_75,y2_76,y2_77,y2_78,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84
14937,99v2_2,WT,,A02,0.221738,0.611262,0.230782,0.598887,0.246160,0.587988,...,0.183400,0.600210,0.608193,0.187202,0.213666,0.590404,0.603155,0.177763,0.182947,0.598128
14938,99v2_2,WT,,A03,0.240655,0.596470,0.184163,0.589264,0.258617,0.593946,...,0.210360,0.589197,0.589039,0.193542,0.160484,0.585217,0.580921,0.189026,0.175267,0.585428
14939,99v2_2,WT,,A04,0.245883,0.600197,0.222335,0.599651,0.248342,0.590468,...,0.183077,0.585084,0.579164,0.159363,0.172101,0.580202,0.583570,0.194446,0.152982,0.588294
14940,99v2_2,WT,,A05,0.205556,0.585050,0.213748,0.592797,0.197926,0.582116,...,0.166650,0.568843,0.578139,0.157874,0.168686,0.568143,0.574831,0.157229,0.162257,0.577867
14941,99v2_2,WT,,A06,0.214666,0.589445,0.193294,0.592935,0.232177,0.584264,...,0.172252,0.582533,0.590661,0.188363,0.194266,0.592544,0.581218,0.172923,0.163041,0.591307
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81095,99v2_1,WT,,P20,0.266987,0.591097,0.217602,0.607903,0.247015,0.604526,...,0.165029,0.579833,0.609624,0.146665,0.157770,0.566883,0.589627,0.215531,0.188928,0.603638
81096,99v2_1,WT,,P21,0.238022,0.626327,0.193294,0.618895,0.215102,0.613092,...,0.162425,0.598394,0.597395,0.184634,0.155314,0.601654,0.580156,0.193790,0.176255,0.582157
81097,99v2_1,WT,,P22,0.198136,0.612476,0.212659,0.590376,0.230822,0.598350,...,0.166650,0.567924,0.583176,0.198232,0.185296,0.595137,0.605063,0.228083,0.172805,0.600388
81098,99v2_1,WT,,P23,0.262315,0.652084,0.206150,0.600522,0.239666,0.607162,...,0.189067,0.630075,0.588490,0.176628,0.165002,0.588680,0.599729,0.178559,0.187568,0.598309


In [86]:
# Un-normalized rows for '10min-10min'.
plates = ['99v1', '99v2_1','99v2_2', '99v3']
# range(1, 86) -> columns y2_1 .. y2_85, i.e. one column more than the
# y2_1 .. y2_84 passed to the normalization call for this regime.
y2_cols = [f'y2_{i}' for i in range(1, 86)]
phase2_99_10min_10min= phase2_df1[(phase2_df1['light_regime'] == '10min-10min') & (phase2_df1['plate'].isin(plates))].copy()
phase2_99_10min_10min[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_76,y2_77,y2_78,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85
14937,99v2_2,WT,,A02,0.230300,0.586211,0.230817,0.580000,0.241381,0.568830,...,0.576596,0.580370,0.202011,0.229165,0.572685,0.579497,0.199824,0.205752,0.577854,
14938,99v2_2,WT,,A03,0.243044,0.575280,0.201956,0.572098,0.250696,0.573949,...,0.567629,0.567134,0.205525,0.188248,0.567454,0.558765,0.208057,0.200534,0.565170,
14939,99v2_2,WT,,A04,0.246176,0.578642,0.225390,0.580353,0.241917,0.571113,...,0.563131,0.556719,0.182794,0.196261,0.562340,0.562550,0.212236,0.183594,0.568438,
14940,99v2_2,WT,,A05,0.220302,0.565284,0.220229,0.574672,0.211264,0.565222,...,0.547902,0.555643,0.181457,0.193846,0.550653,0.553273,0.185945,0.190511,0.558709,
14941,99v2_2,WT,,A06,0.225863,0.569152,0.207408,0.574695,0.231349,0.566830,...,0.560477,0.568514,0.202769,0.210309,0.574201,0.559189,0.197110,0.190767,0.571718,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81095,99v2_1,WT,,P20,0.221731,0.602259,0.175121,0.620811,0.208346,0.615105,...,0.596135,0.622542,0.100670,0.108494,0.586022,0.610236,0.153268,0.143844,0.615513,
81096,99v2_1,WT,,P21,0.191330,0.636671,0.145076,0.630146,0.173228,0.621789,...,0.611669,0.614797,0.138879,0.106240,0.614478,0.601217,0.139777,0.132331,0.598149,
81097,99v2_1,WT,,P22,0.150407,0.623824,0.169797,0.604020,0.190256,0.609336,...,0.584601,0.602673,0.152480,0.135345,0.608539,0.621744,0.164253,0.130093,0.613851,
81098,99v2_1,WT,,P23,0.216622,0.646162,0.162957,0.614250,0.200008,0.617540,...,0.630610,0.607593,0.132231,0.115541,0.603222,0.618723,0.125772,0.142611,0.611616,


## 99 5min-5min

In [88]:
# Only two plates are normalized against each other here ('99v2' is not listed).
plates = ['99v1', '99v3']
# range(1, 89) -> columns y2_1 .. y2_88
y2_cols = [f'y2_{i}' for i in range(1, 89)]

# Run normalization (tie_handling is left at its default, True)
phase2_99_5min_5min_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='5min-5min',
    plates=plates,
    y2_cols=y2_cols
)

# View a few columns
phase2_99_5min_5min_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
1915,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A02,0.201527,0.613208,0.210021,0.596459,0.209403,0.570428,...,0.145874,0.555471,0.145607,0.562329,0.088512,0.567975,0.128579,0.575302,0.088954,0.544614
1916,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A03,0.202560,0.618160,0.220707,0.614432,0.162569,0.580081,...,0.132067,0.599380,0.114299,0.566579,0.083128,0.569391,0.086007,0.567587,0.110298,0.566066
1917,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A04,0.218712,0.589313,0.195667,0.577464,0.199060,0.565471,...,0.115733,0.583109,0.126911,0.563317,0.080002,0.567341,0.139651,0.559848,0.117158,0.581420
1918,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A05,0.222428,0.616181,0.170379,0.605413,0.145538,0.591351,...,0.105432,0.555289,0.137105,0.591590,0.126733,0.551006,0.126695,0.558061,0.110634,0.548610
1919,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A06,0.186464,0.632140,0.192082,0.602689,0.226035,0.605452,...,0.129803,0.574085,0.100779,0.565936,0.121237,0.563907,0.109801,0.573187,0.161852,0.564224
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111926,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P20,0.216187,0.574292,0.209063,0.560896,0.149038,0.570643,...,0.127278,0.518862,0.135671,0.511455,0.100158,0.546058,0.123681,0.527393,0.128692,0.528779
111927,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P21,0.201856,0.600112,0.228537,0.592337,0.207602,0.602545,...,0.112430,0.545203,0.081666,0.548752,0.117385,0.556202,0.114143,0.538410,0.111082,0.536315
111928,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P22,0.197394,0.589706,0.192653,0.584983,0.200956,0.574468,...,0.093929,0.531082,0.114576,0.547212,0.134587,0.580023,0.064413,0.543471,0.102006,0.536972
111929,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P23,0.204313,0.629012,0.200759,0.607914,0.170061,0.609451,...,0.138331,0.586118,0.125511,0.576066,0.118792,0.582347,0.131305,0.586215,0.122616,0.556035


In [69]:
# Un-normalized rows for '5min-5min'.
# NOTE: this plate list also names '99v2', whereas the 5min-5min normalization
# cell uses only '99v1' and '99v3'.
plates = ['99v1', '99v2', '99v3']
# range(1, 89) -> columns y2_1 .. y2_88
y2_cols = [f'y2_{i}' for i in range(1, 89)]
phase2_99_5min_5min= phase2_df1[(phase2_df1['light_regime'] == '5min-5min') & (phase2_df1['plate'].isin(plates))].copy()
phase2_99_5min_5min[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
1915,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A02,0.190647,0.617540,0.198424,0.601937,0.194258,0.572529,...,0.143888,0.561096,0.147433,0.562324,0.080917,0.572402,0.125075,0.581075,0.082306,0.548997
1916,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A03,0.191338,0.622610,0.206715,0.618540,0.149968,0.581675,...,0.128784,0.600096,0.114606,0.566284,0.075300,0.573925,0.080028,0.573264,0.105478,0.570385
1917,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A04,0.207064,0.596410,0.182301,0.581565,0.185519,0.568869,...,0.111504,0.587597,0.129830,0.563093,0.072907,0.571780,0.136522,0.565099,0.111975,0.586423
1918,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A05,0.210743,0.620200,0.158423,0.610157,0.131814,0.594201,...,0.099267,0.560894,0.139403,0.591845,0.123914,0.554444,0.123113,0.563273,0.105826,0.552662
1919,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A06,0.173941,0.634943,0.179336,0.607721,0.212796,0.606441,...,0.127207,0.578861,0.100190,0.565716,0.118084,0.567632,0.106516,0.578418,0.158164,0.568581
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111926,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P20,0.226441,0.559597,0.221269,0.556422,0.161216,0.568696,...,0.129905,0.504236,0.132920,0.502840,0.106688,0.541202,0.127401,0.519853,0.132965,0.521889
111927,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P21,0.212853,0.595234,0.243308,0.586928,0.222713,0.601182,...,0.117031,0.539050,0.083179,0.547626,0.122464,0.551718,0.117690,0.533224,0.115861,0.531716
111928,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P22,0.207505,0.582928,0.205530,0.579287,0.215237,0.573321,...,0.100957,0.523013,0.114054,0.545607,0.137307,0.576447,0.066795,0.537882,0.107096,0.532412
111929,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P23,0.215470,0.625658,0.213498,0.602891,0.182054,0.608078,...,0.140444,0.582572,0.123115,0.575228,0.123176,0.578813,0.134985,0.580583,0.126790,0.552123


## 99 1min-5min

In [89]:
plates = ['99v1','99v2', '99v3']
# range(1, 89) -> columns y2_1 .. y2_88
y2_cols = [f'y2_{i}' for i in range(1, 89)]

# Run normalization (tie_handling is left at its default, True)
phase2_99_1min_5min_normalized = quantile_normalize_light_regime(
    df=phase2_df1,
    light_regime='1min-5min',
    plates=plates,
    y2_cols=y2_cols
)

# View a few columns
phase2_99_1min_5min_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
73823,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A02,0.206692,0.603613,0.197244,0.590120,0.174082,0.584799,...,0.162868,0.591215,0.141781,0.565442,0.154426,0.573423,0.185880,0.555043,0.146450,0.590139
73824,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A03,0.240287,0.614567,0.234713,0.629063,0.214254,0.609945,...,0.173488,0.600924,0.164372,0.597564,0.189566,0.591688,0.139741,0.540954,0.150178,0.604586
73825,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A04,0.273506,0.617586,0.242326,0.627343,0.251313,0.616711,...,0.147536,0.572699,0.109095,0.580767,0.186663,0.576934,0.180161,0.622464,0.161121,0.578498
73826,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A05,0.226872,0.624183,0.203967,0.607969,0.218251,0.610126,...,0.133590,0.605723,0.146816,0.588969,0.172004,0.605923,0.180833,0.589280,0.168414,0.598917
73827,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A06,0.230715,0.615225,0.244470,0.635145,0.199110,0.617592,...,0.120409,0.615005,0.177312,0.610926,0.185527,0.603864,0.133559,0.607075,0.171209,0.590825
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92489,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P20,0.219029,0.602396,0.198746,0.561249,0.218318,0.562321,...,0.086489,0.563211,0.121702,0.544036,0.146394,0.557812,0.107304,0.548236,0.167934,0.573642
92490,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P21,0.274922,0.604591,0.224866,0.591066,0.241006,0.603719,...,0.143174,0.592178,0.175386,0.601221,0.180266,0.597309,0.169676,0.615209,0.143225,0.585270
92491,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P22,0.258804,0.601669,0.220842,0.614353,0.269906,0.595753,...,0.133795,0.592614,0.183072,0.587076,0.151192,0.592799,0.164473,0.588011,0.106073,0.600177
92492,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P23,0.274353,0.648643,0.243106,0.641830,0.264088,0.649679,...,0.185267,0.630612,0.153416,0.598311,0.169352,0.613847,0.124680,0.625069,0.180764,0.608165


In [90]:
# Un-normalized rows for '1min-5min', same plates and columns as the
# normalization call for this regime.
plates = ['99v1', '99v2', '99v3']
# range(1, 89) -> columns y2_1 .. y2_88
y2_cols = [f'y2_{i}' for i in range(1, 89)]
phase2_99_1min_5min= phase2_df1[(phase2_df1['light_regime'] == '1min-5min') & (phase2_df1['plate'].isin(plates))].copy()
phase2_99_1min_5min[['plate', 'mutant_ID', 'mutated_genes', 'well_id'] + y2_cols]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
73823,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A02,0.244237,0.616000,0.228961,0.598000,0.210520,0.593200,...,0.198987,0.602104,0.180279,0.584010,0.189989,0.587021,0.224178,0.576082,0.186390,0.605154
73824,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A03,0.272145,0.624415,0.268848,0.632193,0.249163,0.618226,...,0.210153,0.611501,0.203103,0.611096,0.229685,0.600804,0.177623,0.564027,0.190366,0.619733
73825,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A04,0.308494,0.626886,0.276824,0.630235,0.294680,0.624927,...,0.181397,0.587652,0.150512,0.595162,0.224464,0.590282,0.218063,0.642868,0.200722,0.594572
73826,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A05,0.260553,0.631662,0.233549,0.611139,0.252836,0.618376,...,0.168595,0.616070,0.184885,0.602784,0.206794,0.615792,0.218515,0.602380,0.209026,0.613940
73827,99v1,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A06,0.262911,0.624653,0.278540,0.638153,0.234040,0.626041,...,0.158375,0.628267,0.219041,0.626613,0.222952,0.613086,0.173067,0.620335,0.212106,0.605764
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92489,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P20,0.154492,0.609871,0.149423,0.564580,0.169085,0.574532,...,0.016109,0.569354,0.046175,0.540589,0.069150,0.555615,0.025101,0.540081,0.104758,0.577031
92490,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P21,0.217137,0.612801,0.175259,0.602197,0.193539,0.623012,...,0.073569,0.603181,0.106953,0.612268,0.111068,0.606737,0.097843,0.626844,0.075086,0.591459
92491,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P22,0.198408,0.609325,0.172206,0.628177,0.212200,0.615088,...,0.062993,0.603893,0.114429,0.597439,0.075935,0.601691,0.091782,0.598887,0.033338,0.607048
92492,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P23,0.216647,0.659097,0.197939,0.654461,0.207643,0.662598,...,0.119710,0.639304,0.082944,0.608651,0.099829,0.623926,0.044242,0.634865,0.118984,0.617023


## 99 1min-1min

In [91]:
# Inputs for the 1min-1min normalization: the plate labels to include and the
# y2 measurement columns y2_1 ... y2_88.
plates = ['99v1', '99v2', '99v3']
y2_cols = ['y2_' + str(n) for n in range(1, 89)]

# Call quantile_normalize_light_regime (defined earlier in the notebook) on the
# '1min-1min' light regime of phase2_df1.
phase2_99_1min_1min_normalized = quantile_normalize_light_regime(
    df=phase2_df1, light_regime='1min-1min', plates=plates, y2_cols=y2_cols,
)

# Display the identifying columns followed by every y2 column of the result.
phase2_99_1min_1min_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id', *y2_cols]]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
18767,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A02,0.276723,0.570942,0.247386,0.530161,0.261636,0.558337,...,0.218774,0.551678,0.191344,0.511628,0.222326,0.541171,0.214269,0.539996,0.257315,0.525944
18768,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A03,0.303614,0.591586,0.293911,0.579835,0.295312,0.551625,...,0.193050,0.538789,0.232996,0.530719,0.224822,0.530076,0.227987,0.544566,0.207679,0.547965
18769,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A04,0.266544,0.563614,0.270514,0.560817,0.277670,0.548670,...,0.220560,0.528575,0.204333,0.528308,0.176032,0.520441,0.199539,0.513455,0.236393,0.524954
18770,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A05,0.235442,0.573964,0.216853,0.572884,0.244905,0.556473,...,0.154392,0.476099,0.209720,0.537239,0.164719,0.500381,0.200430,0.497804,0.208454,0.520394
18771,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A06,0.264413,0.574167,0.242834,0.557126,0.296377,0.532207,...,0.203059,0.498779,0.201181,0.522669,0.202120,0.502456,0.234132,0.530583,0.170377,0.491571
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90191,99v2,WT,,P20,0.227497,0.551067,0.233223,0.530161,0.227185,0.539563,...,0.154597,0.476099,0.186697,0.476506,0.177745,0.478480,0.144245,0.487221,0.163501,0.482398
90192,99v2,WT,,P21,0.234054,0.582390,0.233887,0.535544,0.241593,0.543335,...,0.191042,0.483054,0.218545,0.493827,0.192355,0.468159,0.179477,0.496139,0.201384,0.495831
90193,99v2,WT,,P22,0.261817,0.586243,0.240793,0.560372,0.271682,0.572301,...,0.215438,0.536402,0.241730,0.545212,0.222623,0.534747,0.197628,0.513373,0.210599,0.519344
90194,99v2,WT,,P23,0.263551,0.587161,0.273056,0.574668,0.247436,0.575085,...,0.243242,0.523039,0.210362,0.523779,0.211236,0.519395,0.227633,0.525065,0.211770,0.522973


In [92]:
# 1min-1min rows taken directly from phase2_df1 (no call to
# quantile_normalize_light_regime in this cell), restricted to the listed plates.
plates = ['99v1', '99v2', '99v3']
y2_cols = ['y2_' + str(n) for n in range(1, 89)]  # y2_1 ... y2_88

phase2_99_1min_1min = (
    phase2_df1
    .loc[lambda frame: frame['light_regime'].eq('1min-1min') & frame['plate'].isin(plates)]
    .copy()  # independent copy so later edits do not touch phase2_df1
)

# Display the identifying columns followed by the y2 series.
phase2_99_1min_1min[['plate', 'mutant_ID', 'mutated_genes', 'well_id', *y2_cols]]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
18767,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A02,0.264551,0.570967,0.239774,0.526250,0.246456,0.561221,...,0.197089,0.551569,0.159307,0.496335,0.199378,0.534717,0.191272,0.536017,0.223950,0.518743
18768,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A03,0.294769,0.596971,0.290169,0.583798,0.281068,0.551924,...,0.166880,0.535127,0.208458,0.520417,0.201910,0.522696,0.207791,0.544026,0.169787,0.547799
18769,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A04,0.251932,0.562610,0.265719,0.560813,0.263428,0.548048,...,0.198527,0.522508,0.174997,0.517428,0.140762,0.510780,0.176177,0.503382,0.202838,0.517477
18770,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A05,0.214353,0.575144,0.199836,0.575340,0.226656,0.558710,...,0.116656,0.459644,0.180482,0.527319,0.127954,0.487491,0.176844,0.485770,0.170408,0.511400
18771,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",A06,0.249242,0.575305,0.233352,0.557543,0.283715,0.526833,...,0.177197,0.488413,0.170534,0.509013,0.175202,0.489087,0.211696,0.524634,0.124950,0.479536
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90191,99v2,WT,,P20,0.253484,0.549079,0.244806,0.527783,0.241559,0.528904,...,0.184919,0.481055,0.213759,0.486971,0.202037,0.483786,0.182755,0.482621,0.207548,0.481166
90192,99v2,WT,,P21,0.257681,0.570812,0.245223,0.532390,0.251401,0.531367,...,0.206135,0.483560,0.235085,0.496231,0.210559,0.478612,0.204260,0.489175,0.231119,0.489777
90193,99v2,WT,,P22,0.276893,0.573141,0.249415,0.549793,0.272008,0.550382,...,0.222168,0.518792,0.252931,0.528725,0.230217,0.521148,0.215668,0.502041,0.236641,0.507316
90194,99v2,WT,,P23,0.277850,0.574023,0.269821,0.557518,0.254883,0.551528,...,0.238875,0.509349,0.230207,0.514756,0.222052,0.510701,0.237466,0.508879,0.237413,0.509298


## 99 30s-30s

In [93]:
# Inputs for the 30s-30s normalization: the plate labels to include and the
# y2 measurement columns y2_1 ... y2_88.
plates = ['99v1', '99v2', '99v3']
y2_cols = ['y2_' + str(n) for n in range(1, 89)]

# Call quantile_normalize_light_regime (defined earlier in the notebook) on the
# '30s-30s' light regime of phase2_df1.
phase2_99_30s_30s_normalized = quantile_normalize_light_regime(
    df=phase2_df1, light_regime='30s-30s', plates=plates, y2_cols=y2_cols,
)

# Display the identifying columns followed by every y2 column of the result.
phase2_99_30s_30s_normalized[['plate', 'mutant_ID', 'mutated_genes', 'well_id', *y2_cols]]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
64248,99v2,WT,,A02,0.277805,0.562827,0.264473,0.541762,0.282163,0.553062,...,0.274138,0.526372,0.275010,0.511500,0.276162,0.531865,0.267551,0.543991,0.259284,0.542295
64249,99v2,WT,,A03,0.242030,0.553360,0.263181,0.534073,0.261607,0.539435,...,0.241979,0.505997,0.249168,0.496919,0.259219,0.497258,0.234394,0.516725,0.232904,0.532120
64250,99v2,WT,,A04,0.268252,0.549185,0.274796,0.549844,0.268714,0.531225,...,0.202399,0.492904,0.235565,0.483083,0.179299,0.476806,0.209723,0.493492,0.192402,0.476078
64251,99v2,WT,,A05,0.258248,0.547512,0.269089,0.533921,0.266186,0.528924,...,0.203437,0.473973,0.208724,0.471566,0.213617,0.488903,0.205783,0.494020,0.182906,0.491192
64252,99v2,WT,,A06,0.226427,0.553593,0.251804,0.539441,0.263681,0.538931,...,0.219966,0.490935,0.216109,0.502797,0.216184,0.489870,0.219936,0.497630,0.220110,0.492629
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71137,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P20,0.239109,0.534561,0.213346,0.534073,0.252233,0.558226,...,0.180331,0.478837,0.136196,0.476793,0.164860,0.472720,0.174343,0.462729,0.135955,0.466799
71138,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P21,0.275809,0.546987,0.271342,0.525303,0.229946,0.544919,...,0.184768,0.498834,0.243036,0.486051,0.181236,0.501730,0.176012,0.496307,0.166575,0.485730
71139,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P22,0.194016,0.560772,0.267330,0.517608,0.278332,0.569440,...,0.198205,0.480578,0.232050,0.499104,0.183400,0.496560,0.215953,0.531194,0.184536,0.516463
71140,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P23,0.293885,0.543905,0.291495,0.552917,0.232345,0.513677,...,0.272683,0.536088,0.240727,0.498901,0.269817,0.537293,0.241264,0.532905,0.277678,0.510910


In [94]:
# 30s-30s rows taken directly from phase2_df1 (no call to
# quantile_normalize_light_regime in this cell), restricted to the listed plates.
plates = ['99v1', '99v2', '99v3']
y2_cols = ['y2_' + str(n) for n in range(1, 89)]  # y2_1 ... y2_88

phase2_99_30s_30s = (
    phase2_df1
    .loc[lambda frame: frame['light_regime'].eq('30s-30s') & frame['plate'].isin(plates)]
    .copy()  # independent copy so later edits do not touch phase2_df1
)

# Display the identifying columns followed by the y2 series.
phase2_99_30s_30s[['plate', 'mutant_ID', 'mutated_genes', 'well_id', *y2_cols]]

Unnamed: 0,plate,mutant_ID,mutated_genes,well_id,y2_1,y2_2,y2_3,y2_4,y2_5,y2_6,...,y2_79,y2_80,y2_81,y2_82,y2_83,y2_84,y2_85,y2_86,y2_87,y2_88
64248,99v2,WT,,A02,0.288173,0.546189,0.274986,0.533421,0.287106,0.534634,...,0.268977,0.491739,0.261998,0.487974,0.274629,0.501902,0.256919,0.520069,0.258011,0.515691
64249,99v2,WT,,A03,0.259196,0.538272,0.274464,0.527709,0.274980,0.526115,...,0.244350,0.479122,0.243354,0.478092,0.260782,0.475815,0.235075,0.500202,0.240203,0.507053
64250,99v2,WT,,A04,0.281197,0.535489,0.282168,0.538891,0.278914,0.519756,...,0.219680,0.470734,0.233978,0.467684,0.208421,0.461900,0.220071,0.484832,0.215932,0.472524
64251,99v2,WT,,A05,0.273729,0.534211,0.278432,0.527616,0.277945,0.518031,...,0.220485,0.457156,0.217490,0.460752,0.231100,0.470464,0.218353,0.485069,0.209380,0.481782
64252,99v2,WT,,A06,0.244931,0.538567,0.266312,0.531335,0.276181,0.525705,...,0.230265,0.468882,0.222417,0.481704,0.232484,0.471071,0.226659,0.487441,0.232206,0.482555
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71137,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P20,0.210527,0.525221,0.167321,0.524872,0.220568,0.557301,...,0.146047,0.467223,0.111934,0.468372,0.126424,0.457223,0.144459,0.445111,0.103434,0.443350
71138,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P21,0.252564,0.542258,0.246072,0.513800,0.185393,0.541582,...,0.153103,0.492819,0.235041,0.479180,0.152237,0.494050,0.147327,0.487653,0.134039,0.469781
71139,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P22,0.168596,0.558241,0.242333,0.506077,0.253710,0.571134,...,0.169332,0.470122,0.220537,0.493196,0.154552,0.488056,0.193979,0.531559,0.149500,0.507819
71140,99v3,WT,"Cre09.g392000,Cre09.g406500,Cre02.g106400",P23,0.274738,0.538040,0.274593,0.547774,0.188885,0.506852,...,0.260938,0.542356,0.232751,0.492963,0.264502,0.542598,0.226762,0.533275,0.269833,0.500218


In [95]:
# Stack the eight per-light-regime normalized frames (built in earlier cells)
# row-wise into one table; ignore_index=True discards the original row labels
# and renumbers the result from 0.
phase2_99_quantile1 = pd.concat(
    objs=[
        phase2_99_20h_ML_normalized,
        phase2_99_20h_HL_normalized,
        phase2_99_2h_2h_normalized,
        phase2_99_10min_10min_normalized,
        phase2_99_1min_1min_normalized,
        phase2_99_30s_30s_normalized,
        phase2_99_5min_5min_normalized,
        phase2_99_1min_5min_normalized,
    ],
    axis=0,
    ignore_index=True,
)

In [96]:
# Write the combined frame to CSV in the working directory, without the index column.
phase2_99_quantile1.to_csv(path_or_buf='phase2_99_qn(5.18).csv', index=False)

In [97]:
# Show (rows, columns) of the concatenated frame as a quick size check.
phase2_99_quantile1.shape

(8809, 727)

In [98]:
# Original note: "lack 10min-10min 30v2, part of 30v3".
# NOTE(review): unclear what "30v2"/"30v3" refer to here — confirm with the author.
# Keep every row of phase2_df1 whose plate is one of the labels below, including
# the '99v2_1' / '99v2_2' labels assigned to 10min-10min runs earlier in the notebook.
plates = ['99v1', '99v2', '99v2_1', '99v2_2', '99v3']
data = phase2_df1.loc[phase2_df1['plate'].isin(plates)]

# Show (rows, columns) of the selection.
data.shape

(8809, 727)