# Gene essentiality

In [7]:
import pandas as pd
from cobra.io.json import load_json_model
from cobra.flux_analysis import single_gene_deletion

The authors perfomed a gene essentiality analysis to find which genes are important to still enable growth of the cells. Check how  they identified them.

Check gene knockouts on COBRApy docs to identify this important genes per cell line.

Compare your results. The authors compared their results with in-vitro data for validation (this would be very nice to do).

In [2]:
model = load_json_model('./data/Recon3D.json')
model.solver = 'gurobi' #you can use cplex or glpk as you whish, but gurobi and cplex are usually faster

Set parameter Username
Set parameter LicenseID to value 2732830
Academic license - for non-commercial use only - expires 2026-11-04


In [8]:
LG = {
    "59M": pd.read_csv("./data/deletion_ACH-000520_LG.csv", index_col=0),
    "HEYA8": pd.read_csv("./data/deletion_ACH-000542_LG.csv", index_col=0)
}

HG = {
    "COV318": pd.read_csv("./data/deletion_ACH-000256_HG.csv", index_col=0),
    "CAOV3": pd.read_csv("./data/deletion_ACH-000713_HG.csv", index_col=0),
    "OAW28": pd.read_csv("./data/deletion_ACH-000116_HG.csv", index_col=0)
}

In [9]:
for group_name, group_dict in [("LG", LG), ("HG", HG)]:
    print(f"\n=== Checking {group_name} models ===")
    for cell, df in group_dict.items():
        unique_vals = df["growth"].nunique() if "growth" in df.columns else df.iloc[:,0].nunique()
        print(f"{cell}: {unique_vals} unique growth values")



=== Checking LG models ===
59M: 1 unique growth values
HEYA8: 1 unique growth values

=== Checking HG models ===
COV318: 1 unique growth values
CAOV3: 1 unique growth values
OAW28: 1 unique growth values


## Experimental Validation

In [None]:
f = pd.read_csv('data/CRISPR_gene_dependency.csv')

In [None]:
#Extract subset containing cell lines on which CBM was performed: 59m, heya8, ov56, caov3, cov318, oaw28
oc_ids = ['ACH-000520','ACH-000542','ACH-000091','ACH-000713','ACH-000256','ACH-000116'] 

indexes_to_remove = []
for n in range(len(f)):
    if f.iloc[n,0] not in oc_ids: 
        indexes_to_remove.append(n)
f_2 = f.drop(f.index[indexes_to_remove])
f_2

Unnamed: 0,DepMap_ID,A1BG (1),A1CF (29974),A2M (2),A2ML1 (144568),A3GALT2 (127550),A4GALT (53947),A4GNT (51146),AAAS (8086),AACS (65985),...,ZWILCH (55055),ZWINT (11130),ZXDA (7789),ZXDB (158586),ZXDC (79364),ZYG11A (440590),ZYG11B (79699),ZYX (7791),ZZEF1 (23140),ZZZ3 (26009)
62,ACH-000116,0.00974,0.118181,0.052278,0.005124,0.034051,0.055238,0.009203,0.300745,0.017164,...,0.204094,0.776858,0.02991,0.051818,0.012775,0.0201,0.135621,0.011016,0.09203,0.453578
148,ACH-000256,0.138922,0.02207,0.018532,0.053145,0.024841,0.03435,0.00159,0.422603,0.054672,...,0.169871,0.757895,0.084996,0.02023,0.000781,0.013533,0.010321,0.021971,0.006167,0.44927
334,ACH-000520,0.16737,0.040641,0.00815,0.006143,0.060948,0.056582,0.006833,0.190486,0.021255,...,0.432195,0.795119,0.077896,0.173668,0.04804,0.08345,0.210538,0.161016,0.054267,0.094941
346,ACH-000542,0.077545,0.030802,0.022475,0.027509,0.040512,0.029159,0.125693,0.679893,0.100139,...,0.543757,0.644974,0.016062,0.013627,0.036028,0.032089,0.21708,0.068786,0.109258,0.514024
460,ACH-000713,0.056964,0.012344,0.005115,0.007754,0.017439,0.030139,0.07287,0.081048,0.020171,...,0.161708,0.521575,0.006034,0.01005,0.026967,0.067992,0.097278,0.041282,0.024393,0.19455


In [None]:
oc_names = ['oaw28','cov318','59m','heya8','caov3']
f_2['cell_lines'] = oc_names
f_3 = f_2.transpose() # now indexes are columns
f_4 = f_3[1:]# indexing: remove first row
f_4

Unnamed: 0,62,148,334,346,460
A1BG (1),0.00974,0.138922,0.16737,0.077545,0.056964
A1CF (29974),0.118181,0.02207,0.040641,0.030802,0.012344
A2M (2),0.052278,0.018532,0.00815,0.022475,0.005115
A2ML1 (144568),0.005124,0.053145,0.006143,0.027509,0.007754
A3GALT2 (127550),0.034051,0.024841,0.060948,0.040512,0.017439
...,...,...,...,...,...
ZYG11B (79699),0.135621,0.010321,0.210538,0.21708,0.097278
ZYX (7791),0.011016,0.021971,0.161016,0.068786,0.041282
ZZEF1 (23140),0.09203,0.006167,0.054267,0.109258,0.024393
ZZZ3 (26009),0.453578,0.44927,0.094941,0.514024,0.19455


In [None]:
genes = (f_4.index).to_list()


In [None]:
gene_names = []
for n in range(len(genes)):
    gene_names.append((genes[n].split())[0])
gene_names

['A1BG',
 'A1CF',
 'A2M',
 'A2ML1',
 'A3GALT2',
 'A4GALT',
 'A4GNT',
 'AAAS',
 'AACS',
 'AADAC',
 'AADACL2',
 'AADACL3',
 'AADACL4',
 'AADAT',
 'AAGAB',
 'AAK1',
 'AAMDC',
 'AAMP',
 'AANAT',
 'AAR2',
 'AARD',
 'AARS1',
 'AARS2',
 'AARSD1',
 'AASDH',
 'AASDHPPT',
 'AASS',
 'AATF',
 'AATK',
 'ABAT',
 'ABCA1',
 'ABCA10',
 'ABCA12',
 'ABCA13',
 'ABCA2',
 'ABCA3',
 'ABCA4',
 'ABCA5',
 'ABCA6',
 'ABCA7',
 'ABCA8',
 'ABCA9',
 'ABCB1',
 'ABCB10',
 'ABCB11',
 'ABCB4',
 'ABCB5',
 'ABCB6',
 'ABCB7',
 'ABCB8',
 'ABCB9',
 'ABCC1',
 'ABCC10',
 'ABCC11',
 'ABCC12',
 'ABCC2',
 'ABCC3',
 'ABCC4',
 'ABCC5',
 'ABCC6',
 'ABCC8',
 'ABCC9',
 'ABCD1',
 'ABCD2',
 'ABCD3',
 'ABCD4',
 'ABCE1',
 'ABCF1',
 'ABCF2',
 'ABCF3',
 'ABCG1',
 'ABCG2',
 'ABCG4',
 'ABCG5',
 'ABCG8',
 'ABHD1',
 'ABHD10',
 'ABHD11',
 'ABHD12',
 'ABHD12B',
 'ABHD13',
 'ABHD14A',
 'ABHD14B',
 'ABHD15',
 'ABHD16A',
 'ABHD16B',
 'ABHD17A',
 'ABHD17B',
 'ABHD17C',
 'ABHD18',
 'ABHD2',
 'ABHD3',
 'ABHD4',
 'ABHD5',
 'ABHD6',
 'ABHD8',
 'ABI1',
 '

In [None]:
f_4['genes'] = gene_names
f_4

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  f_4['genes'] = gene_names


Unnamed: 0,62,148,334,346,460,genes
A1BG (1),0.00974,0.138922,0.16737,0.077545,0.056964,A1BG
A1CF (29974),0.118181,0.02207,0.040641,0.030802,0.012344,A1CF
A2M (2),0.052278,0.018532,0.00815,0.022475,0.005115,A2M
A2ML1 (144568),0.005124,0.053145,0.006143,0.027509,0.007754,A2ML1
A3GALT2 (127550),0.034051,0.024841,0.060948,0.040512,0.017439,A3GALT2
...,...,...,...,...,...,...
ZYG11B (79699),0.135621,0.010321,0.210538,0.21708,0.097278,ZYG11B
ZYX (7791),0.011016,0.021971,0.161016,0.068786,0.041282,ZYX
ZZEF1 (23140),0.09203,0.006167,0.054267,0.109258,0.024393,ZZEF1
ZZZ3 (26009),0.453578,0.44927,0.094941,0.514024,0.19455,ZZZ3


In [None]:
_59m_d_scores = f_4.iloc[:17386,2]
heya8_d_scores = f_4.iloc[:17386,3]
caov3_d_scores = f_4.iloc[:17386,4]
cov318_d_scores = f_4.iloc[:17386,1]
oaw28_d_scores = f_4.iloc[:17386,0]
gene_names_2 = gene_names[:17386]

In [None]:
d = pd.DataFrame()
d['_59m_d'] = _59m_d_scores
d['heya8_d'] = heya8_d_scores
d['caov3'] = caov3_d_scores
d['cov318_d'] = cov318_d_scores
d['oaw28_d'] = oaw28_d_scores
d['genes'] = gene_names_2
d = d.set_index('genes')
d

Unnamed: 0_level_0,_59m_d,heya8_d,caov3,cov318_d,oaw28_d
genes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A1BG,0.16737,0.077545,0.056964,0.138922,0.00974
A1CF,0.040641,0.030802,0.012344,0.02207,0.118181
A2M,0.00815,0.022475,0.005115,0.018532,0.052278
A2ML1,0.006143,0.027509,0.007754,0.053145,0.005124
A3GALT2,0.060948,0.040512,0.017439,0.024841,0.034051
...,...,...,...,...,...
ZYG11A,0.08345,0.032089,0.067992,0.013533,0.0201
ZYG11B,0.210538,0.21708,0.097278,0.010321,0.135621
ZYX,0.161016,0.068786,0.041282,0.021971,0.011016
ZZEF1,0.054267,0.109258,0.024393,0.006167,0.09203


In [None]:
d.to_csv('data/CCLE_low_high_grade_dependencies.csv')