In [1]:
###
# This is a python notebook that is used to calculate the genetic interaction score of normalized OD data
# Prepared by: Myra Paz Masinas, Boone Lab, University of Toronto, November 2023
#
# This executes procedure # 71 from the paper: Halder et al. Design, execution, and analysis of CRISPR–Cas9-based 
# deletions and genetic interaction networks in the fungal pathogen Candida albicans. 
# Nat Protoc 14, 955–975 (2019). https://doi.org/10.1038/s41596-018-0122-6
###

In [2]:
import pandas as pd
import numpy as np

### user-defined variables - update this section as needed

In [3]:
cdn = '15C'  # which condition to process - Change
num_reps = 3  # number of replicates - Change

main_dir = '/Users/violahalder/Desktop/GitHub_GI_Analysis'  # main directory path - Change
gene_path = f'{main_dir}/files/geneNames.csv'  # path to geneNames.csv file - DO NOT CHANGE
norm_path = f'{main_dir}/output/allData.csv'  # path to normalized allData.csv file - DO NOT CHANGE
outpath = f'{main_dir}/output/{cdn}_fitness_and_eps.xlsx'  # path to expected output file - DO NOT CHANGE

### get gene list

In [4]:
df_genes = pd.read_csv(gene_path, header=None)
genes = list(df_genes[1].values)

In [5]:
lower_matrix = []
single_genes = []
combo_genes = []
for idx, gene in enumerate(genes):
    lower_matrix.append(gene)
    single_genes.append(gene)
    
    for i in range(idx, -1, -1):
        other_gene = genes[i]
        if other_gene != gene:
            lower_matrix.append(f'{gene}{other_gene}')
            combo_genes.append(f'{gene}{other_gene}')

### load normalized values

In [6]:
df_norm = pd.read_csv(norm_path)
df_norm.rename(columns={'Unnamed: 0': 'Genes'}, inplace=True)
df_norm[:2]

Unnamed: 0,Genes,15C_R1_lower,15C_R1_upper,15C_R2_lower,15C_R2_upper,15C_R3_lower,15C_R3_upper,No Treatment_R1_lower,No Treatment_R1_upper,No Treatment_R2_lower,No Treatment_R2_upper,No Treatment_R3_lower,No Treatment_R3_upper
0,CAT1,1.427808,1.427808,1.379058,1.379058,1.347524,1.347524,1.023405,1.023405,0.946744,0.946744,1.008417,1.008417
1,C7_00810W_A,0.013299,0.013299,0.234342,0.234342,0.108178,0.108178,1.080044,1.080044,1.000714,1.000714,1.001256,1.001256


### compile fitness scores and calculate GI scores for each gene pair and replicate

In [7]:
final_cols = ['Gene_X', 'Gene_Y', 'Fitness_X', 'Fitness_Y', 'Fitness_XY', 'Fitness_YX', 'EPS_XY', 'EPS_YX']
final_data = {}
rep_data = {}

for rep in range(1, num_reps+1):
    final_data[f'R{rep}'] = {c: [] for c in final_cols}
    
    for gene_xy in lower_matrix:
        if gene_xy in combo_genes:
            for g in single_genes:
                if gene_xy.startswith(g):
                    gene_x = g
                    gene_y = gene_xy.split(g)[1]

            # lower matrix: XY
            fit_x =  df_norm[df_norm['Genes']==gene_x][f'{cdn}_R{rep}_lower'].values[0]
            fit_y =  df_norm[df_norm['Genes']==gene_y][f'{cdn}_R{rep}_lower'].values[0]
            fit_xy = df_norm[df_norm['Genes']==gene_xy][f'{cdn}_R{rep}_lower'].values[0]
            eps_xy = fit_xy - (fit_x * fit_y)

            # upper matrix: YX
            fit_yx = df_norm[df_norm['Genes']==gene_xy][f'{cdn}_R{rep}_upper'].values[0]
            eps_yx = fit_yx - (fit_x * fit_y)
            
            final_data[f'R{rep}']['Gene_X'].append(gene_x)
            final_data[f'R{rep}']['Gene_Y'].append(gene_y)
            final_data[f'R{rep}']['Fitness_X'].append(fit_x)
            final_data[f'R{rep}']['Fitness_Y'].append(fit_y)
            final_data[f'R{rep}']['Fitness_XY'].append(fit_xy)
            final_data[f'R{rep}']['Fitness_YX'].append(fit_yx)
            final_data[f'R{rep}']['EPS_XY'].append(eps_xy)
            final_data[f'R{rep}']['EPS_YX'].append(eps_yx)
            
            # compile eps data for averaging
            if (gene_x, gene_y) not in rep_data:
                rep_data[(gene_x, gene_y)] = {'Fitness_X': [fit_x], 'Fitness_Y': [fit_y],
                                              'Fitness_XY': [fit_xy], 'Fitness_YX': [fit_yx],
                                              'EPS_XY': [eps_xy], 'EPS_YX': [eps_yx]}
            else:
                rep_data[(gene_x, gene_y)]['Fitness_X'].append(fit_x)
                rep_data[(gene_x, gene_y)]['Fitness_Y'].append(fit_y)
                rep_data[(gene_x, gene_y)]['Fitness_XY'].append(fit_xy)
                rep_data[(gene_x, gene_y)]['Fitness_YX'].append(fit_yx)
                rep_data[(gene_x, gene_y)]['EPS_XY'].append(eps_xy)
                rep_data[(gene_x, gene_y)]['EPS_YX'].append(eps_yx)
            
            
            #print(f'X: {gene_x}  Y: {gene_y}  Rep: {rep}   XY: {fit_xy:.2f}  YX: {fit_yx:.2f} X: {fit_x:.2f}  Y: {fit_y:.2f} EPS_XY: {eps_xy:.2f} EPS_YX: {eps_yx:.2f}') # Can uncomment if you would like to see the values
#         break

### get average data

In [8]:
avg_data = {}
for key, values in rep_data.items():
    avg_data[key] = {'Fitness_X': np.mean(values['Fitness_X']), 'Fitness_Y': np.mean(values['Fitness_Y']),
                     'Fitness_XY': np.mean(values['Fitness_XY']), 'Fitness_YX': np.mean(values['Fitness_YX']),
                     'EPS_XY': np.mean(values['EPS_XY']), 'EPS_YX': np.mean(values['EPS_YX'])}

In [9]:
df_avg = pd.DataFrame.from_dict(avg_data, orient='index').reset_index()
df_avg.columns = final_cols
df_avg['Average_Fitness_XY_Fitness_YX'] = df_avg[['Fitness_XY', 'Fitness_YX']].mean(axis=1)
df_avg['Average_EPS_XY_EPS_YX'] = df_avg[['EPS_XY', 'EPS_YX']].mean(axis=1)
df_avg[:2] # Can change the number to see more columns

Unnamed: 0,Gene_X,Gene_Y,Fitness_X,Fitness_Y,Fitness_XY,Fitness_YX,EPS_XY,EPS_YX,Average_Fitness_XY_Fitness_YX,Average_EPS_XY_EPS_YX
0,TYE7,CAT1,0.432315,1.384797,0.437915,0.534644,-0.154163,-0.057433,0.486279,-0.105798
1,YAK1,TYE7,1.044491,0.432315,0.527271,0.638989,0.043667,0.155385,0.58313,0.099526


### save to output file

In [10]:
# save dataframes to excel - one sheet per replicate
df_reps = {}
for rep in range(1, num_reps+1):
    df_reps[f'R{rep}'] = pd.DataFrame.from_dict(final_data[f'R{rep}'])
    
writer = pd.ExcelWriter(outpath, engine="xlsxwriter")

for rep in range(1, num_reps+1):
    df_reps[f'R{rep}'].to_excel(writer, sheet_name=f'R{rep}', index=False)

df_avg.to_excel(writer, sheet_name="Average", index=False)
writer.close()
print(f'Saved {outpath}') # Tells you where this files is saved

Saved /Users/violahalder/Desktop/GitHub_GI_Analysis/output/15C_fitness_and_eps.xlsx


In [11]:
# Completion marker for the notebook run.
print("ALL DONE")

ALL DONE
