# Demo code associated with article "Unveiling Inter-Embryo Variability in Spindle Length over time: towards Quantitative Phenotype Analysis." by Y. Le Cunff et al. 2024

Code author: Jacques Pécréaux

May 2024

CNRS, Univ Rennes, IGDR (Institut de Génétique et Développement de Rennes) – UMR 6290, F-35000 Rennes, France

License: [CeCILL v2.1, see file Licence_CeCILL_V2.1-en.txt](./Licence_CeCILL_V2.1-en.txt)

In [25]:
# temperature considered
temp="18C" # "18C", "23C" with the provided dataset

# We will also download the interaction from wormbase
current_WormBase_Release='WS292'

# Logistic regression will return the probability that a gene interacts with another. We need a threshold to classify between interacting and non-interacting
proba_theshold_prediction=0.9

# we also need the PCA score file output by PCA_paper_code.ipynb
pca_scores_file='scores_relative_to_none'+temp+'.csv'

## Imports, (down)loading and subfunctions

In [26]:
import pandas as pd
import json
from sklearn.linear_model import LogisticRegression
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, recall_score, precision_score
import requests
import os
import pathlib

In [27]:
# Download table from wormbase for correspondance between gene name and id
url=f"https://downloads.wormbase.org/releases/current-production-release/species/c_elegans/PRJNA13758/annotation/c_elegans.PRJNA13758.{current_WormBase_Release}.geneIDs.txt.gz"
cur_dir=pathlib.Path().resolve()
# Create the Data folder if it doesn't exist
if not os.path.exists(os.path.join(cur_dir,'Data')):
    os.makedirs('Data')
# Define the file path
file_path = 'Data/' + url.split('/')[-1]
# Download the file
response = requests.get(url)
with open(file_path, 'wb') as file:
    file.write(response.content)
print(f"File downloaded successfully to {file_path}")
WBgene = pd.read_csv(file_path, compression='gzip', sep=',', header=None, names=['dummy', 'geneId', 'geneName', 'sequence', 'status', 'type'])

File downloaded successfully to Data/c_elegans.PRJNA13758.WS292.geneIDs.txt.gz


In [28]:
# Load the PCA scores
df = pd.read_csv(pca_scores_file)
display(df)

Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,tempe,strain
0,ANA019none,-1.043194,1.466452,5.329549,18C,ANA019
1,JEP13gpr1,0.273005,-0.840942,-0.350799,18C,JEP13
2,JEP14gpr2-18C,-1.030865,-3.242924,1.638099,18C,JEP14
3,JEP15L4440,0.231701,-2.249044,4.431642,18C,JEP15
4,JEP1klp13-18C,-0.669484,-2.066330,0.301457,18C,JEP1
...,...,...,...,...,...,...
83,unc59,-0.178511,1.244468,-10.093683,18C,TH27
84,vab8,-0.375866,-0.210577,0.648375,18C,TH27
85,zen4,1.288270,8.995004,-10.353139,18C,TH27
86,zyg11,-0.769377,-9.888831,-0.444174,18C,TH27


In [29]:
def find_interactor(WBgoi,json_file,tempe=temp,df=df,proba_theshold_prediction=proba_theshold_prediction):
    goi=WBgoi.replace('-', '')
    df=df.loc[ df['tempe'] == tempe]
    gene_interacting=list()
    
    # get the interactors of klp-19 from wormbase
    # Assuming the JSON file is named "interactors.json"
    with open(json_file) as file:
        data = json.load(file)
    #display(data)
    for i in data['fields']['interactions']['data']['edges']:
        if not i['effector']['label'] in gene_interacting:
            gene_interacting.append(i['effector']['label'])
        if not i['affected']['label'] in gene_interacting:
            gene_interacting.append(i['affected']['label'])
    #display(gene_interacting)
    gene_interacting_formated = [item.replace('-', '') for item in gene_interacting]
    gene_interacting_formated.append(goi) # append the gene of interest itself
    
    # get interacting genes in our dataset
    cols=df.columns
    selected_cols = [col for col in cols if goi in col]
    if not selected_cols:
        dist = df[['gene', 'principal component 1', 'principal component 2', 'principal component 3' ]].copy()
    else:
        dist_col=selected_cols[0]
        dist = df[['gene', dist_col, 'principal component 1', 'principal component 2', 'principal component 3' ]].copy()
    dist['interacting'] = dist['gene'].apply(lambda x: any(item in x for item in gene_interacting_formated))

    # create the train set using control and non-treated as non interacting
    dist_train=dist.loc[np.logical_or(dist['interacting'], np.logical_or(dist['gene'].map(lambda x:x.startswith('none')),np.asarray(dist['gene'].map(lambda x:x.startswith('L4440')))) )]
    print(f"---------- training set {WBgoi}-------")
    pd.set_option('display.max_rows', 0) 
    display(dist_train)
    pd.set_option('display.max_rows', 10) 
    print(any(dist_train[['interacting']]))
    if not any(dist_train[['interacting']].to_numpy()):
        raise Exception("Something wrong in naming ? No interacting gene in our dataset")
    
    # Logistic regression
    clf = LogisticRegression(random_state=0,solver='liblinear',verbose=1,fit_intercept=False).fit(dist_train[['principal component 1', 'principal component 2', 'principal component 3' ]].to_numpy().reshape(-1, 3), dist_train['interacting'].to_numpy())
    # Predict the labels for the training dataset
    train_predictions = clf.predict(dist_train[['principal component 1', 'principal component 2', 'principal component 3']].to_numpy().reshape(-1, 3))
    # Compute accuracy
    accuracy = accuracy_score(dist_train['interacting'].to_numpy(), train_predictions)
    # Compute recall
    recall = recall_score(dist_train['interacting'].to_numpy(), train_predictions)
    # Compute precision
    precision = precision_score(dist_train['interacting'].to_numpy(), train_predictions)
    print(f"On the training set: Accuracy: {accuracy}, Recall: {recall}, Precision: {precision}")

    # annotate our dataset with predictions
    predProba=clf.predict_proba(dist[['principal component 1', 'principal component 2', 'principal component 3' ]].to_numpy().reshape(-1, 3))
    dist['predicted_probability']=predProba[:,1]
    # dist['predicted_interaction']=clf.predict(dist[['principal component 1', 'principal component 2', 'principal component 3' ]].to_numpy().reshape(-1, 3))
    dist['predicted_interaction']= np.asarray(dist['predicted_probability']>=proba_theshold_prediction)
    list_predicted=dist.loc[dist['predicted_interaction']]['gene'].to_list()
    return (list_predicted, dist)

def get_id_from_name(goi):
    selec=(WBgene['geneName']==goi)
    if not any(selec):
        selec=(WBgene['sequence']==goi)
    name=WBgene.loc[selec]['geneId'].values[0]
    return name
    
def get_interaction(goi):
    name=get_id_from_name(goi)
    print(f"--- goi={goi} name={name} ----")
    json_file=os.path.join(cur_dir,"Data",f"gene_{name}_interactions.json")
    # Check if the file already exists
    if not os.path.exists(json_file):
        print("Downloading the file")
        # URL of the interaction file
        url = f"https://wormbase.org/rest/widget/gene/{name}/interactions?download=1&content-type=application%2Fjson"
        response = requests.get(url)
        # Save the file to the specified file path
        with open(json_file, 'wb') as json:
            json.write(response.content)
    return json_file
def get_list_from_goi(goi,tempe=temp,proba_theshold_prediction=proba_theshold_prediction):
    json_file=get_interaction(goi)
    (inter_klp19, pred_klp19)= find_interactor(goi,json_file,tempe=tempe,proba_theshold_prediction=proba_theshold_prediction)
    return (inter_klp19, pred_klp19)

def assemble_list(idx,common_genes,common_details,interactors):
    if idx == 0:
        common_genes = set(interactors)
        common_details = details.drop(columns=['predicted_probability'],inplace=False)
        common_details = common_details.filter(regex=r'^(?!dist_to)')
    else:
        common_genes = common_genes.intersection(interactors)
        common_details = pd.merge(common_details, details.drop(columns=['predicted_probability'],inplace=False).filter(regex=r'^(?!dist_to)'), on='gene', how='inner',suffixes=('', '_details'))        
        common_details['predicted_interaction'] = np.logical_and(common_details['predicted_interaction'], common_details['predicted_interaction_details'])
        common_details['interacting'] = np.logical_or(common_details['interacting'], common_details['interacting_details'])
        common_details = common_details.drop(columns=common_details.filter(regex='_details$').columns)
        display(common_details)
    return (common_genes,common_details)

def  output_formater(common_genes,common_details):
    display(common_genes)
    details_interacting_genes = common_details.loc[common_details['predicted_interaction']]
    count_known = len(details_interacting_genes[(details_interacting_genes['predicted_interaction'] == True) & (details_interacting_genes['interacting'] == True)])
    count_false_neg = len(details_interacting_genes[(details_interacting_genes['predicted_interaction'] == False) & (details_interacting_genes['interacting'] == True)])
    count_novel = len(details_interacting_genes[(details_interacting_genes['predicted_interaction'] == True) & (details_interacting_genes['interacting'] == False)])
    print(f"Number of known recovered interactions: {count_known}   Number of known but NOT recovered interactions: {count_false_neg} Number of novel interactions: {count_novel}   Total number of interactions: {count_known+count_novel}")
    details_interacting_genes[['principal component 1', 'principal component 2', 'principal component 3']] = details_interacting_genes[['principal component 1', 'principal component 2', 'principal component 3']].round(2)
    display(details_interacting_genes)
    return details_interacting_genes

## poleward flux after Steblyanko 2020

In [30]:
common_genes = set()
common_details = None
gois=['klp-19', 'bmk-1', 'klp-15', 'klp-16' , 'klp-17', 'klp-18', 'lin-5', 'klp-7']
for idx,goi in enumerate(gois):
    print(f"=============== interactors for {goi} ===============")
    interactors, details = get_list_from_goi(goi)
    print(f"--------------- found interactors ({goi}) --------------")
    pd.set_option('display.max_rows', 0) 
    display(details.loc[details['predicted_interaction']][['gene', 'interacting', 'predicted_interaction', 'predicted_probability']].sort_values('predicted_probability',ascending=False))
    pd.set_option('display.max_rows', 10) 
    common_genes,common_details = assemble_list(idx,common_genes,common_details,interactors)
print("=============== Common genes =================")
details_interacting_genes = output_formater(common_genes,common_details)

--- goi=klp-19 name=WBGene00002229 ----
---------- training set klp-19-------


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting
10,L4440-18C,-0.723055,-1.546943,0.525031,False
11,L4440ebp2,-0.498394,0.984421,2.054312,False
50,klp19,-0.597349,-3.926979,-4.945162,True
63,none-18C,0.0,0.0,-0.0,False
67,plk1-18C,0.424861,-8.71225,-6.001948,True
85,zen4,1.28827,8.995004,-10.353139,True


True
[LibLinear]iter  1 act 1.968e+00 pre 1.725e+00 delta 3.342e-01 f 4.159e+00 |g| 1.210e+01 CG   2
iter  2 act 3.795e-01 pre 3.110e-01 delta 4.143e-01 f 2.191e+00 |g| 3.063e+00 CG   3
iter  3 act 1.213e-01 pre 9.609e-02 delta 4.143e-01 f 1.811e+00 |g| 1.092e+00 CG   2
iter  4 act 3.436e-02 pre 3.135e-02 delta 4.143e-01 f 1.690e+00 |g| 3.740e-01 CG   2
iter  5 act 1.216e-03 pre 1.212e-03 delta 4.143e-01 f 1.656e+00 |g| 5.680e-02 CG   3
iter  6 act 7.229e-08 pre 7.228e-08 delta 4.143e-01 f 1.655e+00 |g| 7.037e-04 CG   2
On the training set: Accuracy: 1.0, Recall: 1.0, Precision: 1.0
--------------- found interactors (klp-19) --------------


Unnamed: 0,gene,interacting,predicted_interaction,predicted_probability
79,tpxl1-18C,False,True,0.999998
21,cls2-18C,False,True,0.999997
18,cdk1,False,True,0.999940
85,zen4,True,True,0.999873
40,gpb1,False,True,0.999726
83,unc59,False,True,0.999583
70,spd1,False,True,0.998568
74,such1dylt1,False,True,0.997916
14,air1,False,True,0.997597
...,...,...,...,...


--- goi=bmk-1 name=WBGene00000257 ----
---------- training set bmk-1-------


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting
1,JEP13gpr1,0.273005,-0.840942,-0.350799,True
2,JEP14gpr2-18C,-1.030865,-3.242924,1.638099,True
6,JEP3gpr1,-1.257637,-3.375586,1.657985,True
7,JEP4gpr2-18C,-1.023349,-1.924331,4.170737,True
10,L4440-18C,-0.723055,-1.546943,0.525031,False
11,L4440ebp2,-0.498394,0.984421,2.054312,False
14,air1,-4.206413,1.172099,-9.398000,True
17,bmk1-18C,-0.131047,1.620277,-1.108807,True
23,dhc1,0.508078,-1.924023,-1.872476,True
...,...,...,...,...,...


True
[LibLinear]iter  1 act 3.832e+00 pre 3.157e+00 delta 6.707e-01 f 1.456e+01 |g| 1.388e+01 CG   2
iter  2 act 1.436e+00 pre 1.201e+00 delta 8.237e-01 f 1.072e+01 |g| 5.315e+00 CG   3
iter  3 act 1.380e-01 pre 1.316e-01 delta 8.237e-01 f 9.289e+00 |g| 1.246e+00 CG   3
iter  4 act 8.249e-04 pre 8.211e-04 delta 8.237e-01 f 9.151e+00 |g| 8.443e-02 CG   3
iter  5 act 4.200e-08 pre 4.199e-08 delta 8.237e-01 f 9.150e+00 |g| 6.398e-04 CG   3
On the training set: Accuracy: 0.8571428571428571, Recall: 0.8888888888888888, Precision: 0.9411764705882353
--------------- found interactors (bmk-1) --------------


Unnamed: 0,gene,interacting,predicted_interaction,predicted_probability
79,tpxl1-18C,False,True,1.000000
18,cdk1,False,True,0.999999
21,cls2-18C,False,True,0.999988
14,air1,True,True,0.999982
40,gpb1,False,True,0.999947
26,dyci1,False,True,0.999794
74,such1dylt1,False,True,0.999743
67,plk1-18C,False,True,0.999608
73,such1L4440,False,True,0.999591
...,...,...,...,...


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting,predicted_interaction
0,ANA019none,-1.043194,1.466452,5.329549,False,False
1,JEP13gpr1,0.273005,-0.840942,-0.350799,True,False
2,JEP14gpr2-18C,-1.030865,-3.242924,1.638099,True,False
3,JEP15L4440,0.231701,-2.249044,4.431642,False,False
4,JEP1klp13-18C,-0.669484,-2.066330,0.301457,False,False
...,...,...,...,...,...,...
83,unc59,-0.178511,1.244468,-10.093683,False,True
84,vab8,-0.375866,-0.210577,0.648375,False,False
85,zen4,1.288270,8.995004,-10.353139,True,False
86,zyg11,-0.769377,-9.888831,-0.444174,False,False


--- goi=klp-15 name=WBGene00002225 ----
---------- training set klp-15-------


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting
10,L4440-18C,-0.723055,-1.546943,0.525031,False
11,L4440ebp2,-0.498394,0.984421,2.054312,False
30,ebp1,-0.589048,-1.572828,2.13372,True
31,ebp1ebp2,-0.276091,0.780492,-1.270438,True
32,ebp1ebp2ebp3,-0.929308,-0.835375,-2.727331,True
33,ebp1ebp3,-0.547722,-1.060237,-1.284696,True
46,klp15,-0.350271,0.2783,1.430395,True
47,klp16,-0.355957,-2.078986,-2.713604,True
63,none-18C,0.0,0.0,-0.0,False
67,plk1-18C,0.424861,-8.71225,-6.001948,True


True
[LibLinear]iter  1 act 1.663e+00 pre 1.519e+00 delta 5.745e-01 f 7.625e+00 |g| 9.177e+00 CG   2
iter  2 act 2.671e-01 pre 2.381e-01 delta 5.745e-01 f 5.962e+00 |g| 2.144e+00 CG   3
iter  3 act 2.853e-02 pre 2.582e-02 delta 5.745e-01 f 5.694e+00 |g| 6.042e-01 CG   2
iter  4 act 1.298e-03 pre 1.281e-03 delta 5.745e-01 f 5.666e+00 |g| 9.621e-02 CG   3
iter  5 act 6.565e-07 pre 6.562e-07 delta 5.745e-01 f 5.665e+00 |g| 2.555e-03 CG   2
On the training set: Accuracy: 0.7272727272727273, Recall: 0.75, Precision: 0.8571428571428571
--------------- found interactors (klp-15) --------------


Unnamed: 0,gene,interacting,predicted_interaction,predicted_probability
79,tpxl1-18C,False,True,1.000000
18,cdk1,False,True,0.999810
21,cls2-18C,False,True,0.999269
40,gpb1,False,True,0.998165
14,air1,False,True,0.998033
74,such1dylt1,False,True,0.994797
26,dyci1,False,True,0.993396
73,such1L4440,False,True,0.992755
67,plk1-18C,True,True,0.992504
...,...,...,...,...


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting,predicted_interaction
0,ANA019none,-1.043194,1.466452,5.329549,False,False
1,JEP13gpr1,0.273005,-0.840942,-0.350799,True,False
2,JEP14gpr2-18C,-1.030865,-3.242924,1.638099,True,False
3,JEP15L4440,0.231701,-2.249044,4.431642,False,False
4,JEP1klp13-18C,-0.669484,-2.066330,0.301457,False,False
...,...,...,...,...,...,...
83,unc59,-0.178511,1.244468,-10.093683,False,True
84,vab8,-0.375866,-0.210577,0.648375,False,False
85,zen4,1.288270,8.995004,-10.353139,True,False
86,zyg11,-0.769377,-9.888831,-0.444174,False,False


--- goi=klp-16 name=WBGene00002226 ----
---------- training set klp-16-------


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting
10,L4440-18C,-0.723055,-1.546943,0.525031,False
11,L4440ebp2,-0.498394,0.984421,2.054312,False
46,klp15,-0.350271,0.2783,1.430395,True
47,klp16,-0.355957,-2.078986,-2.713604,True
63,none-18C,0.0,0.0,-0.0,False
67,plk1-18C,0.424861,-8.71225,-6.001948,True


True
[LibLinear]iter  1 act 1.008e+00 pre 9.282e-01 delta 5.079e-01 f 4.159e+00 |g| 7.021e+00 CG   2
iter  2 act 7.319e-02 pre 6.306e-02 delta 5.079e-01 f 3.151e+00 |g| 1.413e+00 CG   1
iter  3 act 8.214e-03 pre 7.708e-03 delta 5.079e-01 f 3.078e+00 |g| 3.314e-01 CG   2
iter  4 act 2.355e-04 pre 2.344e-04 delta 5.079e-01 f 3.069e+00 |g| 4.115e-02 CG   2
iter  5 act 1.910e-06 pre 1.910e-06 delta 5.079e-01 f 3.069e+00 |g| 2.813e-03 CG   2
On the training set: Accuracy: 0.8333333333333334, Recall: 0.6666666666666666, Precision: 1.0
--------------- found interactors (klp-16) --------------


Unnamed: 0,gene,interacting,predicted_interaction,predicted_probability
79,tpxl1-18C,False,True,0.999868
21,cls2-18C,False,True,0.999541
18,cdk1,False,True,0.998814
40,gpb1,False,True,0.996823
83,unc59,False,True,0.991692
85,zen4,False,True,0.990588
74,such1dylt1,False,True,0.989106
73,such1L4440,False,True,0.981778
14,air1,False,True,0.979928
...,...,...,...,...


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting,predicted_interaction
0,ANA019none,-1.043194,1.466452,5.329549,False,False
1,JEP13gpr1,0.273005,-0.840942,-0.350799,True,False
2,JEP14gpr2-18C,-1.030865,-3.242924,1.638099,True,False
3,JEP15L4440,0.231701,-2.249044,4.431642,False,False
4,JEP1klp13-18C,-0.669484,-2.066330,0.301457,False,False
...,...,...,...,...,...,...
83,unc59,-0.178511,1.244468,-10.093683,False,True
84,vab8,-0.375866,-0.210577,0.648375,False,False
85,zen4,1.288270,8.995004,-10.353139,True,False
86,zyg11,-0.769377,-9.888831,-0.444174,False,False


--- goi=klp-17 name=WBGene00002227 ----
---------- training set klp-17-------


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting
10,L4440-18C,-0.723055,-1.546943,0.525031,False
11,L4440ebp2,-0.498394,0.984421,2.054312,False
14,air1,-4.206413,1.172099,-9.398,True
17,bmk1-18C,-0.131047,1.620277,-1.108807,True
23,dhc1,0.508078,-1.924023,-1.872476,True
25,dnc1,-0.724038,-1.214808,1.352019,True
48,klp17,-0.490345,-0.717917,-0.412519,True
52,klp3,-0.546674,-0.628759,-0.766375,True
63,none-18C,0.0,0.0,-0.0,False
67,plk1-18C,0.424861,-8.71225,-6.001948,True


True
[LibLinear]iter  1 act 1.878e+00 pre 1.648e+00 delta 4.764e-01 f 7.625e+00 |g| 1.066e+01 CG   3
iter  2 act 3.691e-01 pre 2.927e-01 delta 4.764e-01 f 5.746e+00 |g| 2.955e+00 CG   2
iter  3 act 9.882e-02 pre 8.832e-02 delta 4.764e-01 f 5.377e+00 |g| 1.049e+00 CG   3
iter  4 act 3.607e-03 pre 3.517e-03 delta 4.764e-01 f 5.278e+00 |g| 1.856e-01 CG   2
iter  5 act 1.775e-05 pre 1.774e-05 delta 4.764e-01 f 5.275e+00 |g| 1.066e-02 CG   3
On the training set: Accuracy: 0.8181818181818182, Recall: 0.875, Precision: 0.875
--------------- found interactors (klp-17) --------------


Unnamed: 0,gene,interacting,predicted_interaction,predicted_probability
79,tpxl1-18C,False,True,0.999987
18,cdk1,False,True,0.999729
40,gpb1,False,True,0.999164
67,plk1-18C,True,True,0.999157
26,dyci1,False,True,0.999060
74,such1dylt1,False,True,0.998373
73,such1L4440,False,True,0.998189
86,zyg11,False,True,0.997994
27,dyci1ebp2,False,True,0.993859
...,...,...,...,...


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting,predicted_interaction
0,ANA019none,-1.043194,1.466452,5.329549,False,False
1,JEP13gpr1,0.273005,-0.840942,-0.350799,True,False
2,JEP14gpr2-18C,-1.030865,-3.242924,1.638099,True,False
3,JEP15L4440,0.231701,-2.249044,4.431642,False,False
4,JEP1klp13-18C,-0.669484,-2.066330,0.301457,False,False
...,...,...,...,...,...,...
83,unc59,-0.178511,1.244468,-10.093683,False,True
84,vab8,-0.375866,-0.210577,0.648375,False,False
85,zen4,1.288270,8.995004,-10.353139,True,False
86,zyg11,-0.769377,-9.888831,-0.444174,False,False


--- goi=klp-18 name=WBGene00002228 ----
---------- training set klp-18-------


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting
10,L4440-18C,-0.723055,-1.546943,0.525031,False
11,L4440ebp2,-0.498394,0.984421,2.054312,False
49,klp18,-0.915597,-2.572807,1.148282,True
63,none-18C,0.0,0.0,-0.0,False
85,zen4,1.28827,8.995004,-10.353139,True


True
[LibLinear]iter  1 act 8.472e-01 pre 7.814e-01 delta 5.708e-01 f 3.466e+00 |g| 6.896e+00 CG   2
iter  2 act 5.282e-02 pre 4.553e-02 delta 5.708e-01 f 2.619e+00 |g| 1.498e+00 CG   1
iter  3 act 7.977e-03 pre 7.560e-03 delta 5.708e-01 f 2.566e+00 |g| 3.573e-01 CG   2
iter  4 act 1.635e-04 pre 1.630e-04 delta 5.708e-01 f 2.558e+00 |g| 4.431e-02 CG   2
iter  5 act 1.270e-08 pre 1.270e-08 delta 5.708e-01 f 2.558e+00 |g| 4.495e-04 CG   2
On the training set: Accuracy: 0.8, Recall: 1.0, Precision: 0.6666666666666666
--------------- found interactors (klp-18) --------------


Unnamed: 0,gene,interacting,predicted_interaction,predicted_probability
79,tpxl1-18C,False,True,0.999986
18,cdk1,False,True,0.999832
21,cls2-18C,False,True,0.999619
40,gpb1,False,True,0.999499
74,such1dylt1,False,True,0.998166
73,such1L4440,False,True,0.997027
67,plk1-18C,False,True,0.996619
83,unc59,False,True,0.993676
26,dyci1,False,True,0.992005
...,...,...,...,...


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting,predicted_interaction
0,ANA019none,-1.043194,1.466452,5.329549,False,False
1,JEP13gpr1,0.273005,-0.840942,-0.350799,True,False
2,JEP14gpr2-18C,-1.030865,-3.242924,1.638099,True,False
3,JEP15L4440,0.231701,-2.249044,4.431642,False,False
4,JEP1klp13-18C,-0.669484,-2.066330,0.301457,False,False
...,...,...,...,...,...,...
83,unc59,-0.178511,1.244468,-10.093683,False,True
84,vab8,-0.375866,-0.210577,0.648375,False,False
85,zen4,1.288270,8.995004,-10.353139,True,False
86,zyg11,-0.769377,-9.888831,-0.444174,False,False


--- goi=lin-5 name=WBGene00002994 ----
---------- training set lin-5-------


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting
1,JEP13gpr1,0.273005,-0.840942,-0.350799,True
2,JEP14gpr2-18C,-1.030865,-3.242924,1.638099,True
6,JEP3gpr1,-1.257637,-3.375586,1.657985,True
7,JEP4gpr2-18C,-1.023349,-1.924331,4.170737,True
9,JEP6lin5-18C,-1.10204,-1.6195,9.132557,True
10,L4440-18C,-0.723055,-1.546943,0.525031,False
11,L4440ebp2,-0.498394,0.984421,2.054312,False
16,aspm1,-0.551169,1.216247,4.288914,True
23,dhc1,0.508078,-1.924023,-1.872476,True
29,dyrb1,-1.134324,-4.033935,-3.2824,True


True
[LibLinear]iter  1 act 1.430e+00 pre 1.344e+00 delta 2.648e-01 f 1.109e+01 |g| 1.511e+01 CG   2
cg reaches trust region boundary
iter  2 act 2.579e-01 pre 2.563e-01 delta 4.823e-01 f 9.660e+00 |g| 2.294e+00 CG   2
iter  3 act 1.161e-01 pre 1.171e-01 delta 4.823e-01 f 9.402e+00 |g| 7.142e-01 CG   3
iter  4 act 3.828e-04 pre 3.810e-04 delta 4.823e-01 f 9.286e+00 |g| 2.195e-01 CG   2
iter  5 act 2.523e-06 pre 2.523e-06 delta 4.823e-01 f 9.285e+00 |g| 3.447e-03 CG   3
On the training set: Accuracy: 0.6875, Recall: 0.7692307692307693, Precision: 0.8333333333333334
--------------- found interactors (lin-5) --------------


Unnamed: 0,gene,interacting,predicted_interaction,predicted_probability
79,tpxl1-18C,False,True,0.999384
14,air1,False,True,0.968747
18,cdk1,False,True,0.919364


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting,predicted_interaction
0,ANA019none,-1.043194,1.466452,5.329549,False,False
1,JEP13gpr1,0.273005,-0.840942,-0.350799,True,False
2,JEP14gpr2-18C,-1.030865,-3.242924,1.638099,True,False
3,JEP15L4440,0.231701,-2.249044,4.431642,False,False
4,JEP1klp13-18C,-0.669484,-2.066330,0.301457,False,False
...,...,...,...,...,...,...
83,unc59,-0.178511,1.244468,-10.093683,False,False
84,vab8,-0.375866,-0.210577,0.648375,False,False
85,zen4,1.288270,8.995004,-10.353139,True,False
86,zyg11,-0.769377,-9.888831,-0.444174,False,False


--- goi=klp-7 name=WBGene00002219 ----
---------- training set klp-7-------


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting
10,L4440-18C,-0.723055,-1.546943,0.525031,False
11,L4440ebp2,-0.498394,0.984421,2.054312,False
14,air1,-4.206413,1.172099,-9.398,True
44,klp12,-0.285156,-0.332009,-0.223893,True
55,klp7,-0.818308,-0.340189,-6.474885,True
63,none-18C,0.0,0.0,-0.0,False
67,plk1-18C,0.424861,-8.71225,-6.001948,True
85,zen4,1.28827,8.995004,-10.353139,True
87,zyg9,-0.878232,-5.03797,6.289178,True


True
[LibLinear]iter  1 act 1.826e+00 pre 1.632e+00 delta 2.770e-01 f 6.238e+00 |g| 1.458e+01 CG   2
iter  2 act 2.312e-01 pre 1.897e-01 delta 2.770e-01 f 4.412e+00 |g| 2.876e+00 CG   2
iter  3 act 4.831e-02 pre 4.229e-02 delta 2.770e-01 f 4.181e+00 |g| 8.453e-01 CG   3
iter  4 act 2.698e-03 pre 2.610e-03 delta 2.770e-01 f 4.132e+00 |g| 1.676e-01 CG   3
iter  5 act 7.280e-06 pre 7.267e-06 delta 2.770e-01 f 4.130e+00 |g| 9.178e-03 CG   3
On the training set: Accuracy: 0.7777777777777778, Recall: 0.8333333333333334, Precision: 0.8333333333333334
--------------- found interactors (klp-7) --------------


Unnamed: 0,gene,interacting,predicted_interaction,predicted_probability
79,tpxl1-18C,False,True,0.999893
18,cdk1,False,True,0.999231
40,gpb1,False,True,0.998239
74,such1dylt1,False,True,0.995707
67,plk1-18C,True,True,0.995308
21,cls2-18C,False,True,0.995134
73,such1L4440,False,True,0.994280
26,dyci1,False,True,0.991893
76,such1mdf1L4440,False,True,0.978143
...,...,...,...,...


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting,predicted_interaction
0,ANA019none,-1.043194,1.466452,5.329549,False,False
1,JEP13gpr1,0.273005,-0.840942,-0.350799,True,False
2,JEP14gpr2-18C,-1.030865,-3.242924,1.638099,True,False
3,JEP15L4440,0.231701,-2.249044,4.431642,False,False
4,JEP1klp13-18C,-0.669484,-2.066330,0.301457,False,False
...,...,...,...,...,...,...
83,unc59,-0.178511,1.244468,-10.093683,False,False
84,vab8,-0.375866,-0.210577,0.648375,False,False
85,zen4,1.288270,8.995004,-10.353139,True,False
86,zyg11,-0.769377,-9.888831,-0.444174,False,False




{'air1', 'cdk1', 'tpxl1-18C'}

Number of known recovered interactions: 1   Number of known but NOT recovered interactions: 0 Number of novel interactions: 2   Total number of interactions: 3


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  details_interacting_genes[['principal component 1', 'principal component 2', 'principal component 3']] = details_interacting_genes[['principal component 1', 'principal component 2', 'principal component 3']].round(2)


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting,predicted_interaction
14,air1,-4.21,1.17,-9.4,True,True
18,cdk1,-1.67,-5.07,-13.8,False,True
79,tpxl1-18C,-8.24,-3.02,-20.48,False,True


### poleward flux, Steblyanko except NCD (redundant) and bmk-1 (divergent function ?)

In [31]:
common_genes = set()
common_details = None
gois=['klp-19', 'klp-18', 'lin-5', 'klp-7']
for idx,goi in enumerate(gois):
    print(f"=============== interactors for {goi} ===============")
    interactors, details = get_list_from_goi(goi)
    print(f"--------------- found interactors ({goi}) --------------")
    pd.set_option('display.max_rows', 0) 
    display(details.loc[details['predicted_interaction']][['gene', 'interacting', 'predicted_interaction', 'predicted_probability']].sort_values('predicted_probability',ascending=False))
    pd.set_option('display.max_rows', 10) 
    common_genes,common_details = assemble_list(idx,common_genes,common_details,interactors)
print("=============== Common genes =================")
details_interacting_genes = output_formater(common_genes,common_details)

--- goi=klp-19 name=WBGene00002229 ----
---------- training set klp-19-------


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting
10,L4440-18C,-0.723055,-1.546943,0.525031,False
11,L4440ebp2,-0.498394,0.984421,2.054312,False
50,klp19,-0.597349,-3.926979,-4.945162,True
63,none-18C,0.0,0.0,-0.0,False
67,plk1-18C,0.424861,-8.71225,-6.001948,True
85,zen4,1.28827,8.995004,-10.353139,True


True
[LibLinear]iter  1 act 1.968e+00 pre 1.725e+00 delta 3.342e-01 f 4.159e+00 |g| 1.210e+01 CG   2
iter  2 act 3.795e-01 pre 3.110e-01 delta 4.143e-01 f 2.191e+00 |g| 3.063e+00 CG   3
iter  3 act 1.213e-01 pre 9.609e-02 delta 4.143e-01 f 1.811e+00 |g| 1.092e+00 CG   2
iter  4 act 3.436e-02 pre 3.135e-02 delta 4.143e-01 f 1.690e+00 |g| 3.740e-01 CG   2
iter  5 act 1.216e-03 pre 1.212e-03 delta 4.143e-01 f 1.656e+00 |g| 5.680e-02 CG   3
iter  6 act 7.229e-08 pre 7.228e-08 delta 4.143e-01 f 1.655e+00 |g| 7.037e-04 CG   2
On the training set: Accuracy: 1.0, Recall: 1.0, Precision: 1.0
--------------- found interactors (klp-19) --------------


Unnamed: 0,gene,interacting,predicted_interaction,predicted_probability
79,tpxl1-18C,False,True,0.999998
21,cls2-18C,False,True,0.999997
18,cdk1,False,True,0.999940
85,zen4,True,True,0.999873
40,gpb1,False,True,0.999726
83,unc59,False,True,0.999583
70,spd1,False,True,0.998568
74,such1dylt1,False,True,0.997916
14,air1,False,True,0.997597
...,...,...,...,...


--- goi=klp-18 name=WBGene00002228 ----
---------- training set klp-18-------


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting
10,L4440-18C,-0.723055,-1.546943,0.525031,False
11,L4440ebp2,-0.498394,0.984421,2.054312,False
49,klp18,-0.915597,-2.572807,1.148282,True
63,none-18C,0.0,0.0,-0.0,False
85,zen4,1.28827,8.995004,-10.353139,True


True
[LibLinear]iter  1 act 8.472e-01 pre 7.814e-01 delta 5.708e-01 f 3.466e+00 |g| 6.896e+00 CG   2
iter  2 act 5.282e-02 pre 4.553e-02 delta 5.708e-01 f 2.619e+00 |g| 1.498e+00 CG   1
iter  3 act 7.977e-03 pre 7.560e-03 delta 5.708e-01 f 2.566e+00 |g| 3.573e-01 CG   2
iter  4 act 1.635e-04 pre 1.630e-04 delta 5.708e-01 f 2.558e+00 |g| 4.431e-02 CG   2
iter  5 act 1.270e-08 pre 1.270e-08 delta 5.708e-01 f 2.558e+00 |g| 4.495e-04 CG   2
On the training set: Accuracy: 0.8, Recall: 1.0, Precision: 0.6666666666666666
--------------- found interactors (klp-18) --------------


Unnamed: 0,gene,interacting,predicted_interaction,predicted_probability
79,tpxl1-18C,False,True,0.999986
18,cdk1,False,True,0.999832
21,cls2-18C,False,True,0.999619
40,gpb1,False,True,0.999499
74,such1dylt1,False,True,0.998166
73,such1L4440,False,True,0.997027
67,plk1-18C,False,True,0.996619
83,unc59,False,True,0.993676
26,dyci1,False,True,0.992005
...,...,...,...,...


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting,predicted_interaction
0,ANA019none,-1.043194,1.466452,5.329549,False,False
1,JEP13gpr1,0.273005,-0.840942,-0.350799,False,False
2,JEP14gpr2-18C,-1.030865,-3.242924,1.638099,False,False
3,JEP15L4440,0.231701,-2.249044,4.431642,False,False
4,JEP1klp13-18C,-0.669484,-2.066330,0.301457,False,False
...,...,...,...,...,...,...
83,unc59,-0.178511,1.244468,-10.093683,False,True
84,vab8,-0.375866,-0.210577,0.648375,False,False
85,zen4,1.288270,8.995004,-10.353139,True,True
86,zyg11,-0.769377,-9.888831,-0.444174,False,False


--- goi=lin-5 name=WBGene00002994 ----
---------- training set lin-5-------


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting
1,JEP13gpr1,0.273005,-0.840942,-0.350799,True
2,JEP14gpr2-18C,-1.030865,-3.242924,1.638099,True
6,JEP3gpr1,-1.257637,-3.375586,1.657985,True
7,JEP4gpr2-18C,-1.023349,-1.924331,4.170737,True
9,JEP6lin5-18C,-1.10204,-1.6195,9.132557,True
10,L4440-18C,-0.723055,-1.546943,0.525031,False
11,L4440ebp2,-0.498394,0.984421,2.054312,False
16,aspm1,-0.551169,1.216247,4.288914,True
23,dhc1,0.508078,-1.924023,-1.872476,True
29,dyrb1,-1.134324,-4.033935,-3.2824,True


True
[LibLinear]iter  1 act 1.430e+00 pre 1.344e+00 delta 2.648e-01 f 1.109e+01 |g| 1.511e+01 CG   2
cg reaches trust region boundary
iter  2 act 2.579e-01 pre 2.563e-01 delta 4.823e-01 f 9.660e+00 |g| 2.294e+00 CG   2
iter  3 act 1.161e-01 pre 1.171e-01 delta 4.823e-01 f 9.402e+00 |g| 7.142e-01 CG   3
iter  4 act 3.828e-04 pre 3.810e-04 delta 4.823e-01 f 9.286e+00 |g| 2.195e-01 CG   2
iter  5 act 2.523e-06 pre 2.523e-06 delta 4.823e-01 f 9.285e+00 |g| 3.447e-03 CG   3
On the training set: Accuracy: 0.6875, Recall: 0.7692307692307693, Precision: 0.8333333333333334
--------------- found interactors (lin-5) --------------


Unnamed: 0,gene,interacting,predicted_interaction,predicted_probability
79,tpxl1-18C,False,True,0.999384
14,air1,False,True,0.968747
18,cdk1,False,True,0.919364


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting,predicted_interaction
0,ANA019none,-1.043194,1.466452,5.329549,False,False
1,JEP13gpr1,0.273005,-0.840942,-0.350799,True,False
2,JEP14gpr2-18C,-1.030865,-3.242924,1.638099,True,False
3,JEP15L4440,0.231701,-2.249044,4.431642,False,False
4,JEP1klp13-18C,-0.669484,-2.066330,0.301457,False,False
...,...,...,...,...,...,...
83,unc59,-0.178511,1.244468,-10.093683,False,False
84,vab8,-0.375866,-0.210577,0.648375,False,False
85,zen4,1.288270,8.995004,-10.353139,True,False
86,zyg11,-0.769377,-9.888831,-0.444174,False,False


--- goi=klp-7 name=WBGene00002219 ----
---------- training set klp-7-------


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting
10,L4440-18C,-0.723055,-1.546943,0.525031,False
11,L4440ebp2,-0.498394,0.984421,2.054312,False
14,air1,-4.206413,1.172099,-9.398,True
44,klp12,-0.285156,-0.332009,-0.223893,True
55,klp7,-0.818308,-0.340189,-6.474885,True
63,none-18C,0.0,0.0,-0.0,False
67,plk1-18C,0.424861,-8.71225,-6.001948,True
85,zen4,1.28827,8.995004,-10.353139,True
87,zyg9,-0.878232,-5.03797,6.289178,True


True
[LibLinear]iter  1 act 1.826e+00 pre 1.632e+00 delta 2.770e-01 f 6.238e+00 |g| 1.458e+01 CG   2
iter  2 act 2.312e-01 pre 1.897e-01 delta 2.770e-01 f 4.412e+00 |g| 2.876e+00 CG   2
iter  3 act 4.831e-02 pre 4.229e-02 delta 2.770e-01 f 4.181e+00 |g| 8.453e-01 CG   3
iter  4 act 2.698e-03 pre 2.610e-03 delta 2.770e-01 f 4.132e+00 |g| 1.676e-01 CG   3
iter  5 act 7.280e-06 pre 7.267e-06 delta 2.770e-01 f 4.130e+00 |g| 9.178e-03 CG   3
On the training set: Accuracy: 0.7777777777777778, Recall: 0.8333333333333334, Precision: 0.8333333333333334
--------------- found interactors (klp-7) --------------


Unnamed: 0,gene,interacting,predicted_interaction,predicted_probability
79,tpxl1-18C,False,True,0.999893
18,cdk1,False,True,0.999231
40,gpb1,False,True,0.998239
74,such1dylt1,False,True,0.995707
67,plk1-18C,True,True,0.995308
21,cls2-18C,False,True,0.995134
73,such1L4440,False,True,0.994280
26,dyci1,False,True,0.991893
76,such1mdf1L4440,False,True,0.978143
...,...,...,...,...


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting,predicted_interaction
0,ANA019none,-1.043194,1.466452,5.329549,False,False
1,JEP13gpr1,0.273005,-0.840942,-0.350799,True,False
2,JEP14gpr2-18C,-1.030865,-3.242924,1.638099,True,False
3,JEP15L4440,0.231701,-2.249044,4.431642,False,False
4,JEP1klp13-18C,-0.669484,-2.066330,0.301457,False,False
...,...,...,...,...,...,...
83,unc59,-0.178511,1.244468,-10.093683,False,False
84,vab8,-0.375866,-0.210577,0.648375,False,False
85,zen4,1.288270,8.995004,-10.353139,True,False
86,zyg11,-0.769377,-9.888831,-0.444174,False,False




{'air1', 'cdk1', 'tpxl1-18C'}

Number of known recovered interactions: 1   Number of known but NOT recovered interactions: 0 Number of novel interactions: 2   Total number of interactions: 3


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  details_interacting_genes[['principal component 1', 'principal component 2', 'principal component 3']] = details_interacting_genes[['principal component 1', 'principal component 2', 'principal component 3']].round(2)


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting,predicted_interaction
14,air1,-4.21,1.17,-9.4,True,True
18,cdk1,-1.67,-5.07,-13.8,False,True
79,tpxl1-18C,-8.24,-3.02,-20.48,False,True


## spindle length tpxl-1

In [32]:
gois=['tpxl-1']
common_genes = set()
common_details = None
for idx,goi in enumerate(gois):
    print(f"=============== interactors for {goi} ===============")
    interactors, details = get_list_from_goi(goi)
    print(f"--------------- found interactors ({goi}) --------------")
    pd.set_option('display.max_rows', 0) 
    display(details.loc[details['predicted_interaction']][['gene', 'interacting', 'predicted_interaction', 'predicted_probability']].sort_values('predicted_probability',ascending=False))
    pd.set_option('display.max_rows', 10) 
    common_genes,common_details = assemble_list(idx,common_genes,common_details,interactors)
print("=============== Common genes =================")
details_interacting_genes = output_formater(common_genes,common_details)

--- goi=tpxl-1 name=WBGene00021470 ----
---------- training set tpxl-1-------


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting
10,L4440-18C,-0.723055,-1.546943,0.525031,False
11,L4440ebp2,-0.498394,0.984421,2.054312,False
14,air1,-4.206413,1.172099,-9.398,True
63,none-18C,0.0,0.0,-0.0,False
79,tpxl1-18C,-8.239139,-3.023608,-20.483315,True


True
[LibLinear]iter  1 act 1.141e+00 pre 9.811e-01 delta 1.365e-01 f 3.466e+00 |g| 1.719e+01 CG   1
cg reaches trust region boundary
iter  2 act 3.809e-01 pre 3.133e-01 delta 2.181e-01 f 2.324e+00 |g| 4.935e+00 CG   2
cg reaches trust region boundary
iter  3 act 2.589e-01 pre 2.208e-01 delta 3.470e-01 f 1.944e+00 |g| 2.042e+00 CG   2
iter  4 act 1.108e-01 pre 1.039e-01 delta 3.470e-01 f 1.685e+00 |g| 8.055e-01 CG   2
iter  5 act 4.723e-03 pre 4.405e-03 delta 3.470e-01 f 1.574e+00 |g| 1.579e-01 CG   1
iter  6 act 1.482e-04 pre 1.468e-04 delta 3.470e-01 f 1.569e+00 |g| 2.093e-02 CG   2
On the training set: Accuracy: 1.0, Recall: 1.0, Precision: 1.0
--------------- found interactors (tpxl-1) --------------


Unnamed: 0,gene,interacting,predicted_interaction,predicted_probability
21,cls2-18C,False,True,0.999989
79,tpxl1-18C,True,True,0.999966
85,zen4,False,True,0.999903
18,cdk1,False,True,0.999372
83,unc59,False,True,0.998939
70,spd1,False,True,0.998781
40,gpb1,False,True,0.997557
14,air1,True,True,0.994481
20,clip1-18C,False,True,0.987957
74,such1dylt1,False,True,0.985495




{'JEP5mbk2',
 'air1',
 'cdk1',
 'clip1-18C',
 'cls2-18C',
 'gpb1',
 'klp7',
 'par4',
 'spd1',
 'spd2-18C',
 'spn4',
 'such1L4440',
 'such1dylt1',
 'tpxl1-18C',
 'unc59',
 'zen4'}

Number of known recovered interactions: 2   Number of known but NOT recovered interactions: 0 Number of novel interactions: 14   Total number of interactions: 16


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  details_interacting_genes[['principal component 1', 'principal component 2', 'principal component 3']] = details_interacting_genes[['principal component 1', 'principal component 2', 'principal component 3']].round(2)


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting,predicted_interaction
8,JEP5mbk2,-0.75,-0.44,-5.19,False,True
14,air1,-4.21,1.17,-9.40,True,True
18,cdk1,-1.67,-5.07,-13.80,False,True
20,clip1-18C,-0.92,1.17,-6.73,False,True
21,cls2-18C,-1.12,3.49,-16.64,False,True
...,...,...,...,...,...,...
73,such1L4440,0.42,-6.65,-7.32,False,True
74,such1dylt1,0.52,-6.24,-8.41,False,True
79,tpxl1-18C,-8.24,-3.02,-20.48,True,True
83,unc59,-0.18,1.24,-10.09,False,True


## spindle length after Greenan 2010

In [33]:
gois=['tpxl-1', 'air-1']
for idx,goi in enumerate(gois):
    print(f"=============== interactors for {goi} ===============")
    interactors, details = get_list_from_goi(goi)
    print(f"--------------- found interactors ({goi}) --------------")
    pd.set_option('display.max_rows', 0) 
    display(details.loc[details['predicted_interaction']][['gene', 'interacting', 'predicted_interaction', 'predicted_probability']].sort_values('predicted_probability',ascending=False))
    pd.set_option('display.max_rows', 10) 
    common_genes,common_details = assemble_list(idx,common_genes,common_details,interactors)
print("=============== Common genes =================")
details_interacting_genes = output_formater(common_genes,common_details)

--- goi=tpxl-1 name=WBGene00021470 ----
---------- training set tpxl-1-------


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting
10,L4440-18C,-0.723055,-1.546943,0.525031,False
11,L4440ebp2,-0.498394,0.984421,2.054312,False
14,air1,-4.206413,1.172099,-9.398,True
63,none-18C,0.0,0.0,-0.0,False
79,tpxl1-18C,-8.239139,-3.023608,-20.483315,True


True
[LibLinear]iter  1 act 1.141e+00 pre 9.811e-01 delta 1.365e-01 f 3.466e+00 |g| 1.719e+01 CG   1
cg reaches trust region boundary
iter  2 act 3.809e-01 pre 3.133e-01 delta 2.181e-01 f 2.324e+00 |g| 4.935e+00 CG   2
cg reaches trust region boundary
iter  3 act 2.589e-01 pre 2.208e-01 delta 3.470e-01 f 1.944e+00 |g| 2.042e+00 CG   2
iter  4 act 1.108e-01 pre 1.039e-01 delta 3.470e-01 f 1.685e+00 |g| 8.055e-01 CG   2
iter  5 act 4.723e-03 pre 4.405e-03 delta 3.470e-01 f 1.574e+00 |g| 1.579e-01 CG   1
iter  6 act 1.482e-04 pre 1.468e-04 delta 3.470e-01 f 1.569e+00 |g| 2.093e-02 CG   2
On the training set: Accuracy: 1.0, Recall: 1.0, Precision: 1.0
--------------- found interactors (tpxl-1) --------------


Unnamed: 0,gene,interacting,predicted_interaction,predicted_probability
21,cls2-18C,False,True,0.999989
79,tpxl1-18C,True,True,0.999966
85,zen4,False,True,0.999903
18,cdk1,False,True,0.999372
83,unc59,False,True,0.998939
70,spd1,False,True,0.998781
40,gpb1,False,True,0.997557
14,air1,True,True,0.994481
20,clip1-18C,False,True,0.987957
74,such1dylt1,False,True,0.985495


--- goi=air-1 name=WBGene00000098 ----
---------- training set air-1-------


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting
8,JEP5mbk2,-0.751909,-0.439614,-5.193219,True
10,L4440-18C,-0.723055,-1.546943,0.525031,False
11,L4440ebp2,-0.498394,0.984421,2.054312,False
14,air1,-4.206413,1.172099,-9.398000,True
17,bmk1-18C,-0.131047,1.620277,-1.108807,True
18,cdk1,-1.674856,-5.065348,-13.798673,True
23,dhc1,0.508078,-1.924023,-1.872476,True
25,dnc1,-0.724038,-1.214808,1.352019,True
30,ebp1,-0.589048,-1.572828,2.133720,True
...,...,...,...,...,...


True
[LibLinear]iter  1 act 5.943e+00 pre 5.044e+00 delta 3.085e-01 f 2.010e+01 |g| 4.596e+01 CG   2
cg reaches trust region boundary
iter  2 act 2.317e+00 pre 1.764e+00 delta 4.733e-01 f 1.416e+01 |g| 1.528e+01 CG   3
cg reaches trust region boundary
iter  3 act 1.446e+00 pre 1.180e+00 delta 6.682e-01 f 1.184e+01 |g| 7.012e+00 CG   3
iter  4 act 3.959e-01 pre 3.580e-01 delta 6.682e-01 f 1.040e+01 |g| 2.450e+00 CG   3
iter  5 act 1.310e-02 pre 1.273e-02 delta 6.682e-01 f 1.000e+01 |g| 4.316e-01 CG   3
iter  6 act 3.061e-05 pre 3.054e-05 delta 6.682e-01 f 9.987e+00 |g| 2.097e-02 CG   3
On the training set: Accuracy: 0.8620689655172413, Recall: 0.8846153846153846, Precision: 0.9583333333333334
--------------- found interactors (air-1) --------------


Unnamed: 0,gene,interacting,predicted_interaction,predicted_probability
79,tpxl1-18C,True,True,1.000000
18,cdk1,True,True,0.999995
14,air1,True,True,0.999941
21,cls2-18C,False,True,0.999819
40,gpb1,False,True,0.999753
26,dyci1,False,True,0.999740
67,plk1-18C,True,True,0.999302
74,such1dylt1,False,True,0.999229
73,such1L4440,False,True,0.999003
...,...,...,...,...


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting,predicted_interaction
0,ANA019none,-1.043194,1.466452,5.329549,False,False
1,JEP13gpr1,0.273005,-0.840942,-0.350799,False,False
2,JEP14gpr2-18C,-1.030865,-3.242924,1.638099,False,False
3,JEP15L4440,0.231701,-2.249044,4.431642,False,False
4,JEP1klp13-18C,-0.669484,-2.066330,0.301457,False,False
...,...,...,...,...,...,...
83,unc59,-0.178511,1.244468,-10.093683,False,True
84,vab8,-0.375866,-0.210577,0.648375,False,False
85,zen4,1.288270,8.995004,-10.353139,False,False
86,zyg11,-0.769377,-9.888831,-0.444174,False,False




{'JEP5mbk2',
 'air1',
 'cdk1',
 'clip1-18C',
 'cls2-18C',
 'gpb1',
 'klp7',
 'par4',
 'spd2-18C',
 'spn4',
 'such1L4440',
 'such1dylt1',
 'tpxl1-18C',
 'unc59'}

Number of known recovered interactions: 5   Number of known but NOT recovered interactions: 0 Number of novel interactions: 9   Total number of interactions: 14


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  details_interacting_genes[['principal component 1', 'principal component 2', 'principal component 3']] = details_interacting_genes[['principal component 1', 'principal component 2', 'principal component 3']].round(2)


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting,predicted_interaction
8,JEP5mbk2,-0.75,-0.44,-5.19,True,True
14,air1,-4.21,1.17,-9.40,True,True
18,cdk1,-1.67,-5.07,-13.80,True,True
20,clip1-18C,-0.92,1.17,-6.73,False,True
21,cls2-18C,-1.12,3.49,-16.64,False,True
...,...,...,...,...,...,...
72,spn4,0.53,-2.64,-4.55,False,True
73,such1L4440,0.42,-6.65,-7.32,False,True
74,such1dylt1,0.52,-6.24,-8.41,False,True
79,tpxl1-18C,-8.24,-3.02,-20.48,True,True


In [34]:
gois=['C27D9.1', 'ani-2','ima-3', 'F21H12.2'] # cid-1 was acquired at 18C
for idx,goi in enumerate(gois):
    print(f"=============== interactors for {goi} ===============")
    interactors, details = get_list_from_goi(goi,tempe='23C',proba_theshold_prediction=0.5)
    print(f"--------------- found interactors ({goi}) --------------")
    pd.set_option('display.max_rows', 0) 
    display(details.loc[details['predicted_interaction']][['gene', 'interacting', 'predicted_interaction', 'predicted_probability']].sort_values('predicted_probability',ascending=False))
    pd.set_option('display.max_rows', 10) 
    if idx == 0:
        common_genes = set(interactors)
    else:
        common_genes = common_genes.intersection(interactors)
print("=============== Common genes =================")
display(common_genes)

--- goi=C27D9.1 name=WBGene00016163 ----
Downloading the file
---------- training set C27D9.1-------


Unnamed: 0,gene,principal component 1,principal component 2,principal component 3,interacting


True


Exception: Something wrong in naming ? No interacting gene in our dataset

## spd-1 (central spindle)

In [None]:
gois=['spd-1'] 
for idx,goi in enumerate(gois):
    print(f"=============== interactors for {goi} ===============")
    interactors, details = get_list_from_goi(goi,tempe='18C',proba_theshold_prediction=0.95)
    print(f"--------------- found interactors ({goi}) --------------")
    pd.set_option('display.max_rows', 0) 
    display(details.loc[details['predicted_interaction']][['gene', 'interacting', 'predicted_interaction', 'predicted_probability']].sort_values('predicted_probability',ascending=False))
    pd.set_option('display.max_rows', 10) 
    common_genes,common_details = assemble_list(idx,common_genes,common_details,interactors)
print("=============== Common genes =================")
details_interacting_genes = output_formater(common_genes,common_details)