In [None]:
# importing all the required packages
import pandas as pd
import numpy as np
import scipy.stats
import math
from time import process_time_ns
import time
import timeit
import statsmodels.stats.multitest as ssm

In [None]:
# 10 SNPs dataset; its last column is taken as the phenotype vector
data = pd.read_csv("sample10.csv")
phenotype = data.iloc[:, data.shape[1] - 1]

In [None]:
# Settings for the t-test, derived from the dataset held in `data`.
# NOTE(review): these are computed once from whatever `data` holds when this
# cell runs; the cells that later reload `data` do not recompute them --
# confirm that is intended for datasets with a different number of rows.

# two-sided significance level
alpha = .05

# sample count = number of rows in the dataset
n = len(data)

# degrees of freedom: number of samples minus 4
df = n - 4

# two-sided critical value of Student's t at level alpha
critical_value = scipy.stats.t.ppf(1 - alpha / 2, df=df)

In [None]:
# Algorithm for Interaction Coefficient for Pairwise Epistasis
def epistatis(snp1, snp2, phenotype):
    """Estimate and test the pairwise interaction coefficient of two SNPs.

    All vectors are mean-centred (which removes the intercept), the
    element-wise product ``snp1 * snp2`` is orthogonalised against both SNPs,
    and the phenotype is projected onto what is left. The resulting
    coefficient is turned into a two-sided t-test with ``len(snp1) - 4``
    degrees of freedom.

    Parameters
    ----------
    snp1 : pandas.Series
        First SNP in the pair to be checked for interaction.
    snp2 : pandas.Series
        Second SNP in the pair to be checked for interaction.
    phenotype : pandas.Series
        Phenotype vector.

    Returns
    -------
    tuple
        ``(b3, t_test, p_val)``: the interaction coefficient (beta_3 in the
        paper), its t statistic and the two-sided p-value. ``(0, 0, 1)`` is
        returned, after printing a message, when the computation raises or
        yields NaN (e.g. a SNP with no variation, or fewer than 5 samples).
    """
    try:
        # Degrees of freedom come from the inputs themselves, so the t statistic
        # and the p-value always agree, whatever dataset is currently loaded.
        dof = snp1.shape[0] - 4

        # removing the intercept by mean centering
        snp1_tilde = snp1 - snp1.mean()
        snp2_tilde = snp2 - snp2.mean()
        phenotype_tilde = phenotype - phenotype.mean()

        # interaction term of snp1 and snp2 - either cartesian product or XOR
        interaction_vector = snp1.mul(snp2)  # using cartesian product
        # interaction_vector = (snp1%2 + snp2%2)%2 # using XOR penetrance
        interaction_vector_tilde = interaction_vector - interaction_vector.mean()

        # Orthogonalise: q2 is snp2 with its snp1 component removed, v is the
        # interaction term with its snp1 and q2 components removed.
        snp1_norm = snp1_tilde.dot(snp1_tilde)
        q2 = snp2_tilde - (snp1_tilde.dot(snp2_tilde) / snp1_norm) * snp1_tilde
        q2_norm = q2.dot(q2)
        v = interaction_vector_tilde - (snp1_tilde.dot(interaction_vector_tilde) / snp1_norm) * snp1_tilde
        v = v - (interaction_vector_tilde.dot(q2) / q2_norm) * q2
        v_norm = v.dot(v)

        # interaction coefficient, referred to as beta_3 in the paper
        b3 = v.dot(phenotype_tilde) / v_norm

        # residual calculation: phenotype minus its components along snp1, q2 and v
        residual = phenotype_tilde - (snp1_tilde.dot(phenotype_tilde) / snp1_norm) * snp1_tilde
        residual = residual - (phenotype_tilde.dot(q2) / q2_norm) * q2
        residual = residual - b3 * v

        t_test = np.sqrt(dof) * np.sqrt(v_norm) * b3 / np.sqrt(residual.dot(residual))
        p_val = scipy.stats.t.sf(abs(t_test), dof) * 2

        # Zero denominators do not raise under numpy, they silently produce NaN;
        # route those pairs through the same fallback as any other failure.
        if np.isnan(b3) or np.isnan(t_test) or np.isnan(p_val):
            raise ValueError("degenerate SNP pair, statistic is NaN")
    except Exception as e:
        print("Error pair detected with error: ", e)
        b3 = 0
        t_test = 0
        p_val = 1

    return b3, t_test, p_val  # returning the interaction coefficient, t test value and p value

In [None]:
# Timed pairwise scan: runs `epistatis` over every SNP pair once and reports the duration
def run_time_script():
    """Run `epistatis` on every pair of SNP columns of `data` and time the scan.

    Reads the module-level `data` and `phenotype`. Every column of `data`
    except the last one is treated as a SNP. The (p-value, column, column)
    tuples are collected locally and discarded; only the timing is returned.

    Returns
    -------
    float
        Seconds elapsed for the full scan, measured with `time.perf_counter`.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # system clock, which can be adjusted mid-run and may be too coarse for
    # short scans.
    start_time = time.perf_counter()

    # original author's note: p-values are very low and do not match regression output or PLINK output
    p_value_locus_original = []  # list of tuples containing the p_value and the two interacting loci
    snp_count = data.shape[1] - 1  # the last column is excluded from the SNP pairs
    for i in range(snp_count):
        for j in range(i + 1, snp_count):
            # columns are sliced once per pair, as before, so the timed workload is unchanged
            interacting_snp_1 = data.iloc[:, i]
            interacting_snp_2 = data.iloc[:, j]
            # sandra's implementation
            b3, t_test, p_val = epistatis(interacting_snp_1, interacting_snp_2, phenotype)
            p_value_locus_original.append((p_val, data.columns[i], data.columns[j]))

    return time.perf_counter() - start_time

# How many independent timings to collect
num_runs = 10

# One timeit measurement (a single call of run_time_script) per repetition
elapsed_times_nick = timeit.repeat(run_time_script, repeat=num_runs, number=1)

# Report every timing, numbering the runs from 1
for i, time_nick in enumerate(elapsed_times_nick):
    print(f"Run {i+1}: Elapsed time - {time_nick:.6f} seconds")

In [None]:
# 100 SNPs dataset; its last column is taken as the phenotype vector
data = pd.read_csv("sample100.csv")
phenotype = data.iloc[:, data.shape[1] - 1]

import time
import timeit

In [None]:
# Timed pairwise scan: runs `epistatis` over every SNP pair once and reports the duration
def run_time_script():
    """Run `epistatis` on every pair of SNP columns of `data` and time the scan.

    Reads the module-level `data` and `phenotype`. Every column of `data`
    except the last one is treated as a SNP. The (p-value, column, column)
    tuples are collected locally and discarded; only the timing is returned.

    Returns
    -------
    float
        Seconds elapsed for the full scan, measured with `time.perf_counter`.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # system clock, which can be adjusted mid-run and may be too coarse for
    # short scans.
    start_time = time.perf_counter()

    # original author's note: p-values are very low and do not match regression output or PLINK output
    p_value_locus_original = []  # list of tuples containing the p_value and the two interacting loci
    snp_count = data.shape[1] - 1  # the last column is excluded from the SNP pairs
    for i in range(snp_count):
        for j in range(i + 1, snp_count):
            # columns are sliced once per pair, as before, so the timed workload is unchanged
            interacting_snp_1 = data.iloc[:, i]
            interacting_snp_2 = data.iloc[:, j]
            # sandra's implementation
            b3, t_test, p_val = epistatis(interacting_snp_1, interacting_snp_2, phenotype)
            p_value_locus_original.append((p_val, data.columns[i], data.columns[j]))

    return time.perf_counter() - start_time

# How many independent timings to collect
num_runs = 10

# One timeit measurement (a single call of run_time_script) per repetition
elapsed_times_nick = timeit.repeat(run_time_script, repeat=num_runs, number=1)

# Report every timing, numbering the runs from 1
for i, time_nick in enumerate(elapsed_times_nick):
    print(f"Run {i+1}: Elapsed time - {time_nick:.6f} seconds")

In [None]:
# 1,000 SNPs dataset; its last column is taken as the phenotype vector
data = pd.read_csv("sample1000.csv")
phenotype = data.iloc[:, data.shape[1] - 1]

import time
import timeit

In [None]:
# Timed pairwise scan: runs `epistatis` over every SNP pair once and reports the duration
def run_time_script():
    """Run `epistatis` on every pair of SNP columns of `data` and time the scan.

    Reads the module-level `data` and `phenotype`. Every column of `data`
    except the last one is treated as a SNP. The (p-value, column, column)
    tuples are collected locally and discarded; only the timing is returned.

    Returns
    -------
    float
        Seconds elapsed for the full scan, measured with `time.perf_counter`.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # system clock, which can be adjusted mid-run and may be too coarse for
    # short scans.
    start_time = time.perf_counter()

    # original author's note: p-values are very low and do not match regression output or PLINK output
    p_value_locus_original = []  # list of tuples containing the p_value and the two interacting loci
    snp_count = data.shape[1] - 1  # the last column is excluded from the SNP pairs
    for i in range(snp_count):
        for j in range(i + 1, snp_count):
            # columns are sliced once per pair, as before, so the timed workload is unchanged
            interacting_snp_1 = data.iloc[:, i]
            interacting_snp_2 = data.iloc[:, j]
            # sandra's implementation
            b3, t_test, p_val = epistatis(interacting_snp_1, interacting_snp_2, phenotype)
            p_value_locus_original.append((p_val, data.columns[i], data.columns[j]))

    return time.perf_counter() - start_time

# How many independent timings to collect
num_runs = 10

# One timeit measurement (a single call of run_time_script) per repetition
elapsed_times_nick = timeit.repeat(run_time_script, repeat=num_runs, number=1)

# Report every timing, numbering the runs from 1
for i, time_nick in enumerate(elapsed_times_nick):
    print(f"Run {i+1}: Elapsed time - {time_nick:.6f} seconds")

In [None]:
# 100 SNPs with 10 observations; the last column is taken as the phenotype vector
data = pd.read_csv('sample100_10obs.csv')
phenotype = data.iloc[:, data.shape[1] - 1]

import time
import timeit

In [None]:
# Timed pairwise scan: runs `epistatis` over every SNP pair once and reports the duration
def run_time_script():
    """Run `epistatis` on every pair of SNP columns of `data` and time the scan.

    Reads the module-level `data` and `phenotype`. Every column of `data`
    except the last one is treated as a SNP. The (p-value, column, column)
    tuples are collected locally and discarded; only the timing is returned.

    Returns
    -------
    float
        Seconds elapsed for the full scan, measured with `time.perf_counter`.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # system clock, which can be adjusted mid-run and may be too coarse for
    # short scans.
    start_time = time.perf_counter()

    # original author's note: p-values are very low and do not match regression output or PLINK output
    p_value_locus_original = []  # list of tuples containing the p_value and the two interacting loci
    snp_count = data.shape[1] - 1  # the last column is excluded from the SNP pairs
    for i in range(snp_count):
        for j in range(i + 1, snp_count):
            # columns are sliced once per pair, as before, so the timed workload is unchanged
            interacting_snp_1 = data.iloc[:, i]
            interacting_snp_2 = data.iloc[:, j]
            # sandra's implementation
            b3, t_test, p_val = epistatis(interacting_snp_1, interacting_snp_2, phenotype)
            p_value_locus_original.append((p_val, data.columns[i], data.columns[j]))

    return time.perf_counter() - start_time

# How many independent timings to collect
num_runs = 10

# One timeit measurement (a single call of run_time_script) per repetition
elapsed_times_nick = timeit.repeat(run_time_script, repeat=num_runs, number=1)

# Report every timing, numbering the runs from 1
for i, time_nick in enumerate(elapsed_times_nick):
    print(f"Run {i+1}: Elapsed time - {time_nick:.6f} seconds")

In [None]:
# 100 SNPs with 100 observations; the last column is taken as the phenotype vector
data = pd.read_csv('sample100_100obs.csv')
phenotype = data.iloc[:, data.shape[1] - 1]

import time
import timeit

In [None]:
# Timed pairwise scan: runs `epistatis` over every SNP pair once and reports the duration
def run_time_script():
    """Run `epistatis` on every pair of SNP columns of `data` and time the scan.

    Reads the module-level `data` and `phenotype`. Every column of `data`
    except the last one is treated as a SNP. The (p-value, column, column)
    tuples are collected locally and discarded; only the timing is returned.

    Returns
    -------
    float
        Seconds elapsed for the full scan, measured with `time.perf_counter`.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # system clock, which can be adjusted mid-run and may be too coarse for
    # short scans.
    start_time = time.perf_counter()

    # original author's note: p-values are very low and do not match regression output or PLINK output
    p_value_locus_original = []  # list of tuples containing the p_value and the two interacting loci
    snp_count = data.shape[1] - 1  # the last column is excluded from the SNP pairs
    for i in range(snp_count):
        for j in range(i + 1, snp_count):
            # columns are sliced once per pair, as before, so the timed workload is unchanged
            interacting_snp_1 = data.iloc[:, i]
            interacting_snp_2 = data.iloc[:, j]
            # sandra's implementation
            b3, t_test, p_val = epistatis(interacting_snp_1, interacting_snp_2, phenotype)
            p_value_locus_original.append((p_val, data.columns[i], data.columns[j]))

    return time.perf_counter() - start_time

# How many independent timings to collect
num_runs = 10

# One timeit measurement (a single call of run_time_script) per repetition
elapsed_times_nick = timeit.repeat(run_time_script, repeat=num_runs, number=1)

# Report every timing, numbering the runs from 1
for i, time_nick in enumerate(elapsed_times_nick):
    print(f"Run {i+1}: Elapsed time - {time_nick:.6f} seconds")

In [None]:
# 100 SNPs with 1,000 observations; the last column is taken as the phenotype vector
data = pd.read_csv('sample100_1000obs.csv')
phenotype = data.iloc[:, data.shape[1] - 1]

import time
import timeit

In [None]:
# Timed pairwise scan: runs `epistatis` over every SNP pair once and reports the duration
def run_time_script():
    """Run `epistatis` on every pair of SNP columns of `data` and time the scan.

    Reads the module-level `data` and `phenotype`. Every column of `data`
    except the last one is treated as a SNP. The (p-value, column, column)
    tuples are collected locally and discarded; only the timing is returned.

    Returns
    -------
    float
        Seconds elapsed for the full scan, measured with `time.perf_counter`.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # system clock, which can be adjusted mid-run and may be too coarse for
    # short scans.
    start_time = time.perf_counter()

    # original author's note: p-values are very low and do not match regression output or PLINK output
    p_value_locus_original = []  # list of tuples containing the p_value and the two interacting loci
    snp_count = data.shape[1] - 1  # the last column is excluded from the SNP pairs
    for i in range(snp_count):
        for j in range(i + 1, snp_count):
            # columns are sliced once per pair, as before, so the timed workload is unchanged
            interacting_snp_1 = data.iloc[:, i]
            interacting_snp_2 = data.iloc[:, j]
            # sandra's implementation
            b3, t_test, p_val = epistatis(interacting_snp_1, interacting_snp_2, phenotype)
            p_value_locus_original.append((p_val, data.columns[i], data.columns[j]))

    return time.perf_counter() - start_time

# How many independent timings to collect
num_runs = 10

# One timeit measurement (a single call of run_time_script) per repetition
elapsed_times_nick = timeit.repeat(run_time_script, repeat=num_runs, number=1)

# Report every timing, numbering the runs from 1
for i, time_nick in enumerate(elapsed_times_nick):
    print(f"Run {i+1}: Elapsed time - {time_nick:.6f} seconds")

In [None]:
# 100 SNPs with 2,000 observations; the last column is taken as the phenotype vector
data = pd.read_csv('sample100_2000obs.csv')
phenotype = data.iloc[:, data.shape[1] - 1]

import time
import timeit

In [None]:
# Timed pairwise scan: runs `epistatis` over every SNP pair once and reports the duration
def run_time_script():
    """Run `epistatis` on every pair of SNP columns of `data` and time the scan.

    Reads the module-level `data` and `phenotype`. Every column of `data`
    except the last one is treated as a SNP. The (p-value, column, column)
    tuples are collected locally and discarded; only the timing is returned.

    Returns
    -------
    float
        Seconds elapsed for the full scan, measured with `time.perf_counter`.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # system clock, which can be adjusted mid-run and may be too coarse for
    # short scans.
    start_time = time.perf_counter()

    # original author's note: p-values are very low and do not match regression output or PLINK output
    p_value_locus_original = []  # list of tuples containing the p_value and the two interacting loci
    snp_count = data.shape[1] - 1  # the last column is excluded from the SNP pairs
    for i in range(snp_count):
        for j in range(i + 1, snp_count):
            # columns are sliced once per pair, as before, so the timed workload is unchanged
            interacting_snp_1 = data.iloc[:, i]
            interacting_snp_2 = data.iloc[:, j]
            # sandra's implementation
            b3, t_test, p_val = epistatis(interacting_snp_1, interacting_snp_2, phenotype)
            p_value_locus_original.append((p_val, data.columns[i], data.columns[j]))

    return time.perf_counter() - start_time

# How many independent timings to collect
num_runs = 10

# One timeit measurement (a single call of run_time_script) per repetition
elapsed_times_nick = timeit.repeat(run_time_script, repeat=num_runs, number=1)

# Report every timing, numbering the runs from 1
for i, time_nick in enumerate(elapsed_times_nick):
    print(f"Run {i+1}: Elapsed time - {time_nick:.6f} seconds")

In [None]:
# 100 SNPs with 3,000 observations; the last column is taken as the phenotype vector
data = pd.read_csv('sample100_3000obs.csv')
phenotype = data.iloc[:, data.shape[1] - 1]

import time
import timeit

In [None]:
# Timed pairwise scan: runs `epistatis` over every SNP pair once and reports the duration
def run_time_script():
    """Run `epistatis` on every pair of SNP columns of `data` and time the scan.

    Reads the module-level `data` and `phenotype`. Every column of `data`
    except the last one is treated as a SNP. The (p-value, column, column)
    tuples are collected locally and discarded; only the timing is returned.

    Returns
    -------
    float
        Seconds elapsed for the full scan, measured with `time.perf_counter`.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # system clock, which can be adjusted mid-run and may be too coarse for
    # short scans.
    start_time = time.perf_counter()

    # original author's note: p-values are very low and do not match regression output or PLINK output
    p_value_locus_original = []  # list of tuples containing the p_value and the two interacting loci
    snp_count = data.shape[1] - 1  # the last column is excluded from the SNP pairs
    for i in range(snp_count):
        for j in range(i + 1, snp_count):
            # columns are sliced once per pair, as before, so the timed workload is unchanged
            interacting_snp_1 = data.iloc[:, i]
            interacting_snp_2 = data.iloc[:, j]
            # sandra's implementation
            b3, t_test, p_val = epistatis(interacting_snp_1, interacting_snp_2, phenotype)
            p_value_locus_original.append((p_val, data.columns[i], data.columns[j]))

    return time.perf_counter() - start_time

# How many independent timings to collect
num_runs = 10

# One timeit measurement (a single call of run_time_script) per repetition
elapsed_times_nick = timeit.repeat(run_time_script, repeat=num_runs, number=1)

# Report every timing, numbering the runs from 1
for i, time_nick in enumerate(elapsed_times_nick):
    print(f"Run {i+1}: Elapsed time - {time_nick:.6f} seconds")

In [None]:
# 100 SNPs with 4,000 observations; the last column is taken as the phenotype vector
data = pd.read_csv('sample100_4000obs.csv')
phenotype = data.iloc[:, data.shape[1] - 1]

import time
import timeit

In [None]:
# Timed pairwise scan: runs `epistatis` over every SNP pair once and reports the duration
def run_time_script():
    """Run `epistatis` on every pair of SNP columns of `data` and time the scan.

    Reads the module-level `data` and `phenotype`. Every column of `data`
    except the last one is treated as a SNP. The (p-value, column, column)
    tuples are collected locally and discarded; only the timing is returned.

    Returns
    -------
    float
        Seconds elapsed for the full scan, measured with `time.perf_counter`.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # system clock, which can be adjusted mid-run and may be too coarse for
    # short scans.
    start_time = time.perf_counter()

    # original author's note: p-values are very low and do not match regression output or PLINK output
    p_value_locus_original = []  # list of tuples containing the p_value and the two interacting loci
    snp_count = data.shape[1] - 1  # the last column is excluded from the SNP pairs
    for i in range(snp_count):
        for j in range(i + 1, snp_count):
            # columns are sliced once per pair, as before, so the timed workload is unchanged
            interacting_snp_1 = data.iloc[:, i]
            interacting_snp_2 = data.iloc[:, j]
            # sandra's implementation
            b3, t_test, p_val = epistatis(interacting_snp_1, interacting_snp_2, phenotype)
            p_value_locus_original.append((p_val, data.columns[i], data.columns[j]))

    return time.perf_counter() - start_time

# How many independent timings to collect
num_runs = 10

# One timeit measurement (a single call of run_time_script) per repetition
elapsed_times_nick = timeit.repeat(run_time_script, repeat=num_runs, number=1)

# Report every timing, numbering the runs from 1
for i, time_nick in enumerate(elapsed_times_nick):
    print(f"Run {i+1}: Elapsed time - {time_nick:.6f} seconds")

In [None]:
# 100 SNPs with 5,000 observations; the last column is taken as the phenotype vector
data = pd.read_csv('sample100_5000obs.csv')
phenotype = data.iloc[:, data.shape[1] - 1]

import time
import timeit

In [None]:
# Timed pairwise scan: runs `epistatis` over every SNP pair once and reports the duration
def run_time_script():
    """Run `epistatis` on every pair of SNP columns of `data` and time the scan.

    Reads the module-level `data` and `phenotype`. Every column of `data`
    except the last one is treated as a SNP. The (p-value, column, column)
    tuples are collected locally and discarded; only the timing is returned.

    Returns
    -------
    float
        Seconds elapsed for the full scan, measured with `time.perf_counter`.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # system clock, which can be adjusted mid-run and may be too coarse for
    # short scans.
    start_time = time.perf_counter()

    # original author's note: p-values are very low and do not match regression output or PLINK output
    p_value_locus_original = []  # list of tuples containing the p_value and the two interacting loci
    snp_count = data.shape[1] - 1  # the last column is excluded from the SNP pairs
    for i in range(snp_count):
        for j in range(i + 1, snp_count):
            # columns are sliced once per pair, as before, so the timed workload is unchanged
            interacting_snp_1 = data.iloc[:, i]
            interacting_snp_2 = data.iloc[:, j]
            # sandra's implementation
            b3, t_test, p_val = epistatis(interacting_snp_1, interacting_snp_2, phenotype)
            p_value_locus_original.append((p_val, data.columns[i], data.columns[j]))

    return time.perf_counter() - start_time

# How many independent timings to collect
num_runs = 10

# One timeit measurement (a single call of run_time_script) per repetition
elapsed_times_nick = timeit.repeat(run_time_script, repeat=num_runs, number=1)

# Report every timing, numbering the runs from 1
for i, time_nick in enumerate(elapsed_times_nick):
    print(f"Run {i+1}: Elapsed time - {time_nick:.6f} seconds")

In [None]:
# 100 SNPs with 6,000 observations; the last column is taken as the phenotype vector
data = pd.read_csv('sample100_6000obs.csv')
phenotype = data.iloc[:, data.shape[1] - 1]

import time
import timeit

In [None]:
# Timed pairwise scan: runs `epistatis` over every SNP pair once and reports the duration
def run_time_script():
    """Run `epistatis` on every pair of SNP columns of `data` and time the scan.

    Reads the module-level `data` and `phenotype`. Every column of `data`
    except the last one is treated as a SNP. The (p-value, column, column)
    tuples are collected locally and discarded; only the timing is returned.

    Returns
    -------
    float
        Seconds elapsed for the full scan, measured with `time.perf_counter`.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # system clock, which can be adjusted mid-run and may be too coarse for
    # short scans.
    start_time = time.perf_counter()

    # original author's note: p-values are very low and do not match regression output or PLINK output
    p_value_locus_original = []  # list of tuples containing the p_value and the two interacting loci
    snp_count = data.shape[1] - 1  # the last column is excluded from the SNP pairs
    for i in range(snp_count):
        for j in range(i + 1, snp_count):
            # columns are sliced once per pair, as before, so the timed workload is unchanged
            interacting_snp_1 = data.iloc[:, i]
            interacting_snp_2 = data.iloc[:, j]
            # sandra's implementation
            b3, t_test, p_val = epistatis(interacting_snp_1, interacting_snp_2, phenotype)
            p_value_locus_original.append((p_val, data.columns[i], data.columns[j]))

    return time.perf_counter() - start_time

# How many independent timings to collect
num_runs = 10

# One timeit measurement (a single call of run_time_script) per repetition
elapsed_times_nick = timeit.repeat(run_time_script, repeat=num_runs, number=1)

# Report every timing, numbering the runs from 1
for i, time_nick in enumerate(elapsed_times_nick):
    print(f"Run {i+1}: Elapsed time - {time_nick:.6f} seconds")

In [None]:
# 100 SNPs with 7,000 observations; the last column is taken as the phenotype vector
data = pd.read_csv('sample100_7000obs.csv')
phenotype = data.iloc[:, data.shape[1] - 1]

import time
import timeit

In [None]:
# Timed pairwise scan: runs `epistatis` over every SNP pair once and reports the duration
def run_time_script():
    """Run `epistatis` on every pair of SNP columns of `data` and time the scan.

    Reads the module-level `data` and `phenotype`. Every column of `data`
    except the last one is treated as a SNP. The (p-value, column, column)
    tuples are collected locally and discarded; only the timing is returned.

    Returns
    -------
    float
        Seconds elapsed for the full scan, measured with `time.perf_counter`.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # system clock, which can be adjusted mid-run and may be too coarse for
    # short scans.
    start_time = time.perf_counter()

    # original author's note: p-values are very low and do not match regression output or PLINK output
    p_value_locus_original = []  # list of tuples containing the p_value and the two interacting loci
    snp_count = data.shape[1] - 1  # the last column is excluded from the SNP pairs
    for i in range(snp_count):
        for j in range(i + 1, snp_count):
            # columns are sliced once per pair, as before, so the timed workload is unchanged
            interacting_snp_1 = data.iloc[:, i]
            interacting_snp_2 = data.iloc[:, j]
            # sandra's implementation
            b3, t_test, p_val = epistatis(interacting_snp_1, interacting_snp_2, phenotype)
            p_value_locus_original.append((p_val, data.columns[i], data.columns[j]))

    return time.perf_counter() - start_time

# How many independent timings to collect
num_runs = 10

# One timeit measurement (a single call of run_time_script) per repetition
elapsed_times_nick = timeit.repeat(run_time_script, repeat=num_runs, number=1)

# Report every timing, numbering the runs from 1
for i, time_nick in enumerate(elapsed_times_nick):
    print(f"Run {i+1}: Elapsed time - {time_nick:.6f} seconds")

In [None]:
# 100 SNPs with 8,000 observations; the last column is taken as the phenotype vector
data = pd.read_csv('sample100_8000obs.csv')
phenotype = data.iloc[:, data.shape[1] - 1]

import time
import timeit

In [None]:
# Timed pairwise scan: runs `epistatis` over every SNP pair once and reports the duration
def run_time_script():
    """Run `epistatis` on every pair of SNP columns of `data` and time the scan.

    Reads the module-level `data` and `phenotype`. Every column of `data`
    except the last one is treated as a SNP. The (p-value, column, column)
    tuples are collected locally and discarded; only the timing is returned.

    Returns
    -------
    float
        Seconds elapsed for the full scan, measured with `time.perf_counter`.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # system clock, which can be adjusted mid-run and may be too coarse for
    # short scans.
    start_time = time.perf_counter()

    # original author's note: p-values are very low and do not match regression output or PLINK output
    p_value_locus_original = []  # list of tuples containing the p_value and the two interacting loci
    snp_count = data.shape[1] - 1  # the last column is excluded from the SNP pairs
    for i in range(snp_count):
        for j in range(i + 1, snp_count):
            # columns are sliced once per pair, as before, so the timed workload is unchanged
            interacting_snp_1 = data.iloc[:, i]
            interacting_snp_2 = data.iloc[:, j]
            # sandra's implementation
            b3, t_test, p_val = epistatis(interacting_snp_1, interacting_snp_2, phenotype)
            p_value_locus_original.append((p_val, data.columns[i], data.columns[j]))

    return time.perf_counter() - start_time

# How many independent timing repetitions to collect
num_runs = 10

# timeit.repeat calls run_time_script once per repetition (number=1) and
# returns the list of per-repetition elapsed times in seconds
elapsed_times_nick = timeit.repeat(run_time_script, number=1, repeat=num_runs)

# Report every repetition on its own line, numbered from 1
for i, time_nick in enumerate(elapsed_times_nick):
    print(f"Run {i+1}: Elapsed time - {time_nick:.6f} seconds")

In [None]:
# Benchmark input: 100 SNPs with 9,000 observations
data = pd.read_csv('sample100_9000obs.csv')
# The phenotype vector is the last column of the loaded table
# NOTE(review): n / df / critical_value were derived from `data` in an earlier
# cell and are not refreshed here - confirm nothing downstream relies on them.
phenotype = data.iloc[:, -1]

import time
import timeit

In [None]:
# Time one full pairwise scan of the currently loaded dataset
def run_time_script():
    """Run the pairwise interaction scan over every SNP pair and time it.

    Reads the notebook-level ``data`` frame and ``phenotype`` series. Every
    column of ``data`` except the last one is treated as a SNP, and
    ``epistatis`` is called once for each unordered pair of SNP columns.

    Returns:
        float: seconds elapsed for the whole scan, measured with
        ``time.perf_counter``.
    """
    # perf_counter is monotonic, so the measured interval cannot be distorted
    # (or go negative) when the system clock is adjusted, unlike time.time().
    start_time = time.perf_counter()

    # Note kept from the original author: the p-values produced here are very
    # low and do not match regression output or PLINK output.
    p_value_locus_original = []  # (p_value, locus_1, locus_2) tuples

    # The last column holds the phenotype, so it is excluded from the SNP pairs.
    snp_count = data.shape[1] - 1
    for i in range(snp_count):
        for j in range(i + 1, snp_count):
            interacting_snp_1 = data.iloc[:, i]
            interacting_snp_2 = data.iloc[:, j]
            # Only the p-value is recorded; the coefficient and t statistic
            # returned alongside it are not needed for the timing run.
            _, _, p_val = epistatis(interacting_snp_1, interacting_snp_2, phenotype)
            p_value_locus_original.append((p_val, data.columns[i], data.columns[j]))

    return time.perf_counter() - start_time

# How many independent timing repetitions to collect
num_runs = 10

# timeit.repeat calls run_time_script once per repetition (number=1) and
# returns the list of per-repetition elapsed times in seconds
elapsed_times_nick = timeit.repeat(run_time_script, number=1, repeat=num_runs)

# Report every repetition on its own line, numbered from 1
for i, time_nick in enumerate(elapsed_times_nick):
    print(f"Run {i+1}: Elapsed time - {time_nick:.6f} seconds")

In [None]:
# Benchmark input: 100 SNPs with 10,000 observations
data = pd.read_csv('sample100_10000obs.csv')
# The phenotype vector is the last column of the loaded table
# NOTE(review): n / df / critical_value were derived from `data` in an earlier
# cell and are not refreshed here - confirm nothing downstream relies on them.
phenotype = data.iloc[:, -1]

import time
import timeit

In [None]:
# Time one full pairwise scan of the currently loaded dataset
def run_time_script():
    """Run the pairwise interaction scan over every SNP pair and time it.

    Reads the notebook-level ``data`` frame and ``phenotype`` series. Every
    column of ``data`` except the last one is treated as a SNP, and
    ``epistatis`` is called once for each unordered pair of SNP columns.

    Returns:
        float: seconds elapsed for the whole scan, measured with
        ``time.perf_counter``.
    """
    # perf_counter is monotonic, so the measured interval cannot be distorted
    # (or go negative) when the system clock is adjusted, unlike time.time().
    start_time = time.perf_counter()

    # Note kept from the original author: the p-values produced here are very
    # low and do not match regression output or PLINK output.
    p_value_locus_original = []  # (p_value, locus_1, locus_2) tuples

    # The last column holds the phenotype, so it is excluded from the SNP pairs.
    snp_count = data.shape[1] - 1
    for i in range(snp_count):
        for j in range(i + 1, snp_count):
            interacting_snp_1 = data.iloc[:, i]
            interacting_snp_2 = data.iloc[:, j]
            # Only the p-value is recorded; the coefficient and t statistic
            # returned alongside it are not needed for the timing run.
            _, _, p_val = epistatis(interacting_snp_1, interacting_snp_2, phenotype)
            p_value_locus_original.append((p_val, data.columns[i], data.columns[j]))

    return time.perf_counter() - start_time

# How many independent timing repetitions to collect
num_runs = 10

# timeit.repeat calls run_time_script once per repetition (number=1) and
# returns the list of per-repetition elapsed times in seconds
elapsed_times_nick = timeit.repeat(run_time_script, number=1, repeat=num_runs)

# Report every repetition on its own line, numbered from 1
for i, time_nick in enumerate(elapsed_times_nick):
    print(f"Run {i+1}: Elapsed time - {time_nick:.6f} seconds")