In [41]:
from pandas_genomics.scalars import Variant
from pandas_genomics import sim, io, scalars, GenotypeDtype
import pandas as pd
import numpy as np
import clarite
from clarite import analyze
import time

In [6]:
start = time.time()

# Define the variant for two SNPs

variant1 = Variant("1", 1, id="rs1", ref="A", alt=["C"])
variant2 = Variant("1", 2, id="rs2", ref="G", alt=["T"])

# Define Case-Control ratio
num_samples = 1000
case_control_ratio = "1:3"
n_cases = int(num_samples / 4)
n_controls = num_samples - n_cases
PEN_BASE = 0.05
PEN_DIFF = 0.25
MAFA = 0.05
MAFB = 0.05
SNR = 0.01
# Result files
EDGE_alpha_Final = pd.DataFrame()
Recessive_Results_Final = pd.DataFrame()
Sub_Additive_Results_Final = pd.DataFrame()
Additive_Results_Final = pd.DataFrame()
Super_Additive_Results_Final = pd.DataFrame()
Dominant_Results_Final = pd.DataFrame()
Het_Results_Final = pd.DataFrame()
EDGE_Results_Final = pd.DataFrame()
NULL_Results_Final = pd.DataFrame()
All_Results_Final = pd.DataFrame()

In [12]:
train_seed = 1
test_seed = 2001

startcycle = time.time()
# Recessive Main Effect for SNP1 without interaction
# Training data
eff1 = sim.SNPEffectEncodings.RECESSIVE
eff2 = sim.SNPEffectEncodings.ADDITIVE

train_rec_main_effect00 = sim.BAMS.from_model(
        eff1=sim.SNPEffectEncodings.RECESSIVE,
        eff2=sim.SNPEffectEncodings.ADDITIVE,
        penetrance_base=PEN_BASE,
        penetrance_diff=PEN_DIFF,
        main1=1,
        main2=0,
        interaction=0,
        snp1=variant1,
        snp2=variant2,
        random_seed=train_seed,
    )

train_rec_main_effect00   

<pandas_genomics.sim.biallelic_model_simulator.BAMS at 0x7ff389693460>

In [24]:
train_rec_me_pb000 = train_rec_main_effect00.generate_case_control(
    n_cases=n_cases, n_controls=n_controls, maf1=MAFA, maf2=MAFB, snr=SNR)

train_rec_me_pb000

Unnamed: 0,Outcome,SNP1,SNP2
0,Control,A/A,G/G
1,Control,A/C,G/G
2,Control,A/A,G/G
3,Control,A/A,G/G
4,Case,A/A,G/G
...,...,...,...
995,Control,A/A,G/G
996,Case,A/A,G/G
997,Control,A/C,G/G
998,Case,A/A,G/G


In [37]:
edge_weights_rec_me_pb000 = (train_rec_me_pb000.genomics.calculate_edge_encoding_values(
            data=train_rec_me_pb000["Outcome"], outcome_variable="Outcome"
        )
)
edge_weights_rec_me = edge_weights_rec_me_pb000.copy()
edge_weights_rec_me.insert(loc=0, column="BioAct", value="Recessive")
edge_weights_rec_me.insert(loc=0, column="TrainSeed", value=train_seed)
edge_weights_rec_me.insert(loc=0, column="TestSeed", value=test_seed)
edge_weights_rec_me

Binary Outcome (family = Binomial): 'Outcome'
	250 occurrences of 'Case' coded as 0
	750 occurrences of 'Control' coded as 1


Unnamed: 0,TestSeed,TrainSeed,BioAct,Variant ID,Alpha Value,Ref Allele,Alt Allele,Minor Allele Frequency
0,2001,1,Recessive,rs1,-1.844349,A,C,0.0535
1,2001,1,Recessive,rs2,0.050942,G,T,0.054


In [26]:
# Test data
test_rec_main_effect00 = sim.BAMS.from_model(
        eff1=sim.SNPEffectEncodings.RECESSIVE,
        eff2=sim.SNPEffectEncodings.ADDITIVE,
        penetrance_base=PEN_BASE,
        penetrance_diff=PEN_DIFF,
        main1=1,
        main2=0,
        interaction=0,
        snp1=variant1,
        snp2=variant2,
        random_seed=test_seed,
    )

In [38]:
test_rec_me_pb000 = test_rec_main_effect00.generate_case_control(
        n_cases=n_cases, n_controls=n_controls, maf1=MAFA, maf2=MAFB, snr=SNR
    )
#test_rec_me_pb000["Outcome"].cat.reorder_categories(["Control", "Case"], inplace=True)
test_rec_me_pb000


Unnamed: 0,Outcome,SNP1,SNP2
0,Control,A/C,G/G
1,Control,A/A,G/G
2,Control,A/A,G/G
3,Control,A/A,G/G
4,Control,A/A,G/G
...,...,...,...
995,Control,A/A,G/G
996,Case,A/A,G/G
997,Control,A/A,G/G
998,Control,A/A,G/G


In [36]:
# Run Regression by using weightes from CLARITE
# Addtive Encoding
test_rec_me_pb000_ADD = test_rec_me_pb000.genomics.encode_additive()
test_rec_me_pb000_ADD


Unnamed: 0,Outcome,SNP1,SNP2
0,Control,0.0,0.0
1,Control,0.0,0.0
2,Control,0.0,0.0
3,Control,0.0,0.0
4,Control,0.0,0.0
...,...,...,...
995,Control,0.0,0.0
996,Case,0.0,0.0
997,Control,0.0,0.0
998,Control,0.0,0.0


In [42]:
add_results_rec_me_pb000 = analyze.association_study(data=test_rec_me_pb000_ADD, outcomes="Outcome")
add_results_rec_me_pb000["odds ratio"] = np.exp(add_results_rec_me_pb000["Beta"])
add_results_rec_me_pb000.insert(loc=0, column="Encoding", value="Additive")
add_results_rec_me_pb000.insert(loc=0, column="BioAct", value="Recessive")
add_results_rec_me_pb000.insert(loc=0, column="TrainSeed", value=train_seed)
add_results_rec_me_pb000.insert(loc=0, column="TestSeed", value=test_seed)

AttributeError: module 'clarite.modules.analyze' has no attribute 'association_study'