# Shotgun Stochastic Search
<br>

> #### Gino Prasad
> #### 03/06/2024
<br>


In [1]:
import bf
import fm
import importlib
import os
from tqdm import tqdm
# Reload the project-local modules and rebind their names to whatever
# importlib.reload returns, so the cells below use the reloaded modules.
bf, fm = importlib.reload(bf), importlib.reload(fm)

In [2]:
import numpy as np

python src/fm.py \
  -z example/eQTL/region.LOC284581.chr1.205831207.205865215.dosage.p1e-12.z \
  -r example/eQTL/region.LOC284581.chr1.205831207.205865215.dosage.p1e-12.LD \
  -o pyfm_results \
  -n 471 -c 2 -t 0 -e 0.1 -a 1.6

In [3]:
def parse_args(zfile='../example/eQTL/region.LOC284581.chr1.205831207.205865215.dosage.p1e-12.z', rfile='../example/eQTL/region.LOC284581.chr1.205831207.205865215.dosage.p1e-12.LD', outdir='../pyfm_results',
              max_causal=2, epsilon=0.1, sample_number=471, approx_bf=False, prior_values='1.6', prior_type=0, rho=1):
    """Normalise the run arguments and load the input data.

    Numeric arguments may be given either as numbers or as the strings a
    command line would supply; they are coerced to their numeric types here.

    Args:
        zfile: Path to the z-score file passed to ``fm.Data``.
        rfile: Path to the LD file passed to ``fm.Data``.
        outdir: Output directory; created (with parents) if it does not exist.
        max_causal: Maximum number of causal variants (coerced with ``int``).
        epsilon: Coerced with ``float`` and returned as ``e``.
        sample_number: Sample size (coerced with ``int``), returned as ``n``.
        approx_bf: Passed through unchanged to ``fm.Data`` and the caller.
        prior_values: Whitespace-separated string of floats, or a number /
            sequence of numbers.
        prior_type: Switch selecting how ``prior_values`` is transformed.
            Falsy (e.g. ``0`` or ``'0'``): values are squared (sigmaa^2).
            Truthy: values ``p`` are mapped to ``p / (1 - p)``.
        rho: Coerced with ``float``.

    Returns:
        Tuple ``(data, n, pve_for_prior, prior_values, e, max_causal,
        configs_method, approx_bf, rho, outdir)``.

    Raises:
        FileNotFoundError: If ``zfile`` or ``rfile`` is not an existing file.
        ValueError: If a numeric argument cannot be parsed.
    """
    n = int(sample_number)

    # Fail early with a clear message instead of deep inside the data loader.
    for label, path in (("zfile", zfile), ("rfile", rfile)):
        if not os.path.isfile(path):
            raise FileNotFoundError(f"{label} not found: {path}")
    data = fm.Data(zfile, rfile, n, approx_bf)

    # Results are written under outdir later on, so make sure it exists.
    os.makedirs(outdir, exist_ok=True)

    # Parse string switches numerically first: bool('0') is True, which would
    # silently select the PVE prior for a command-line style '0'.
    if isinstance(prior_type, str):
        prior_type = int(prior_type)
    pve_for_prior = bool(prior_type)

    if isinstance(prior_values, str):
        prior_values = np.array([float(x) for x in prior_values.split()])
    else:
        prior_values = np.atleast_1d(np.asarray(prior_values, dtype=float))
    e = float(epsilon)

    if not pve_for_prior:
        prior_values = prior_values**2  # sigmaa^2
    else:
        prior_values = prior_values / (1 - prior_values)

    max_causal = int(max_causal)

    configs_method = 'AllConfigurations'

    rho = float(rho)
    return (
        data,
        n,
        pve_for_prior,
        prior_values,
        e,
        max_causal,
        configs_method,
        approx_bf,
        rho,
        outdir,
    )

In [4]:
# Build the run configuration from parse_args' defaults, then unpack the
# ten-element result into the notebook-level names used by the cells below.
parsed_args = parse_args()
data, n, pve_for_prior, prior_values, e = parsed_args[:5]
max_causal, configs_method, approx_bf, rho, outdir = parsed_args[5:]

[[ 1.          0.86813357  0.71818377 ...  0.30319694  0.30546027
  -0.08762142]
 [ 0.86813357  1.          0.74716836 ...  0.29319762  0.29436166
  -0.07504671]
 [ 0.71818377  0.74716836  1.         ...  0.24585774  0.24368099
  -0.14142469]
 ...
 [ 0.30319694  0.29319762  0.24585774 ...  1.          0.99077505
  -0.65526116]
 [ 0.30546027  0.29436166  0.24368099 ...  0.99077505  1.
  -0.64255341]
 [-0.08762142 -0.07504671 -0.14142469 ... -0.65526116 -0.64255341
   1.        ]]
(237, 237)


In [5]:
# Path handed to bf.calculate_BFs as its last argument (BF.tsv inside outdir).
bf_table_path = os.path.join(outdir, "BF.tsv")

# Score configurations with the parsed settings; keep the per-configuration
# scores and the maximum Bayes factor for the following cells.
config_scores, max_BF = bf.calculate_BFs(
    data, n, pve_for_prior, prior_values, e,
    max_causal, configs_method, approx_bf,
    bf_table_path,
)

Expored models with 2 causal variant in 0.07363581657409668 seconds
n_causal 2
best config [153]
best score 150.473966548052


In [6]:
# Priors from fm.calculate_priors for data.m variants and up to max_causal
# causal variants. Presumably a mapping from number of causal variants to
# log prior (inferred from the variable name) -- TODO confirm in fm.
n_causal2log_prior = fm.calculate_priors(data.m, max_causal)

In [7]:
# Pass the configuration scores, the priors and the maximum Bayes factor to
# fm.calculate_scores (presumably to turn Bayes factors into prior-weighted
# scores -- TODO confirm in fm).
# NOTE(review): this cell rebinds `config_scores` to the returned value, so it
# is not idempotent: re-running it without first re-running the
# bf.calculate_BFs cell feeds the already-processed value back in.
config_scores, total_score = fm.calculate_scores(
    config_scores, n_causal2log_prior, max_BF
)

In [8]:
# Per-variant weights: the supplied PVE values when present, otherwise ones.
if data.pve is not None:
    pve_weights = data.pve.values
else:
    pve_weights = np.ones(data.m)

# Scale by n when approx_bf is set, and by n - 1 otherwise.
sample_scale = n if approx_bf else (n - 1)
u = pve_weights * sample_scale

In [10]:
def sss_score_config(t):
    """Score configuration `t` with bf.config_BF using the notebook's settings."""
    return bf.config_BF(
        t, data, n, pve_for_prior, prior_values, e, approx_bf, u
    )


# Search object for the shotgun stochastic search over data.m variants.
config_iter = fm.configurations.SSSConfigurations(
    max_causal=max_causal, m=data.m, score_config=sss_score_config
)

In [11]:
# Run the search for 100 steps; the recorded output below appears to be a
# (configuration, score) pair.
# NOTE(review): no random seed is set in the visible cells, so the result may
# differ between runs -- TODO confirm which RNG the search uses and seed it.
config_iter.search(num_steps=100)

100%|█████████████████████████| 100/100 [00:05<00:00, 17.21it/s]


(array([156, 153]), 167.52641869933765)

In [12]:
def all_configs_score(t):
    """Score configuration `t` with bf.config_BF using the notebook's settings."""
    return bf.config_BF(
        t, data, n, pve_for_prior, prior_values, e, approx_bf, u
    )


# NOTE(review): this rebinds `config_iter`, replacing the SSSConfigurations
# object created earlier; every later use of `config_iter` refers to this one.
# NOTE(review): the recorded output below reports best config [153] with score
# ~150.47, while the SSS run above reported [156, 153] with ~167.53 -- worth
# confirming that this search really covers two-variant configurations.
config_iter = fm.configurations.AllConfigurations(
    max_causal=max_causal, m=data.m, score_config=all_configs_score
)
config_iter.search()

Expored models with 2 causal variant in 0.07263612747192383 seconds
n_causal 2
best config [153]
best score 150.473966548052


In [16]:
# Interactive inspection: list the attribute names available on config_iter.
# NOTE(review): leftover exploration. The execution counts around this cell
# are out of order (16 appears before 14), so which object `config_iter`
# referred to when this ran cannot be determined from the notebook alone.
dir(config_iter)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'best_config',
 'best_score',
 'config',
 'current_score',
 'ended',
 'get_scores_by_n_causal',
 'm',
 'max_causal',
 'n_causal',
 'next',
 'score_config',
 'search',
 'visited_config_scores']

In [14]:
# Score the configuration [156, 153] directly through the iterator's
# score_config callable; the recorded value matches the score shown in the
# output of the SSS search cell for the same configuration.
config_iter.score_config(np.array([156, 153]))

167.52641869933765