In [1]:
import numpy as np
import h5py
import glob
import datetime
import matplotlib.pyplot as plt
import pandas as pd
import logging
import argparse

import sys
import os
import sib_ldsc_z as ld

def print_call(call):
    
    '''
    Gives the call given to python
    in a nice string
    '''
    
    message = ''
    for i in range(len(call)):
        if call[i][0] != "-":
            message += call[i]
            message += ' \\ \n'
        else:
            message += call[i] + " "
    
    return message[1:-1]

In [3]:
files = glob.glob("/disk/genetics/ukb/alextisyoung/haplotypes/relatives/traits/12//chr_*.hdf5")

file = files[0]
print("Reading in file: ", file)
hf = h5py.File(file, 'r')
metadata = hf.get("bim")[()]
chromosome = metadata[:, 0]
snp = metadata[:, 1]
bp = metadata[:, 3]
theta  = hf.get('estimate')[()]
se  = hf.get('estimate_ses')[()]
N = hf.get('N_L')[()]
S = hf.get('estimate_covariance')[()]
f = hf.get('freqs')[()]

# normalizing S
sigma2 = hf.get('sigma2')[()]
tau = hf.get('tau')[()]
phvar = sigma2+sigma2/tau

if len(files) > 1:
    for file in files[1:]:
        print("Reading in file: ", file)
        hf = h5py.File(file, 'r')
        metadata = hf.get("bim")[()]
        chromosome_file = metadata[:, 0]  
        snp_file = metadata[:, 1]
        bp_file = metadata[:, 3]
        theta_file  = hf.get('estimate')[()]
        se_file  = hf.get('estimate_ses')[()]
        S_file = hf.get('estimate_covariance')[()]
        f_file = hf.get('freqs')[()]
        N_file = hf.get('N_L')[()]

        # normalizing S
        sigma2 = hf.get('sigma2')[()]
        tau = hf.get('tau')[()]

        chromosome = np.append(chromosome, chromosome_file, axis = 0)
        snp = np.append(snp, snp_file, axis = 0)
        bp = np.append(bp, bp_file, axis = 0)
        theta = np.append(theta, theta_file, axis = 0)
        se = np.append(se, se_file, axis = 0)
        S = np.append(S, S_file, axis = 0)
        f = np.append(f, f_file, axis = 0)
        N = np.append(N, N_file, axis = 0)

# Constructing dataframe of data
zdata = pd.DataFrame({'CHR' : chromosome,
                    'SNP' : snp,
                    'BP' : bp,
                    'N' : N,
                    "f" : f,
                    'theta' : theta.tolist(),
                    'se' : se.tolist(),
                    "S" : S.tolist()})


zdata['CHR'] = zdata['CHR'].astype(int)
zdata['SNP'] = zdata['SNP'].astype(str).str.replace("b'", "").str[:-1]
zdata['BP'] = zdata['BP'].astype(str).str.replace("b'", "").str[:-1]
zdata['BP'] = zdata['BP'].astype('int')

Reading in file:  /disk/genetics/ukb/alextisyoung/haplotypes/relatives/traits/12/chr_14.hdf5
Reading in file:  /disk/genetics/ukb/alextisyoung/haplotypes/relatives/traits/12/chr_21.hdf5
Reading in file:  /disk/genetics/ukb/alextisyoung/haplotypes/relatives/traits/12/chr_10.hdf5
Reading in file:  /disk/genetics/ukb/alextisyoung/haplotypes/relatives/traits/12/chr_6.hdf5
Reading in file:  /disk/genetics/ukb/alextisyoung/haplotypes/relatives/traits/12/chr_18.hdf5
Reading in file:  /disk/genetics/ukb/alextisyoung/haplotypes/relatives/traits/12/chr_2.hdf5
Reading in file:  /disk/genetics/ukb/alextisyoung/haplotypes/relatives/traits/12/chr_7.hdf5
Reading in file:  /disk/genetics/ukb/alextisyoung/haplotypes/relatives/traits/12/chr_19.hdf5
Reading in file:  /disk/genetics/ukb/alextisyoung/haplotypes/relatives/traits/12/chr_3.hdf5
Reading in file:  /disk/genetics/ukb/alextisyoung/haplotypes/relatives/traits/12/chr_15.hdf5
Reading in file:  /disk/genetics/ukb/alextisyoung/haplotypes/relatives/tra

In [9]:
zdata[zdata['f'] >= 0.05].shape

(582648, 8)

In [10]:
zdata.shape

(597358, 8)