In [1]:
import pandas as pd
import numpy as np

from functions import calculate_OR, calculate_P



In [2]:

# Read the joined table from disk. From it we keep only the outcome column
# 'ГГ' plus every column from positional index 114 onwards.
data = pd.read_csv('../data/join_filled.csv')

x = data['ГГ'].copy()
y = data.iloc[:, 114:].copy()

# Reassemble the reduced frame, with the outcome column renamed to 'case'.
data = pd.concat([x, y], axis=1).rename(columns={'ГГ': 'case'})

# Binarise the outcome: 1 where the value is at or above the column mean, 0 otherwise.
data['case'] = data['case'].ge(data['case'].mean()).astype(int)

In [3]:

# Every column other than the 'case' label is a column for which the
# odds ratio is calculated; original column order is preserved.
genotype_cols = data.columns[data.columns != 'case'].tolist()

In [4]:

# Maps each genotype column name to a flat list of statistics:
# the two p-values from calculate_P, followed by the unpacked outputs of
# calculate_OR for the allele, codominant, dominant and recessive models.
all_values = {}

# The case/control row masks depend only on 'case', so build them once
# instead of re-filtering the whole frame for every genotype column.
is_case = data['case'] == 1
is_control = data['case'] == 0

# Models are evaluated in this order (same order as the original calls).
or_models = ('dominant', 'recessive', 'codominant', 'allele')

# loop over genotype columns
for col in genotype_cols:
    # split into case and control groups
    case_group = data.loc[is_case, col]
    control_group = data.loc[is_control, col]

    # Extract wild-type and mutant alleles from the column name: its last three
    # characters are split on '>' (e.g. a name ending in 'A>G' gives w='A', m='G').
    # A name whose tail does not contain exactly one '>' raises ValueError here.
    w, m = col[-3:].split('>')
    wild, heterozygous, mutant = w + w, w + m, m + m

    # Run calculate_OR once per genetic model; only the `model` argument differs.
    # NOTE(review): presumably each call returns odds ratios with confidence
    # intervals - confirm against functions.calculate_OR.
    or_by_model = {
        model: calculate_OR(
            case_group, control_group, wild, heterozygous, mutant, model=model
        )
        for model in or_models
    }

    p_all, p_gen = calculate_P(data['case'], data[col])

    # Packing order defines the column order of the results table built from
    # all_values: p-values, then allele, codominant, dominant, recessive.
    all_values[col] = [
        p_all, p_gen,
        *or_by_model['allele'],
        *or_by_model['codominant'],
        *or_by_model['dominant'],
        *or_by_model['recessive'],
    ]

In [None]:
# Labels for the values stored per genotype column in all_values, in the order
# they were packed: two p-values, then the allele, codominant ("general"),
# dominant and recessive model outputs.
result_columns = [
    'p_value_allele',
    'p_value_genotype',
    'allele_M', 'allele_M_CI',
    'allele_W', 'allele_W_CI',
    'general_W', 'general_W_CI',
    'general_Ht', 'general_Ht_CI',
    'general_M', 'general_M_CI',
    'dominant_W', 'dominant_W_CI',
    'dominant_Ht_M', 'dominant_Ht_M_CI',
    'recessive_W_Ht', 'recessive_W_Ht_CI',
    'recessive_M', 'recessive_M_CI',
]

# Build the table row-wise: one row per key of all_values (the genotype column
# name becomes the index). Constructing it with orient='index' avoids the
# transpose, which collapses a mixed-type frame to a single object dtype, and
# yields an empty, correctly labelled frame when all_values is empty.
# A ValueError is still raised if a row's length does not match result_columns.
res = pd.DataFrame.from_dict(all_values, orient='index', columns=result_columns)

# Optional: expose the index as a regular 'rsid' column.
# res.reset_index(inplace=True)
# res.rename(columns={'index':'rsid'}, inplace=True)

In [None]:
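# Uncomment to write the results table to disk.
# NOTE(review): the input is read from '../data/', but this writes to './data/' -
# confirm the intended output directory before enabling.
# res.to_csv('./data/stats.csv')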