Skip to content

Commit

Permalink
Release 2.3.1
Browse files (browse the repository at this point in the history)
  • Loading branch information
AndreRicoPSU committed Feb 8, 2023
1 parent 33cabbb commit 98f78ee
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 26 deletions.
7 changes: 5 additions & 2 deletions clarite/internal/utilities.py
@@ -1,6 +1,6 @@
from functools import wraps
from importlib.util import find_spec
from typing import Optional, List, Union
from typing import List, Optional, Union

import click
import pandas as pd
Expand Down Expand Up @@ -208,7 +208,10 @@ def _remove_empty_categories(
existing_cats = data[var].cat.categories
if data[var].cat.ordered:
print()
data[var] = data[var].cat.remove_unused_categories()
# GITHUB ISSUE #120: SettingWithCopyWarning on Regression runs
# data[var] = data[var].cat.remove_unused_categories()
data.loc[:, var] = data[var].cat.remove_unused_categories()

removed_categories = set(existing_cats) - set(data[var].cat.categories)
if len(removed_categories) > 0:
removed_cats[var] = removed_categories
Expand Down
5 changes: 4 additions & 1 deletion clarite/modules/analyze/regression/glm_regression.py
Expand Up @@ -17,6 +17,9 @@
from ..utils import fix_names, statsmodels_var_regex
from .base import Regression

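# GITHUB ISSUE #119: Regressions raise errors under multiprocessing on Python > 3.8
# NOTE(review): get_start_method() only reports the current start method (its single
# parameter is allow_none); selecting "fork" would need set_start_method("fork") - confirm intent.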
multiprocessing.get_start_method("fork")


class GLMRegression(Regression):
"""
Expand Down Expand Up @@ -127,7 +130,7 @@ def __init__(
counts = self.data[self.outcome_variable].value_counts().to_dict()

categories = self.data[self.outcome_variable].cat.categories
# GITHUB/ISSUES 115: Keep control as 0 and case as 1
# GITHUB ISSUE #115: Keep control as 0 and case as 1
if categories[0] == "Case" and categories[1] == "Control":
categories = sorted(categories, reverse=True)

Expand Down
3 changes: 3 additions & 0 deletions clarite/modules/analyze/regression/interaction_regression.py
Expand Up @@ -15,6 +15,9 @@
from ..utils import fix_names
from . import GLMRegression

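# GITHUB ISSUE #119: Regressions raise errors under multiprocessing on Python > 3.8
# NOTE(review): get_start_method() only reports the current start method (its single
# parameter is allow_none); selecting "fork" would need set_start_method("fork") - confirm intent.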
multiprocessing.get_start_method("fork")


class InteractionRegression(GLMRegression):
"""
Expand Down
20 changes: 13 additions & 7 deletions clarite/modules/analyze/regression/weighted_glm_regression.py
@@ -1,20 +1,24 @@
import multiprocessing
import re
from itertools import repeat
from typing import Optional, Dict, List, Tuple
from typing import Dict, List, Optional, Tuple

import click
import numpy as np
import pandas as pd
import patsy
import scipy
import pandas as pd
import statsmodels.api as sm

from .glm_regression import GLMRegression
from clarite.modules.survey import SurveyDesignSpec, SurveyModel
from clarite.internal.calculations import regTermTest
from clarite.internal.utilities import _remove_empty_categories, _get_dtypes
from ..utils import statsmodels_var_regex, fix_names
from clarite.internal.utilities import _get_dtypes, _remove_empty_categories
from clarite.modules.survey import SurveyDesignSpec, SurveyModel

from ..utils import fix_names, statsmodels_var_regex
from .glm_regression import GLMRegression

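# GITHUB ISSUE #119: Regressions raise errors under multiprocessing on Python > 3.8
# NOTE(review): get_start_method() only reports the current start method (its single
# parameter is allow_none); selecting "fork" would need set_start_method("fork") - confirm intent.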
multiprocessing.get_start_method("fork")


class WeightedGLMRegression(GLMRegression):
Expand Down Expand Up @@ -385,7 +389,9 @@ def _run_weighted_rv(
~data[[rv, outcome_variable] + covariates].isna().any(axis=1)
)
# If allowed (an error hasn't been raised) negate missing_weight_mask so True=keep to drop those
complete_case_mask = complete_case_mask & ~missing_weight_mask
# GITHUB ISSUE #117: Type error in weighted regression with clusters (missing_weight_mask may be None)
if missing_weight_mask is not None:
complete_case_mask = complete_case_mask & ~missing_weight_mask

# Count restricted rows
restricted_rows = survey_design_spec.subset_array & complete_case_mask
Expand Down
8 changes: 7 additions & 1 deletion clarite/modules/survey/survey_design.py
@@ -1,4 +1,4 @@
from typing import Optional, Union, Dict, Tuple
from typing import Dict, Optional, Tuple, Union

import click
import numpy as np
Expand Down Expand Up @@ -605,6 +605,12 @@ def get_survey_design(self, regression_variable, complete_case_idx):
self.cluster_values.loc[self.subset_array],
)
has_weights, weight_name, weight_values = self.get_weights(regression_variable)
# GITHUB ISSUE #118: Function self.get_weights(regression_variable) returns None
if not has_weights:
has_weights, weight_values = (
False,
self.weight_values.loc[self.subset_array],
)

# Filter out any incomplete cases
strata_values = strata_values.loc[complete_case_idx]
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "clarite"
version = "2.3.0"
version = "2.3.1"
description = "CLeaning to Analysis: Reproducibility-based Interface for Traits and Exposures"
authors = ["Andre Rico <alr6366@psu.edu>"]
license = "BSD-3-Clause"
Expand Down
11 changes: 1 addition & 10 deletions tests/analyze/test_association_study.py
Expand Up @@ -996,22 +996,13 @@ def test_edge_encondig_logistic_regression():
interaction=0,
random_seed=2021,
)
test = sim.BAMS.from_model(
eff1=sim.SNPEffectEncodings.ADDITIVE,
eff2=sim.SNPEffectEncodings.ADDITIVE,
penetrance_base=0.45,
main1=1,
main2=0,
interaction=0,
)
train_add = train.generate_case_control(n_cases=5000, n_controls=5000)
test_add = test.generate_case_control(n_cases=5000, n_controls=5000)
edge_weights = train_add.genomics.calculate_edge_encoding_values(
data=train_add["Outcome"], outcome_variable="Outcome"
)

edge_results = clarite.analyze.association_study(
data=test_add,
data=train_add,
outcomes="Outcome",
encoding="edge",
edge_encoding_info=edge_weights,
Expand Down
9 changes: 5 additions & 4 deletions tests/analyze/test_gwas.py
@@ -1,7 +1,6 @@
import pytest

import pandas as pd
import numpy as np
import pandas as pd
import pytest

import clarite
from clarite.modules.survey import SurveyDesignSpec
Expand Down Expand Up @@ -30,7 +29,7 @@ def test_bams_interaction(genotype_case_control_rec_rec_onlyinteraction):
assert result_interaction.loc[("SNP1", "SNP2", "Outcome"), "LRT_pvalue"] <= 1e-5


@pytest.mark.slow
# @pytest.mark.slow
@pytest.mark.parametrize("process_num", [None, 1])
def test_largeish_gwas(large_gwas_data, process_num):
"""10k samples with 1000 SNPs"""
Expand All @@ -52,6 +51,8 @@ def test_largeish_gwas(large_gwas_data, process_num):
weights="weights",
),
)
assert results == results
assert results_weighted == results_weighted
# TODO: Add useful asserts rather than just making sure it runs


Expand Down

0 comments on commit 98f78ee

Please sign in to comment.