Skip to content

Commit

Permalink
align with pandas 1.5
Browse files Browse the repository at this point in the history
  • Loading branch information
AndreRicoPSU committed Jul 3, 2023
1 parent 584fbb5 commit 817ccad
Show file tree
Hide file tree
Showing 5 changed files with 41,549 additions and 6 deletions.
14 changes: 12 additions & 2 deletions clarite/modules/analyze/regression/glm_regression.py
Expand Up @@ -18,7 +18,7 @@
from .base import Regression

# GITHUB ISSUE #119: Regressions with Error after Multiprocessing release python > 3.8
multiprocessing.get_start_method("fork")
# multiprocessing.get_start_method("fork")


class GLMRegression(Regression):
Expand Down Expand Up @@ -129,7 +129,9 @@ def __init__(
# Use the order according to the categorical
counts = self.data[self.outcome_variable].value_counts().to_dict()

categories = self.data[self.outcome_variable].cat.categories
# Sort the categories so the codes below are assigned in sorted order
categories = sorted(self.data[self.outcome_variable].cat.categories)

# GITHUB ISSUES #115: Keep control as 0 and case as 1
if categories[0] == "Case" and categories[1] == "Control":
categories = sorted(categories, reverse=True)
Expand All @@ -138,6 +140,14 @@ def __init__(

codes, categories = zip(*enumerate(categories))
self.data[self.outcome_variable].replace(categories, codes, inplace=True)

# After upgrading to pandas >= 1.5, replace() no longer converts the column to float.
# If the column stays categorical, creating y produces two columns and inverts the
# sign of the beta.
self.data[self.outcome_variable] = self.data[self.outcome_variable].astype(
float
)

self.description += (
f"Binary Outcome (family = Binomial): '{self.outcome_variable}'\n"
f"\t{counts[categories[0]]:,} occurrences of '{categories[0]}' coded as 0\n"
Expand Down
9 changes: 6 additions & 3 deletions clarite/modules/analyze/regression/r_survey_regression.py
Expand Up @@ -3,10 +3,10 @@

import pandas as pd

from clarite.internal.utilities import requires, _get_dtypes
from clarite.internal.utilities import _get_dtypes, requires

from .base import Regression
from ...survey import SurveyDesignSpec
from .base import Regression


class RSurveyRegression(Regression):
Expand Down Expand Up @@ -50,6 +50,8 @@ def __init__(
min_n: int = 200,
report_categorical_betas: bool = False,
standardize_data: bool = False,
encoding=None, # TODO: Error on call
edge_encoding_info=None, # TODO: Error on call
):
# base class init
# This takes in minimal regression params (data, outcome_variable, covariates) and
Expand Down Expand Up @@ -175,7 +177,8 @@ def run(self):
# Source R script to define the function
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
from .r_code.r_utilities import ewasresult2py, df_pandas2r

from .r_code.r_utilities import df_pandas2r, ewasresult2py

r_code_folder = Path(__file__).parent / "r_code"
filename = str(r_code_folder / "ewas_r.R")
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "clarite"
version = "2.3.3"
version = "2.3.4"
description = "CLeaning to Analysis: Reproducibility-based Interface for Traits and Exposures"
authors = ["Andre Rico <alr6366@psu.edu>"]
license = "BSD-3-Clause"
Expand Down
55 changes: 55 additions & 0 deletions tests/analyze/test_interaction_study.py
Expand Up @@ -2,6 +2,7 @@

import numpy as np
import pandas as pd

import clarite

TESTS_PATH = Path(__file__).parent.parent
Expand Down Expand Up @@ -225,3 +226,57 @@ def test_interactions_nhanes_pairwise(data_NHANES):
)
assert (grouped_bonf == python_result_nobeta["LRT_pvalue_bonferroni"]).all()
assert (grouped_fdr == python_result_nobeta["LRT_pvalue_fdr"]).all()


def test_interaction_exe():
    """Smoke-test ``clarite.analyze.interaction_study`` on the nested-table data.

    Loads ``nested_table.csv`` from the repository's test data folder and runs
    a single pairwise interaction (``DR1TSFAT`` x ``DRDSDT1``) against the
    ``LBXHGB`` outcome with a fixed covariate list. The test passes when the
    call completes without raising and returns a result.
    """
    # Resolve the data file relative to the tests folder rather than through a
    # machine-specific absolute path, so the test runs on any checkout.
    data_file = TESTS_PATH / "test_data_files" / "nested_table.csv"
    nested_table = clarite.load.from_csv(str(data_file))

    # NOTE(review): the original author recorded that explicitly casting the
    # columns with clarite.modify.make_binary / make_categorical /
    # make_continuous gives the same result, so the data is used as loaded.

    exposure_1 = "DR1TSFAT"
    exposure_2 = "DRDSDT1"
    covariates = [
        "female",
        "black",
        "mexican",
        "other_hispanic",
        "other_eth",
        "SDDSRVYR",
        "BMXBMI",
        "SES_LEVEL",
        "RIDAGEYR",
        "LBXCOT",
        "IRON_mg",
    ]
    result = clarite.analyze.interaction_study(
        data=nested_table,
        outcomes="LBXHGB",
        interactions=[(exposure_1, exposure_2)],
        covariates=covariates,
    )

    # Replaces the former tautology (``assert 2 == 2``): at minimum the study
    # must hand back a result object.
    assert result is not None

0 comments on commit 817ccad

Please sign in to comment.