align with pandas 1.5

HallLab · Jul 3, 2023 · 817ccad · 817ccad
1 parent 584fbb5
commit 817ccad
Show file tree

Hide file tree

Showing 5 changed files with 41,549 additions and 6 deletions.
diff --git a/clarite/modules/analyze/regression/glm_regression.py b/clarite/modules/analyze/regression/glm_regression.py
@@ -18,7 +18,7 @@
 from .base import Regression
 
 # GITHUB ISSUE #119: Regressions with Error after Multiprocessing release python > 3.8
-multiprocessing.get_start_method("fork")
+# multiprocessing.get_start_method("fork")
 
 
 class GLMRegression(Regression):
@@ -129,7 +129,9 @@ def __init__(
             # Use the order according to the categorical
             counts = self.data[self.outcome_variable].value_counts().to_dict()
 
-            categories = self.data[self.outcome_variable].cat.categories
+            # Sort the category labels so codes are assigned in a fixed order
+            categories = sorted(self.data[self.outcome_variable].cat.categories)
+
             # GITHUB ISSUES #115: Keep control as 0 and case as 1
             if categories[0] == "Case" and categories[1] == "Control":
                 categories = sorted(categories, reverse=True)
@@ -138,6 +140,14 @@ def __init__(
 
             codes, categories = zip(*enumerate(categories))
             self.data[self.outcome_variable].replace(categories, codes, inplace=True)
+
+            # Since pandas >= 1.5, replace() no longer converts the result to float.
+            # If the column stays categorical, building y creates two columns and
+            # inverts the sign of beta.
+            self.data[self.outcome_variable] = self.data[self.outcome_variable].astype(
+                float
+            )
+
             self.description += (
                 f"Binary Outcome (family = Binomial): '{self.outcome_variable}'\n"
                 f"\t{counts[categories[0]]:,} occurrences of '{categories[0]}' coded as 0\n"

diff --git a/clarite/modules/analyze/regression/r_survey_regression.py b/clarite/modules/analyze/regression/r_survey_regression.py
@@ -3,10 +3,10 @@
 
 import pandas as pd
 
-from clarite.internal.utilities import requires, _get_dtypes
+from clarite.internal.utilities import _get_dtypes, requires
 
-from .base import Regression
 from ...survey import SurveyDesignSpec
+from .base import Regression
 
 
 class RSurveyRegression(Regression):
@@ -50,6 +50,8 @@ def __init__(
         min_n: int = 200,
         report_categorical_betas: bool = False,
         standardize_data: bool = False,
+        encoding=None,  # TODO: Error on call
+        edge_encoding_info=None,  # TODO: Error on call
     ):
         # base class init
         # This takes in minimal regression params (data, outcome_variable, covariates) and
@@ -175,7 +177,8 @@ def run(self):
         # Source R script to define the function
         import rpy2.robjects as ro
         from rpy2.robjects import pandas2ri
-        from .r_code.r_utilities import ewasresult2py, df_pandas2r
+
+        from .r_code.r_utilities import df_pandas2r, ewasresult2py
 
         r_code_folder = Path(__file__).parent / "r_code"
         filename = str(r_code_folder / "ewas_r.R")

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "clarite"
-version = "2.3.3"
+version = "2.3.4"
 description = "CLeaning to Analysis: Reproducibility-based Interface for Traits and Exposures"
 authors = ["Andre Rico <alr6366@psu.edu>"]
 license = "BSD-3-Clause"

diff --git a/tests/analyze/test_interaction_study.py b/tests/analyze/test_interaction_study.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 import pandas as pd
+
 import clarite
 
 TESTS_PATH = Path(__file__).parent.parent
@@ -225,3 +226,57 @@ def test_interactions_nhanes_pairwise(data_NHANES):
     )
     assert (grouped_bonf == python_result_nobeta["LRT_pvalue_bonferroni"]).all()
     assert (grouped_fdr == python_result_nobeta["LRT_pvalue_fdr"]).all()
+
+
+def test_interaction_exe():
+    """Smoke test: run an interaction study on the nested_table fixture.
+
+    The columns are used exactly as loaded. Per the original author's note,
+    explicitly casting them with make_binary / make_categorical /
+    make_continuous returned the same result, so no casting is done here.
+    """
+    # Resolve the fixture relative to the tests directory so the test does not
+    # depend on an absolute path from one developer's machine.
+    nested_table = clarite.load.from_csv(
+        str(TESTS_PATH / "test_data_files" / "nested_table.csv")
+    )
+
+    e1 = "DR1TSFAT"
+    e2 = "DRDSDT1"
+    covariates = [
+        "female",
+        "black",
+        "mexican",
+        "other_hispanic",
+        "other_eth",
+        "SDDSRVYR",
+        "BMXBMI",
+        "SES_LEVEL",
+        "RIDAGEYR",
+        "LBXCOT",
+        "IRON_mg",
+    ]
+    result = clarite.analyze.interaction_study(
+        data=nested_table,
+        outcomes="LBXHGB",
+        interactions=[(e1, e2)],
+        covariates=covariates,
+    )
+
+    # NOTE(review): assumes interaction_study returns a sized, table-like
+    # result (other tests in this file index it by column) - tighten these
+    # checks once reference values for this fixture are available.
+    assert result is not None
+    assert len(result) > 0