In [12]:
import snowflake.connector
import snowflake.connector as sc
import os
from cryptography.hazmat.primitives import serialization

# Load environment variables (presumably from a local .env file — confirm) before
# SnowflakeHelper._create_connection reads its SNOWFLAKE_* settings via os.getenv.
from dotenv import load_dotenv
load_dotenv()

True

#  snowflake_helper

In [23]:
# --- snowflake_helper.py (updated) ---
import os
import threading
from contextlib import contextmanager
from queue import Empty, Full, Queue

import pandas as pd
import snowflake.connector

class SnowflakeHelper:
    """Thread-safe pool of Snowflake connections plus a query helper.

    ``minconn`` connections are opened eagerly when the helper is built.
    When every pooled connection is checked out, further connections are
    opened on demand until ``maxconn`` are alive; after that callers wait up
    to ``acquire_timeout`` seconds for one to be returned.

    Connection settings are read from the ``SNOWFLAKE_*`` environment
    variables (see ``_create_connection``). The helper can be used as a
    context manager; leaving the ``with`` block calls ``close()``.
    """

    def __init__(self, database, schema, minconn=1, maxconn=5, acquire_timeout=10):
        """Build the pool and open the initial connections.

        Args:
            database: Snowflake database passed to every connection.
            schema: Snowflake schema passed to every connection.
            minconn: Number of connections opened immediately.
            maxconn: Capacity of the pool queue and upper bound for on-demand
                growth. A value <= 0 keeps the queue unbounded.
            acquire_timeout: Seconds to wait for a free connection once the
                pool cannot grow any further.

        Raises:
            ValueError: If ``minconn`` exceeds a positive ``maxconn``. Filling
                a bounded queue past its capacity would block forever.
        """
        if maxconn > 0 and minconn > maxconn:
            raise ValueError(
                f"minconn ({minconn}) must not exceed maxconn ({maxconn})"
            )
        self.database = database
        self.schema = schema
        self.minconn = minconn
        self.maxconn = maxconn
        self.acquire_timeout = acquire_timeout
        self.pool = Queue(maxconn)
        # Guards _live; the Queue itself does its own locking.
        self.lock = threading.Lock()
        # Connections opened by the pool and not yet closed (idle + checked out).
        self._live = 0
        self._initialize_pool()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()

    def _initialize_pool(self):
        """Open ``minconn`` connections; close the ones already opened if one fails."""
        try:
            for _ in range(self.minconn):
                conn = self._create_connection()
                with self.lock:
                    self._live += 1
                self.pool.put(conn)
        except Exception:
            # Do not leave half a pool of open sessions behind a failed constructor.
            self.close()
            raise

    def _create_connection(self):
        """Open a new Snowflake connection from environment variables.

        Reads SNOWFLAKE_USER, SNOWFLAKE_ACCOUNT, SNOWFLAKE_WAREHOUSE,
        SNOWFLAKE_ROLE, SNOWFLAKE_AUTHENTICATOR and SNOWFLAKE_PRIVATE_KEY
        (passed as ``private_key_file``). When no private key file is
        configured, SNOWFLAKE_PASSWORD is passed as ``password`` instead.
        Unset values are omitted so the connector's own defaults apply.
        """
        settings = {
            "user": os.getenv("SNOWFLAKE_USER"),
            "account": os.getenv("SNOWFLAKE_ACCOUNT"),
            "warehouse": os.getenv("SNOWFLAKE_WAREHOUSE"),
            "role": os.getenv("SNOWFLAKE_ROLE"),
            "database": self.database,
            "schema": self.schema,
            "authenticator": os.getenv("SNOWFLAKE_AUTHENTICATOR"),
            "private_key_file": os.getenv("SNOWFLAKE_PRIVATE_KEY"),
        }
        if not settings["private_key_file"]:
            settings["password"] = os.getenv("SNOWFLAKE_PASSWORD")
        return snowflake.connector.connect(
            **{key: value for key, value in settings.items() if value is not None}
        )

    @staticmethod
    def _close_quietly(conn):
        """Close ``conn``, ignoring errors (it may already be dead)."""
        try:
            conn.close()
        except Exception:
            pass

    @staticmethod
    def _is_closed(conn):
        """Return True if ``conn`` reports itself closed; False if that cannot be determined."""
        try:
            return bool(conn.is_closed())
        except Exception:
            return False

    def _acquire(self):
        """Take an idle connection, grow the pool if allowed, or wait for one.

        Raises:
            queue.Empty: If no connection became available within
                ``acquire_timeout`` seconds.
        """
        try:
            return self.pool.get_nowait()
        except Empty:
            pass

        with self.lock:
            can_grow = self.maxconn <= 0 or self._live < self.maxconn
            if can_grow:
                # Reserve the slot before the (slow) connect so that
                # concurrent callers cannot overshoot maxconn.
                self._live += 1
        if can_grow:
            try:
                return self._create_connection()
            except Exception:
                with self.lock:
                    self._live -= 1
                raise

        try:
            return self.pool.get(block=True, timeout=self.acquire_timeout)
        except Empty:
            # A bare queue.Empty stringifies to "", which callers would read
            # as "no error"; attach a message.
            raise Empty(
                f"no pooled connection became available within {self.acquire_timeout}s"
            ) from None

    def _release(self, conn, replace=False):
        """Return ``conn`` to the pool, swapping it for a fresh one if needed.

        The connection is swapped when ``replace`` is true or when it reports
        itself closed. If the fresh connection cannot be opened the old one is
        kept, so the pool never shrinks because of a transient outage.
        """
        if replace or self._is_closed(conn):
            try:
                fresh = self._create_connection()
            except Exception:
                fresh = None
            if fresh is not None:
                self._close_quietly(conn)
                conn = fresh
        try:
            self.pool.put_nowait(conn)
        except Full:
            self._close_quietly(conn)
            with self.lock:
                self._live -= 1

    @contextmanager
    def _get_connection(self, replace_on_error=False):
        """Check a connection out of the pool and hand it back afterwards.

        Args:
            replace_on_error: When true and the ``with`` body raises, the
                connection is replaced by a fresh one instead of being
                recycled, so a retry does not pick up the same broken session.
        """
        conn = self._acquire()
        failed = False
        try:
            yield conn
        except Exception:
            failed = True
            raise
        finally:
            self._release(conn, replace=failed and replace_on_error)

    def _replace_connection(self):
        """Best-effort: add one fresh connection to the pool.

        Kept for backward compatibility; ``execute_query`` now swaps failed
        connections inside ``_get_connection``. A connection that does not fit
        into the pool is closed instead of being leaked.
        """
        try:
            fresh = self._create_connection()
        except Exception:
            return
        try:
            self.pool.put_nowait(fresh)
        except Full:
            self._close_quietly(fresh)
        else:
            with self.lock:
                self._live += 1

    def execute_query(self, query: str, params=None, as_df: bool = False, max_retries: int = 1):
        """Execute a SQL query on a pooled connection.

        Args:
            query: SQL text. Semicolons are not stripped; Snowflake accepts them.
            params: Bind parameters passed to ``cursor.execute``; a falsy
                value is replaced by ``{}``.
            as_df: Return a ``pandas.DataFrame`` instead of a list of tuples.
            max_retries: Extra attempts after the first failure. Negative
                values are treated as 0.

        Returns:
            tuple[result, str | None]: ``(result, None)`` on success. After the
            final failed attempt, ``(pd.DataFrame(), message)`` when ``as_df``
            is true, else ``([], message)``. ``message`` is never empty.
        """
        params = params or {}
        attempts = max(0, max_retries) + 1
        last_err = None

        for attempt in range(attempts):
            retry_left = attempt < attempts - 1
            try:
                with self._get_connection(replace_on_error=retry_left) as conn:
                    cursor = conn.cursor()
                    try:
                        cursor.execute(query, params)
                        result = cursor.fetch_pandas_all() if as_df else cursor.fetchall()
                    finally:
                        # Close the cursor while this thread still owns the connection.
                        try:
                            cursor.close()
                        except Exception:
                            pass
                return result, None
            except Exception as e:
                # Some exceptions stringify to ""; fall back to repr so a
                # failure is never reported with a falsy message.
                last_err = str(e) or repr(e)

        return (pd.DataFrame() if as_df else []), last_err

    def close(self):
        """Close every idle pooled connection.

        Connections that are checked out at the time of the call are not
        touched; they return to the pool when their user releases them.
        """
        while True:
            try:
                conn = self.pool.get_nowait()
            except Empty:
                break
            self._close_quietly(conn)
            with self.lock:
                self._live -= 1


In [24]:
# Example - database/schema passed at initialization.
# Constructing the helper opens `minconn` connections right away (two here),
# which is why the connector logs two "Connecting to ..." lines below.
sf = SnowflakeHelper(database="ahp", schema="qa", minconn=2, maxconn=10)

INFO:snowflake.connector.connection:Snowflake Connector for Python Version: 3.17.4, Python Version: 3.10.10, Platform: Windows-10-10.0.19045-SP0
INFO:snowflake.connector.connection:Connecting to GLOBAL Snowflake domain
INFO:snowflake.connector.connection:Snowflake Connector for Python Version: 3.17.4, Python Version: 3.10.10, Platform: Windows-10-10.0.19045-SP0
INFO:snowflake.connector.connection:Connecting to GLOBAL Snowflake domain


In [None]:
# cqo_chatbot_case_level
# cqo_chatbot_product_level
# cqo_chatbot_robotics

In [16]:
# Table under inspection.
# NOTE(review): no cell visible here reads tbl_name; the sample query below
# hard-codes the same table name — confirm whether this variable is still needed.
tbl_name = "cqo_chatbot_case_level"

In [27]:
# Sample ten rows. execute_query returns a (result, error) pair, so unpack both;
# `error` is None on success and a message string after the final failed attempt.
df, error = sf.execute_query("SELECT * from cqo_chatbot_case_level limit 10", as_df=True)

In [30]:
# Show the error slot of the last query: None means the query succeeded.
error

In [26]:
# Display the sampled rows.
# NOTE(review): the saved output below starts with "(", which looks like a stale
# render from a run where `df` still held the whole (result, error) tuple — re-run to refresh.
df 

(                                         EMR_P_EVENT EMR_HOSPITAL_ROLLUP  \
 0         MARYVIEW MEDICAL CENTER-920242285156-28750       HAMPTON ROADS   
 1  MERCY HEALTH - ST. JOSEPH WARREN HOSPITAL-7212...          YOUNGSTOWN   
 2  MERCY HEALTH - ST. ELIZABETH BOARDMAN HOSPITAL...          YOUNGSTOWN   
 3  MERCY HEALTH - FAIRFIELD HOSPITAL-616233478346...          CINCINNATI   
 4              LOURDES HOSPITAL-101252021123-0FT44ZZ            KENTUCKY   
 5  MERCY HEALTH - ST. ELIZABETH BOARDMAN HOSPITAL...          YOUNGSTOWN   
 6  MERCY HEALTH - ST. ELIZABETH BOARDMAN HOSPITAL...          YOUNGSTOWN   
 7      ST. FRANCIS MEDICAL CENTER-933240211300-44970            RICHMOND   
 8             ST FRANCIS EASTSIDE-901241131133-27447          GREENVILLE   
 9  MERCY HEALTH - ST. ANNE HOSPITAL-205230060170-...              TOLEDO   
 
                  EMR_FACILITY_NAME  \
 0          MARYVIEW MEDICAL CENTER   
 1       ST. JOSEPH WARREN HOSPITAL   
 2  ST. ELIZABETH BOARDMAN HOSPITA

In [None]:
# execute_query returns a (result, error) pair; indexing the tuple itself with
# 'COLUMN_NAME' would raise TypeError, so unpack it and surface failures first.
# NOTE(review): `query` is not defined in any cell visible here — confirm it is
# assigned (and the cell re-run) before this cell executes on a fresh kernel.
df, error = sf.execute_query(query, as_df=True)
if error is not None:
    raise RuntimeError(f"Column lookup failed: {error}")
column_list = df['COLUMN_NAME'].unique().tolist()
column_list

['EMR_P_EVENT',
 'EMR_HOSPITAL_ROLLUP',
 'EMR_FACILITY_NAME',
 'EMR_MS_DRG',
 'EMR_SURGICAL_HIERARCHY',
 'EMR_PRIMARY_PROCEDURE',
 'EMR_SERVICE_LINE',
 'EMR_PATIENT_TYPE_BUCKET',
 'EMR_PATIENT_CLASS',
 'EMR_LEAD_SURGEON',
 'EMR_PAYOR_GROUP',
 'EMR_OUTCOME_SCORE',
 'EMR_LOS',
 'EMR_GMLOS',
 'EMR_ACCOUNT_NUMBER',
 'EMR_TOTAL_ACQUISITION_COST',
 'EMR_CONTRACTED_SPEND',
 'EMR_OFF_CONTRACTED_SPEND',
 'EMR_LOCAL_CONTRACTED_SPEND',
 'EMR_MONTH_YR',
 'EMR_SSI',
 'EMR_BLOOD_TRANSFUSION_FLAG',
 'EMR_IP_READMISSION_INDEX_CASE',
 'EMR_IP_MORTALITY',
 'EMR_TOTAL_QTY',
 'EMR_MED_SUPPLY_VAR_DIRECT_COST',
 'EMR_IMPLANT_VAR_DIRECT_COST',
 'EMR_DISCHARGE_DATE',
 'EMR_ASA_RATING',
 'EMR_BMI_BUCKET',
 'EMR_PATIENT_AGE_BUCKET',
 'EMR_DIABETIC_STATUS',
 'EMR_SMOKING_STATUS',
 'EMR_PRIMARY_ICD10_PX_CODE',
 'EMR_BILLED_CPT_CODE',
 'OUTLIER_MODE_CLASSIFICATION',
 'EMR_FINAL_OUTLIER_TAGGING',
 'EMR_TOTAL_CHARGES',
 'EMR_ACTUAL_REVENUE',
 'EMR_MEDICARE_NORMALIZED_REVENUE',
 'EMR_ACTUAL_INCOME',
 'EMR_ACTUAL_CONT