In [1]:
import psycopg2 as ps
from sshtunnel import SSHTunnelForwarder
import configparser as cp
import os

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

  """)


## Configuration

In [2]:
# pandas display configuration: show every row and every column of a frame.
# Capped alternatives kept for reference:
# pd.set_option("display.max_rows", 250)
# pd.set_option("display.width", 80)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [3]:
# Load the Redshift credentials file from the current user's home directory.
# interpolation=None keeps '%' characters in values (e.g. passwords) literal.
userpath = os.path.expanduser("~/")
Config = cp.ConfigParser(interpolation=None)
Config.read(os.path.join(userpath, ".redshift_creds.ini"))

['/Users/zlee/.redshift_creds.ini']

In [4]:
def ConfigSectionMap(section):
    """Return every option of one config section as a plain dict.

    Parameters
    ----------
    section : str
        Name of a section in the module-level ``Config`` parser.

    Returns
    -------
    dict
        Maps each option name in ``section`` to its value as returned by
        ``Config.get``. An option whose value cannot be read is reported on
        stdout and mapped to ``None``.

    Raises
    ------
    configparser.NoSectionError
        If ``section`` does not exist (raised by ``Config.options``).
    """
    section_values = {}
    for option in Config.options(section):
        try:
            section_values[option] = Config.get(section, option)
        except cp.Error:
            # Best effort: keep going so one unreadable option does not hide
            # the rest of the section. Only configparser errors are caught,
            # so unrelated failures are no longer swallowed.
            print("exception on %s!" % option)
            section_values[option] = None
    return section_values

In [5]:
# Pull the connection settings out of the [REDSHIFT] section of the config.
# The section is read once and reused instead of being re-read per setting.
# NOTE: if the section cannot be read, the error is only printed and the four
# names assigned inside the try block stay undefined.
try:
    redshift_settings = ConfigSectionMap("REDSHIFT")
    SSH_USERNAME = redshift_settings.get('ssh_username')
    DB_USER = redshift_settings.get('db_username')
    DB_PASSWORD = redshift_settings.get('db_password')
    DB_PORT = redshift_settings.get('db_port')
except Exception as e:
    print(e)
SSH_PRIVATE_KEY = "~/.ssh/id_rsa"
DB_HOST = '127.0.0.1'


## Connect to Redshift

In [6]:
# Open an SSH tunnel to the bastion host, then connect to the "claims"
# database and create the cursor (`curs`) used by the query helpers.
# The `with` statement starts the forwarder on entry, so no explicit
# server.start() call is needed.
# NOTE(review): psycopg2 connects to DB_HOST:DB_PORT rather than to
# server.local_bind_port, and no local_bind_address is given here -- confirm
# that DB_PORT really is the local end of this tunnel.
# NOTE(review): conn and curs are used by later cells, after this `with`
# block has exited -- confirm the connection does not depend on the tunnel
# staying open.
with SSHTunnelForwarder(
    ('ec2-107-23-57-72.compute-1.amazonaws.com', 22),
    ssh_username=SSH_USERNAME,
    ssh_private_key=SSH_PRIVATE_KEY,
    remote_bind_address=('localhost', 5439)) as server:

    print("server connected via ssh")

    params = {
        'database': 'claims',
        'user': DB_USER,
        'password': DB_PASSWORD,
        'host': DB_HOST,
        'port': DB_PORT
    }
    conn = ps.connect(**params)
    curs = conn.cursor()
    print("db connected")


server connected via ssh
db connected


## Get most recent tables for med, rx

In [7]:
def get_most_recent_table(tablename):
    """Return the schema-qualified name of the latest matching external table.

    Looks in ``svv_external_tables`` for table names containing ``tablename``
    and takes the first one in descending name order.

    Parameters
    ----------
    tablename : str
        Substring to look for in the external table names.

    Returns
    -------
    str
        The matching table name prefixed with ``claims_spectrum.``.

    Raises
    ------
    IndexError
        If no external table name contains ``tablename``.
    """
    # The LIKE pattern is passed as a bound parameter rather than formatted
    # into the SQL text, so the driver handles the quoting.
    get_table_name_sql = """SELECT tablename
    FROM svv_external_tables
    WHERE tablename LIKE %s
    ORDER BY tablename DESC
    LIMIT 1;
    """
    curs.execute(get_table_name_sql, ('%{}%'.format(tablename),))

    row = curs.fetchone()
    if row is None:
        raise IndexError(
            "no external table found with a name containing {!r}".format(tablename)
        )
    return "{}{}".format("claims_spectrum.", row[0])

In [8]:
# Resolve the latest external table for each of the four claim feeds
# (main/target x med/rx); the query helpers read these module-level names.
main_med_table_name = get_most_recent_table(tablename="main_med_bronze")
target_med_table_name = get_most_recent_table(tablename="target_med_bronze")
main_rx_table_name = get_most_recent_table(tablename="main_rx_bronze")
target_rx_table_name = get_most_recent_table(tablename="target_rx_bronze")

## Helper functions

In [9]:
def query_results(query):
    """Run ``query`` on the shared cursor ``curs`` and return all fetched rows."""
    curs.execute(query)
    return curs.fetchall()

In [10]:
def query_all_tables(query):
    """Run one query template against each of the four claim tables.

    Parameters
    ----------
    query : str
        SQL text with a single ``{}`` placeholder that is filled with the
        table name via ``str.format``.

    Returns
    -------
    dict
        Keys ``'main_med'``, ``'target_med'``, ``'main_rx'``, ``'target_rx'``
        (in that order), each mapped to what ``query_results`` returned for
        the corresponding table.
    """
    tables_by_label = (
        ('main_med', main_med_table_name),
        ('target_med', target_med_table_name),
        ('main_rx', main_rx_table_name),
        ('target_rx', target_rx_table_name),
    )
    return {
        label: query_results(query.format(table))
        for label, table in tables_by_label
    }

In [11]:
def results_to_df(results, column_names):
    """Convert per-table query rows to DataFrames and merge them by claim type.

    Parameters
    ----------
    results : dict
        Maps a table label to its rows; must contain ``'main_med'``,
        ``'target_med'``, ``'main_rx'`` and ``'target_rx'``.
    column_names : list
        Column labels applied to every DataFrame.

    Returns
    -------
    dict
        ``'med_results'`` and ``'rx_results'``, each the ``merge_df`` of the
        main and target frames for that claim type.
    """
    frames = {
        label: pd.DataFrame(rows, columns=column_names)
        for label, rows in results.items()
    }
    med_merged = merge_df([frames['main_med'], frames['target_med']])
    rx_merged = merge_df([frames['main_rx'], frames['target_rx']])
    return {'med_results': med_merged, 'rx_results': rx_merged}
        

In [12]:
def merge_df(dataframes):
    """Stack the given DataFrames, sort by ``'customer'`` and renumber the index."""
    stacked = pd.concat(dataframes)
    ordered = stacked.sort_values('customer')
    return ordered.reset_index(drop=True)

In [13]:
def print_results(results):
    """Display the merged MED and RX frames, each under a text banner.

    ``results`` must provide the keys ``'med_results'`` and ``'rx_results'``.
    """
    sections = (
        ('-------------------------MED--------------------------', 'med_results'),
        ('\n-------------------------RX--------------------------', 'rx_results'),
    )
    for banner, key in sections:
        print(banner)
        display(results[key])

In [14]:
# THROWS ERROR BECAUSE OF MEDIAN see https://docs.aws.amazon.com/redshift/latest/dg/r_MEDIAN.html
# copay_sql = """SELECT gr_customer, gr_vendor,
# avg(copay), max(copay), min(copay), stddev_pop(copay), median(copay),
# avg(coinsurance), max(coinsurance), min(coinsurance), stddev_pop(coinsurance), median(coinsurance),
# avg(deductible), max(deductible), min(deductible), stddev_pop(deductible), median(deductible),
# avg(paid_by_insurance), max(paid_by_insurance), min(paid_by_insurance), stddev_pop(paid_by_insurance), median(paid_by_insurance),
# avg(paid_by_patient), max(paid_by_patient), min(paid_by_patient), stddev_pop(paid_by_patient), median(paid_by_patient)

# FROM {}
# GROUP BY gr_customer, gr_vendor
# ORDER BY gr_customer"""
# copay = query_all_tables(copay_sql)
# copay_df = results_to_df(copay, ['customer', 'vendor',
#                                  'copay_average', 'copay_max', 'copay_min', 'copay_stddev_pop', 'copay_median',
#                                  'coinsurance_average', 'coinsurance_max', 'coinsurance_min', 'coinsurance_stddev_pop', 'coinsurance_median',
#                                  'deductible_average', 'deductible_max', 'deductible_min', 'deductible_stddev_pop', 'deductible_median',
#                                  'paid_by_insurance_average', 'paid_by_insurance_max', 'paid_by_insurance_min', 'paid_by_insurance_stddev_pop', 'paid_by_insurance_median',
#                                  'paid_by_patient_average', 'paid_by_patient_max', 'paid_by_patient_min', 'paid_by_patient_stddev_pop', 'paid_by_patient_median',
#                                  ])

## Stats

In [15]:
# Summary statistics (avg, max, min, population std-dev) of each cost field,
# grouped by customer and vendor; `{}` is filled with the table name.
stats_sql = """SELECT gr_customer, gr_vendor,
avg(copay), max(copay), min(copay), stddev_pop(copay),
avg(coinsurance), max(coinsurance), min(coinsurance), stddev_pop(coinsurance),
avg(deductible), max(deductible), min(deductible), stddev_pop(deductible),
avg(paid_by_insurance), max(paid_by_insurance), min(paid_by_insurance), stddev_pop(paid_by_insurance),
avg(paid_by_patient), max(paid_by_patient), min(paid_by_patient), stddev_pop(paid_by_patient)

FROM {}
GROUP BY gr_customer, gr_vendor
ORDER BY gr_customer"""

stats = query_all_tables(stats_sql)

# Column labels follow the SELECT list: the two grouping columns, then the
# four aggregates for each cost field in the same field order as the query.
stats_df = results_to_df(
    stats,
    ['customer', 'vendor'] + [
        '{}_{}'.format(measure, aggregate)
        for measure in ('copay', 'coinsurance', 'deductible',
                        'paid_by_insurance', 'paid_by_patient')
        for aggregate in ('average', 'max', 'min', 'stddev_pop')
    ],
)

In [16]:
# Show the merged MED and RX summary-statistics tables.
print_results(stats_df)

-------------------------MED--------------------------


Unnamed: 0,customer,vendor,copay_average,copay_max,copay_min,copay_stddev_pop,coinsurance_average,coinsurance_max,coinsurance_min,coinsurance_stddev_pop,deductible_average,deductible_max,deductible_min,deductible_stddev_pop,paid_by_insurance_average,paid_by_insurance_max,paid_by_insurance_min,paid_by_insurance_stddev_pop,paid_by_patient_average,paid_by_patient_max,paid_by_patient_min,paid_by_patient_stddev_pop
0,ACSHIC,Highmark,1.8,100.0,-100.0,8.706599,0.37,7739.81,-7000.0,17.271715,0.48,2000.0,-1719.0,11.32785,112.48,953471.04,-282964.59,1410.854436,2.66,7739.81,-7200.0,24.23136
1,AT&T,HCSC,0.2,235.0,-235.0,2.94731,7.76,30900.0,-30900.0,63.763654,14.84,24000.0,-24000.0,88.077176,120.32,1832818.49,-1755818.98,1706.70654,22.81,33900.0,-33900.0,113.563184
2,AT&T,UHC,0.17,900.0,-900.0,2.602663,8.6,58129.8,-58129.8,109.891278,14.9,24000.0,-20000.0,119.02046,118.69,99999.99,-99999.99,2006.24751,23.68,58129.8,-58129.8,175.649379
3,Activision Blizzard,Collective Health,6.05,340.0,0.0,17.340681,2.99,3000.0,0.0,32.678698,7.2,3650.0,0.0,43.627749,91.83,459874.36,0.0,1142.453748,16.24,5576.02,0.0,58.97472
4,Aggreko,Cigna,0.0,93.89,-100.0,0.501532,6.99,990.28,-967.25,39.045347,18.8,999.75,-999.75,70.942794,53.86,999.95,-100350.0,215.229305,25.74,1684.33,-1528.45,80.685382
5,Alliance Data,Cigna,1.84,350.0,-350.0,11.777727,9.25,999.12,-999.12,55.465144,16.84,999.7,-998.26,67.910931,52.17,999.98,-161480.25,266.300427,27.89,1837.66,-1775.6,88.722131
6,Alliance Data,Meritain,1.83,558.6,-350.0,11.879878,11.22,11653.85,-3500.0,81.569104,23.47,3500.0,-3500.0,103.351608,131.68,348818.4,-271670.91,1316.612117,36.52,12415.7,-6319.76,138.091063
7,Anixter,UHC,0.0,0.0,0.0,0.0,11.91,24461.1,-24461.1,110.73852,14.96,8000.0,-4000.0,90.877201,132.27,99999.99,-99999.99,2040.181393,26.87,24461.1,-24461.1,151.90624
8,Ball,Cigna,0.09,620.0,-125.0,2.211583,9.48,998.92,-995.31,53.834011,20.29,999.93,-999.2,73.641488,50.36,999.96,-108700.19,161.803322,29.79,1969.01,-1969.01,91.139687
9,Barnes Group,Aetna,1.43,1151.6,-1151.6,11.993218,6.56,3674.5,-2386.74,60.031433,14.74,4494.44,-4000.0,95.091014,98.2,99509.45,-80000.0,1074.528825,22.75,6224.25,-5713.82,122.391695


Unnamed: 0,customer,vendor,copay_average,copay_max,copay_min,copay_stddev_pop,coinsurance_average,coinsurance_max,coinsurance_min,coinsurance_stddev_pop,deductible_average,deductible_max,deductible_min,deductible_stddev_pop,paid_by_insurance_average,paid_by_insurance_max,paid_by_insurance_min,paid_by_insurance_stddev_pop,paid_by_patient_average,paid_by_patient_max,paid_by_patient_min,paid_by_patient_stddev_pop
0,ACSHIC,Highmark,7.81,970.68,-315.0,15.485199,1.85,186.53,-60.0,3.860903,0.0,0.0,0.0,0.0,93.69,69653.67,-44591.55,631.339555,9.66,970.68,-315.0,15.024401
1,AT&T,HCSC,10.0,10.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.3,11.3,11.3,0.0,10.0,10.0,10.0,0.0
2,AT&T,CVS-Caremark,6.87,4992.76,-10865.73,23.080689,2.43,6763.39,-6763.39,26.255449,11.16,6000.0,-6000.0,90.656718,109.68,670195.0,-249669.33,1327.579315,20.46,6763.39,-16643.67,99.213199
3,Activision Blizzard,Collective Health,8.76,180.0,0.0,16.127515,0.05,129.02,0.0,1.912671,2.0,2990.89,0.0,28.54129,100.52,37151.12,0.0,653.716284,10.81,3035.89,0.0,32.864958
4,Aggreko,CVS-Caremark,6.07,250.0,-250.0,15.530699,0.06,202.75,-129.59,3.163193,16.43,3500.0,-3500.0,114.853794,89.25,34164.53,-29727.15,717.162997,22.57,3750.0,-3750.0,116.852993
5,Alliance Data,ESI,,,,,7.74,4000.0,-1584.96,52.988969,15.52,3500.0,-2966.6,94.908214,95.67,137377.39,-14387.68,948.420177,23.26,6850.0,-3855.02,119.83931
6,Bemis,Highmark,4.81,500.0,-500.0,38.396651,0.1,2600.0,-2600.0,27.693076,0.46,2600.0,-2600.0,41.263261,65.6,34701.09,-34701.09,1000.836934,5.38,5200.0,-5200.0,72.69025
7,Bemis,CVS-Caremark,5.66,312.5,-312.5,22.174007,0.06,434.22,-111.27,3.365055,1.26,2700.0,-1131.39,35.601165,123.77,79491.46,-32906.03,772.00492,6.99,2950.0,-1131.39,43.235207
8,Children's Health,CVS-Caremark,8.56,300.0,-200.0,25.626179,4.57,6318.0,-4638.24,63.923756,3.9,3000.0,-3000.0,65.026741,134.03,109808.89,-98416.3,1258.333114,17.04,6763.9,-5459.3,106.421235
9,Comcast,CVS-Caremark,7.79,120.0,-100.0,13.620973,6.85,35424.48,-35424.48,98.825089,0.0,0.0,0.0,0.0,99.91,143149.55,-100369.29,920.022219,14.65,35424.48,-35424.48,99.222245



-------------------------RX--------------------------
