# Custom Scaling QA Checks
This notebook performs QA checks on custom scaling of prices. It loads and processes the price change data, applies exclusions and overrides, identifies violations of price overrides and of the CVS-over-independent-pharmacy price rule, visualizes the distribution of price changes, and counts the number of changes within specific thresholds.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import BQ
import util_funcs as uf
from google.cloud import bigquery
import os
import sys

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from GER_LP_Code.CPMO_lp_functions import *
from GER_LP_Code.CPMO_shared_functions import *

In [None]:
# Folder that holds the outputs of the scaling run
output_folder = 'Outputs/'

# Price change file of the run under review (GPI and NDC are read as strings)
output_prices = pd.read_csv(
    output_folder + 'Price_Changes_Scaling_2_20250131.csv',
    dtype={'GPI': str, 'NDC': str},
)

# MAC list suffixes that are excluded from the checks
exclude_list = [
    '10', '12', '13', 'SX', 'SX1', 'S3', 'S9', 'E23', 'N23', 'E92', '92',
    '24', 'E77', 'P77', '77', '78', '88',
    'A1', 'A2', 'A3', 'A4', 'A5',
    'L1', 'L2', 'L3', 'L4', 'L5', 'LD',
    '80', 'E80', 'M78',
]

# Customer IDs covered by this run
subset_list = ['2016']

# Customers that are MEDD clients
MEDD_client = ['2016']

# Customers with state parity
state_parity = []

# Customers that have R90 MACs
R90_exists = ['2016']

# CVS over INDY ratio per MEDD customer
medd_cvs_over_indy_ratio = {'2016': 2.0}

# Full MAC names to exclude: every customer combined with every excluded suffix
exclude_MAC_list = [
    f'MAC{cid}{sfx}'
    for cid in subset_list
    for sfx in exclude_list
]

In [None]:
# The price change file must not contain any MACLIST that is on the exclusion list
assert output_prices['MACLIST'].isin(exclude_MAC_list).sum() == 0, \
    'There are MACLISTs from the excluded list in the output prices file'

In [None]:
# Drop every row whose MACLIST is on the exclusion list.
# An explicit copy is taken because this frame gets a column reassigned in a
# later cell; assigning on a boolean-filtered slice raises SettingWithCopyWarning.
output_prices = output_prices[~output_prices['MACLIST'].isin(exclude_MAC_list)].copy()

In [None]:
# Customers covered by the checks below
customer_id_list = subset_list

# Desired R30, R90 and mail scaling factors per customer.
# Values must match the ones used in Price_Scaling.py file
desired_r30 = {'2016': 1 - 0.25}
desired_r90 = {"2016": 1 + 0.00}
desired_mail = {"2016": 1 + 0.0}

In [None]:
def customer_id_finder(x,customer_id_list):
    """
    Return the first customer ID in `customer_id_list` that `x` starts with.

    The IDs are tried in list order, so when several IDs are prefixes of `x`
    the one that appears earliest in the list wins.

    Parameters:
    x (str): The string to inspect.
    customer_id_list (list): Candidate customer IDs.

    Returns:
    str or None: The first matching customer ID, or None when nothing matches.
    """
    matches = (candidate for candidate in customer_id_list if x.startswith(candidate))
    return next(matches, None)

def generate_r30_r90_MAC_combo(customer_id, combined_pairs, OK_vcml = False):
    """
    Build the full MAC names for every R30/R90 suffix pair of one customer.

    Each pair becomes ['MAC<customer><r30 suffix>', 'MAC<customer><r90 suffix>'].
    When OK_vcml is True, one extra entry per pair is appended after all
    regular entries, pairing the R30 MAC with 'MAC<customer>OK'.

    Parameters:
    customer_id (str): Customer ID placed between 'MAC' and the suffix.
    combined_pairs (list): Pairs whose first element is the R30 suffix and whose second is the R90 suffix.
    OK_vcml (bool): Whether to also emit the R30/'OK' combinations.

    Returns:
    list: A list of two-element lists of MAC names.
    """
    def mac_name(suffix):
        return 'MAC' + customer_id + suffix

    combos = []
    for pair in combined_pairs:
        combos.append([mac_name(pair[0]), mac_name(pair[1])])

    if OK_vcml:
        for pair in combined_pairs:
            combos.append([mac_name(pair[0]), mac_name('OK')])

    return combos

def input_read(customer_id_list):
    """
    Load the BigQuery inputs used by the QA checks.

    The MAC list, MAC price override and Walmart UNC override queries are
    restricted to `customer_id_list`; the GPI exclusion and MAC 1026 tables
    are read without a customer filter.

    Parameters:
    customer_id_list (list): Customer IDs (strings) to restrict the customer-specific queries to.

    Returns:
    tuple: (raw_prices, gpi_exclusions, mac_price_overrides, wmt_price_overrides, mac1026)
        - raw_prices: MAC list rows joined to the VCML reference, standardized, with MAC_LIST cast to str.
        - gpi_exclusions: GPI change exclusion data, as returned by the reader.
        - mac_price_overrides: standardized MAC price overrides.
        - wmt_price_overrides: standardized Walmart UNC overrides.
        - mac1026: standardized MAC 1026 data with GPI_NDC and MAC added and the
          first three characters removed from MAC_LIST.
    """
    de_project = 'pbm-mac-lp-prod-de'
    de_dataset = 'ds_pro_lp'
    ai_project = "pbm-mac-lp-prod-ai"
    ai_dataset = 'ds_sandbox'
    mac_override_table = "ger_opt_mac_price_override"
    wmt_override_table = "wmt_unc_override"

    bqclient = bigquery.Client()

    # Customer IDs rendered as the inside of a quoted SQL IN (...) list
    quoted_ids = '", "'.join(customer_id_list)
    raw_prices_sql = f"""select customer_id, chnl_ind, maclist.* from `pbm-mac-lp-prod-de.ds_pro_lp.mac_list` maclist
            join `pbm-mac-lp-prod-de.ds_pro_lp.vcml_reference` vcml_ref on vcml_ref.vcml_id=maclist.mac
            where customer_id in ("{quoted_ids}")
        """
    raw_prices = bqclient.query(raw_prices_sql).to_dataframe()

    gpi_exclusions = uf.read_BQ_data(
        BQ.gpi_change_exclusion_ndc,
        project_id=de_project,
        dataset_id=de_dataset,
        table_id='gpi_change_exclusion_ndc'
    )

    mac_override_query = BQ.ger_opt_mac_price_override_custom.format(
        _customer_id = uf.get_formatted_string(customer_id_list),
        _project = de_project,
        _landing_dataset = de_dataset,
        _table_id = mac_override_table,
    )
    mac_price_overrides = uf.read_BQ_data(
        mac_override_query,
        project_id=de_project,
        dataset_id=de_dataset,
        table_id=mac_override_table,
        custom = True
    )

    wmt_override_query = BQ.wmt_unc_override_custom.format(
        _customer_id = uf.get_formatted_string(customer_id_list),
        _project = ai_project,
        _dataset = ai_dataset,
        _table = wmt_override_table,
    )
    wmt_price_overrides = uf.read_BQ_data(
        wmt_override_query,
        project_id = ai_project,
        dataset_id = ai_dataset,
        table_id = wmt_override_table,
        custom = True
    )

    mac1026_sql = """select *
        from `pbm-mac-lp-prod-de.ds_pro_lp.mac_1026`
            """
    mac1026 = bqclient.query(mac1026_sql).to_dataframe()

    # MAC 1026: add the combined GPI_NDC key, keep the full MAC name in MAC,
    # and drop the first three characters of MAC_LIST
    mac1026 = standardize_df(mac1026)
    mac1026['GPI_NDC'] = mac1026['GPI'] + '_' + mac1026['NDC']
    mac1026['MAC'] = mac1026['MAC_LIST']
    mac1026['MAC_LIST'] = mac1026['MAC_LIST'].str[3:]

    raw_prices = standardize_df(raw_prices)
    raw_prices['MAC_LIST'] = raw_prices['MAC_LIST'].astype(str)
    mac_price_overrides = standardize_df(mac_price_overrides)
    wmt_price_overrides = standardize_df(wmt_price_overrides)

    return raw_prices, gpi_exclusions, mac_price_overrides, wmt_price_overrides, mac1026

def create_combo_prices(customer_id_list,raw_prices, output_prices, gpi_exclusions, mac_price_overrides, wmt_price_overrides,R30_R90_Pairs):
    """
    Combine the raw MAC prices with the run's price changes and apply exclusions and overrides.

    Steps:
      1. Left-join the price changes onto the raw prices by customer, MAC, GPI and NDC.
      2. Flag the rows that received a new price (CHANGED) and fill NEW_PRICE with
         the new price, falling back to the raw PRICE for rows without a change.
      3. Drop the GPIs listed in the GPI exclusions and keep only GPI-level rows
         (NDC == '***********').
      4. Apply the GPI-level MAC and Walmart price overrides through
         `price_overrider_function`; the resulting price is exposed as PRICE_OVRD_AMT.
      5. Tag every row with a MEASUREMENT: 'R30', 'R90', 'M30', or the MAC name
         itself when the MAC belongs to none of those groups.

    Parameters:
    customer_id_list (list): A list of customer IDs.
    raw_prices (DataFrame): DataFrame containing raw prices data.
    output_prices (DataFrame): DataFrame containing output prices data.
    gpi_exclusions (DataFrame): DataFrame containing GPI exclusions data (column GPI_CD).
    mac_price_overrides (DataFrame): DataFrame containing MAC price overrides data.
    wmt_price_overrides (DataFrame): DataFrame containing Walmart price overrides data.
    R30_R90_Pairs (list): A list of pairs, each holding an R30 suffix and an R90 suffix.

    Returns:
    DataFrame: A DataFrame containing the combined prices with applied exclusions and overrides.

    Raises:
    AssertionError: If the output prices contain any NDC-level row.
    """
    gpi_level_ndc = "***********"

    changed_prices = output_prices.copy()
    changed_prices['CUSTOMER_ID'] = changed_prices['client_name'].astype(str)

    # Only GPI-level price changes are expected in the output file
    ndc_level_rows = changed_prices[changed_prices['NDC11'] != gpi_level_ndc]
    assert len(ndc_level_rows) == 0, \
            f"There are {len(ndc_level_rows)} NDCs in the output prices.\n" \
            f"{ndc_level_rows}"

    print("combo prices before", len(raw_prices), len(changed_prices))
    changed_prices = changed_prices.rename(columns={'NDC11': 'NDC'})
    combo_prices = raw_prices.merge(changed_prices, left_on=['CUSTOMER_ID','MAC', 'GPI', 'NDC'], right_on=['CUSTOMER_ID','MACLIST', 'GPI', 'NDC'], how='left')
    combo_prices['CHANGED'] = ~combo_prices['MACPRC'].isna()
    # Report the size of the merged frame so an unexpected row fan-out is visible
    print("combo prices after", len(combo_prices), "with", combo_prices['CHANGED'].sum(), 'changed prices')

    # New price where the run changed it, otherwise the existing raw price.
    # (The previous code filled NEW_PRICE from NEW_PRICE itself, which was a no-op.)
    combo_prices['NEW_PRICE'] = combo_prices['MACPRC']
    unchanged = combo_prices['NEW_PRICE'].isna()
    combo_prices.loc[unchanged, 'NEW_PRICE'] = combo_prices.loc[unchanged, 'PRICE'].astype(float)

    combo_prices = combo_prices[~combo_prices['GPI'].isin(gpi_exclusions['GPI_CD'].unique())]

    # Ensure we are only looking at the overrides for non-NDC GPIs
    overrides_2 = mac_price_overrides[mac_price_overrides['NDC'] == gpi_level_ndc]
    overrides_3 = wmt_price_overrides[wmt_price_overrides['NDC'] == gpi_level_ndc]
    price_override = pd.concat([overrides_2, overrides_3])

    # Explicit copy: the frame is modified column by column below
    combo_prices = combo_prices[combo_prices['NDC'] == gpi_level_ndc].copy()
    combo_prices['CLIENT'] = combo_prices['CUSTOMER_ID'].astype(str)
    # Keep the PRICE_MUTABLE coming from output_prices under a separate name and
    # start every row as mutable before the overrides are applied
    combo_prices = combo_prices.rename(columns={'PRICE_MUTABLE': 'PRICE_MUTABLE_IN_RUN'})
    combo_prices['PRICE_MUTABLE'] = 1
    price_override['CLIENT'] = price_override['CLIENT'].astype(str)

    # price_overrider_function is called with the price-change MACLIST under the
    # MAC_LIST name, so the raw MAC_LIST column is dropped first
    combo_prices = combo_prices.drop(columns=['MAC_LIST'])
    combo_prices = combo_prices.rename(columns={'MACLIST': 'MAC_LIST'})
    combo_prices['OLD_MAC_PRICE'] = combo_prices['CurrentMAC']
    combo_prices['CURRENT_MAC_PRICE'] = combo_prices['CurrentMAC']
    price_override = price_override.reset_index(drop=True)
    combo_prices = combo_prices.reset_index(drop=True)
    combo_prices = price_overrider_function(price_override, combo_prices)

    # Restore the MACLIST name and expose the post-override price as PRICE_OVRD_AMT
    combo_prices = combo_prices.rename(columns={'MAC_LIST': 'MACLIST', 'CURRENT_MAC_PRICE': 'PRICE_OVRD_AMT'})
    combo_prices['PRICE_OVRD_AMT'] = combo_prices['PRICE_OVRD_AMT'].astype(float).round(4)

    # Sets give constant-time membership tests when tagging each row
    R30_MACs = {f'MAC{customer_id}{pair[0]}' for customer_id in customer_id_list for pair in R30_R90_Pairs}
    R90_MACs = {f'MAC{customer_id}{pair[1]}' for customer_id in customer_id_list for pair in R30_R90_Pairs}
    M30_MACs = {f'MAC{customer_id}2' for customer_id in customer_id_list}

    def measurement_for(mac):
        """Map a MAC name to R30 / R90 / M30, or return the MAC itself when unmapped."""
        if mac in R30_MACs:
            return 'R30'
        if mac in R90_MACs:
            return 'R90'
        if mac in M30_MACs:
            return 'M30'
        return mac

    combo_prices['MEASUREMENT'] = combo_prices['MAC'].apply(measurement_for)

    return combo_prices

def check_overrides_violations(combo_prices, customer_id_list, overrides_1, overrides_2, overrides_3):
    """
    Find changed prices that violate a price override or a GPI exclusion.

    Two kinds of rows are collected from the changed prices and returned with a
    REASON column:
      - 'Price Override': the row is not mutable (PRICE_MUTABLE == 0) and the new
        price (MACPRC) differs from the override amount (PRICE_OVRD_AMT).
      - 'Specialty Exclusion': the row's GPI appears in `overrides_1`.

    Per-customer counts for the MAC price overrides (`overrides_2`) and the
    Walmart UNC overrides (`overrides_3`) are printed for review; they do not
    add rows to the returned frame.

    Parameters:
    combo_prices (DataFrame): Combined prices with exclusions and overrides applied.
    customer_id_list (list): A list of customer IDs.
    overrides_1 (DataFrame): GPI exclusions (column GPI_CD).
    overrides_2 (DataFrame): MAC price overrides (columns CLIENT, GPI).
    overrides_3 (DataFrame): Walmart UNC overrides (columns CLIENT, VCML_ID, GPI).

    Returns:
    DataFrame: The violating rows, price override violations first, then exclusions.
    """
    changed = combo_prices[combo_prices['CHANGED']]

    # Explicit copies: REASON is added to these subsets, and assigning on a
    # filtered slice would raise SettingWithCopyWarning
    is_locked = changed['PRICE_MUTABLE'] == 0
    differs_from_override = changed['PRICE_OVRD_AMT'] != changed['MACPRC']
    price_override_violations = changed[is_locked & differs_from_override].copy()
    price_override_violations['REASON'] = 'Price Override'

    # check overrides 1
    print("\nChecking Exclusion GPI Overrides:")
    print("-----------------------------")
    is_excluded_gpi = changed['GPI'].isin(overrides_1['GPI_CD'].unique())
    exclusions = changed[is_excluded_gpi].copy()
    exclusions['REASON'] = 'Specialty Exclusion'
    print("Disallowed prices changed:", is_excluded_gpi.sum())

    # Single concat instead of growing from an empty DataFrame
    violations_overrides = pd.concat([price_override_violations, exclusions])

    # check overrides 2
    print("\nChecking MAC Price Overrides:")
    print("-----------------------------")
    for customer_id in customer_id_list:
        mac_price_o = overrides_2[overrides_2['CLIENT'] == str(customer_id)]
        price_changes = price_override_violations[price_override_violations['CUSTOMER_ID'] == str(customer_id)]
        mac_price_overrides_violations = price_changes['GPI'].isin(mac_price_o['GPI'].unique()).sum()
        print("Disallowed prices changed for", customer_id, ":", mac_price_overrides_violations)

    # check overrides 3
    print("\nChecking WMT Unc Override:")
    print("-----------------------------")
    for customer_id in customer_id_list:
        wmt_unc_o = overrides_3[overrides_3['CLIENT'] == str(customer_id)]
        customer_violations = price_override_violations[price_override_violations['CUSTOMER_ID'] == str(customer_id)]
        vcml_ids = wmt_unc_o['VCML_ID'].unique()

        if len(vcml_ids) == 0:
            # No Walmart overrides for this customer, so nothing can be violated
            print("Disallowed prices changed for", customer_id, ":", 0, "out of", len(customer_violations))
        else:
            for vcml_id in vcml_ids:
                print("WMT Unc Override Violations for", customer_id, vcml_id, ":")
                wmt_unc_o_vcml = wmt_unc_o[wmt_unc_o['VCML_ID'] == vcml_id]
                price_changes = customer_violations[customer_violations['MACLIST'] == vcml_id]
                wmt_unc_overrides_violations = price_changes['GPI'].isin(wmt_unc_o_vcml['GPI'].unique()).sum()
                print("Disallowed prices changed for", customer_id, vcml_id, ":", wmt_unc_overrides_violations, "out of", len(price_changes))

        # List every changed price that sits on a Walmart override (matched on MAC and GPI)
        if len(wmt_unc_o) > 0:
            print("List of GPIs and MACs with disallowed price changes")
            check_override3 = pd.merge(
                wmt_unc_o,
                changed[changed['CUSTOMER_ID'] == str(customer_id)],
                how='inner',
                left_on=['VCML_ID', 'GPI'],
                right_on=['MAC', 'GPI'],
            )
            print(check_override3['GPI'].tolist())
            print(check_override3['MAC'].unique())

    return violations_overrides



def check_cvs_indy_violations(combo_prices, customer_id, R30_R90_combo_df, MEDD_client, state_parity, medd_cvs_over_indy_ratio, R90_exists):
    """
    This function checks for violations where CVS prices are higher than independent pharmacy prices.

    For one customer it computes, per (CUSTOMER_ID, GPI, NDC), the minimum MACPRC
    over the MAC lists that are not excluded (MACPRC_MINRETAIL), keeps that cap
    finite only on the CVS (or state-parity) MAC lists, scales it by the MEDD
    ratio for MEDD clients, and flags changed, mutable prices whose MACPRC
    exceeds the cap by more than 0.0001. When the customer has R90 MACs the same
    is done for the R90 side and the lower of the two caps is used.

    Parameters:
    combo_prices (DataFrame): DataFrame containing the combined prices with applied exclusions and overrides.
    customer_id (str): The customer ID to check for violations.
    R30_R90_combo_df (DataFrame): DataFrame containing R30 and R90 combinations (columns R30, R90, CUSTOMER_ID).
    MEDD_client (list): List of MEDD clients.
    state_parity (list): List of clients with state parity.
    medd_cvs_over_indy_ratio (dict): Dictionary containing the ratio of CVS over independent prices for MEDD clients.
    R90_exists (list): List of clients with R90 prices.

    Returns:
    DataFrame: A DataFrame containing the violations where CVS prices are higher than independent pharmacy prices.
    """

    # Work on this customer's rows only; MACLIST is referred to as 'MAC ID' below
    price_changes = combo_prices[combo_prices['CUSTOMER_ID']==customer_id].copy()
    price_changes.rename(columns={'MACLIST': 'MAC ID'}, inplace=True)


    def get_min_retail_prices(df, exclude_list, tag=''):
        # Minimum MACPRC per (CUSTOMER_ID, GPI, NDC) over the rows whose 'MAC ID'
        # is not in exclude_list; the result column is named MACPRC_MINRETAIL<tag>

        return df.loc[~df['MAC ID'].isin([mac for mac in exclude_list])].groupby(['CUSTOMER_ID', 'GPI', 'NDC'], as_index=False)['MACPRC'].min().rename(columns={'MACPRC': f'MACPRC_MINRETAIL{tag}'})

    # R30 side: the minimum ignores the '2', '4' and '41' MAC lists, every R90
    # MAC of this customer and, for MEDD clients, the capped-pharmacy lists below
    if customer_id in MEDD_client:
        capped_pharm_30_suffix = ['2','C4','15','16','90','61','H1','53','P96','P55','P40','8','54','P63','18','14','6','59','5','7','P37']
        
        exclude_suffix = ['2','4','41']
        exclude_MACS = [f'MAC{customer_id}{suffix}' for suffix in exclude_suffix]
        capped_pharm_30_MACS = [f'MAC{customer_id}{suffix}' for suffix in capped_pharm_30_suffix]
        
        exclusion = exclude_MACS + capped_pharm_30_MACS + R30_R90_combo_df.loc[R30_R90_combo_df['CUSTOMER_ID'] == customer_id, 'R90'].tolist()
        price_match_prices = get_min_retail_prices(price_changes, exclusion)
    else:
        exclude_suffix = ['2','4','41']
        exclude_MACS = [f'MAC{customer_id}{suffix}' for suffix in exclude_suffix]
        exclusion = exclude_MACS + R30_R90_combo_df.loc[R30_R90_combo_df['CUSTOMER_ID'] == customer_id, 'R90'].tolist()
        price_match_prices = get_min_retail_prices(price_changes, exclusion)

    # Attach the per-drug minimum to every row of the customer
    price_changes = price_changes.merge(price_match_prices, how='left', on=['CUSTOMER_ID', 'GPI', 'NDC'])

    if customer_id in state_parity:
        # State parity: only the '41' MAC list keeps a finite cap
        price_changes.loc[price_changes['MAC ID']!='MAC'+customer_id+'41', 'MACPRC_MINRETAIL'] = np.inf
    else:
        # Set everything that is not CVS to infinity (we don't want to change them)
        cvs_r30_vcml = ['MAC'+customer_id + v for v in ['4','P78','M78']]
        price_changes.loc[~price_changes['MAC ID'].isin(cvs_r30_vcml), 'MACPRC_MINRETAIL'] = np.inf

        
    # MEDD clients: the cap is the minimum scaled by the customer's CVS over INDY ratio
    if customer_id in MEDD_client:
        price_changes['MACPRC_MINRETAIL'] *= medd_cvs_over_indy_ratio[customer_id]

    # R90 side: same construction, ignoring the '2' and '34' MAC lists, every R30
    # MAC of this customer and, for MEDD clients, the capped-pharmacy R90 lists
    if customer_id in R90_exists:
        if customer_id in MEDD_client:
            exclude_suffix = ['2','34']
            exclude_MACS = [f'MAC{customer_id}{suffix}' for suffix in exclude_suffix]
            capped_pharm_90_suffix = ['2','C9','43','45','E5','E90','E61','H3','E53','E96','E55','38','E40','E54','E63','46','48','42','36','E59','35','37','E37']
            capped_pharm_90_MACS = [f'MAC{customer_id}{suffix}' for suffix in capped_pharm_90_suffix]
            
            exclusion = capped_pharm_90_MACS + exclude_MACS + R30_R90_combo_df.loc[R30_R90_combo_df['CUSTOMER_ID'] == customer_id, 'R30'].tolist()
            price_match_prices = get_min_retail_prices(price_changes, exclusion, tag = '_90')
            
        else:
            exclude_suffix = ['2','34']
            exclude_MACS = [f'MAC{customer_id}{suffix}' for suffix in exclude_suffix]
            exclusion = exclude_MACS + R30_R90_combo_df.loc[R30_R90_combo_df['CUSTOMER_ID'] == customer_id, 'R30'].tolist()
            price_match_prices = get_min_retail_prices(price_changes, exclusion, tag = '_90')
        

        # NOTE(review): the helper was called with tag='_90', so its result column is
        # already MACPRC_MINRETAIL_90 and this rename has nothing to rename
        price_changes = price_changes.merge(price_match_prices.rename(columns={'MACPRC_MINRETAIL': 'MACPRC_MINRETAIL_90'}), how='left', on=['CUSTOMER_ID', 'GPI', 'NDC'])
        
        if customer_id in state_parity:
            # State parity: keep the R90 cap on the first of '91', '34', '3' that is present
            if any(price_changes['MAC ID']==f"MAC{customer_id}91"):
                price_changes.loc[price_changes['MAC ID'] != f'MAC{customer_id}91', 'MACPRC_MINRETAIL_90'] = np.inf
            elif any(price_changes['MAC ID']==f"MAC{customer_id}34"):
                price_changes.loc[price_changes['MAC ID'] != f'MAC{customer_id}34', 'MACPRC_MINRETAIL_90'] = np.inf
            else:
                price_changes.loc[price_changes['MAC ID'] != f'MAC{customer_id}3', 'MACPRC_MINRETAIL_90'] = np.inf

        else:
            # Keep the R90 cap on the CVS R90 lists when present, otherwise on the '3' list
            cvs_r90_vcml = ['MAC'+customer_id + v for v in ['34','E78']]
            if any(price_changes['MAC ID'].isin(cvs_r90_vcml)):

                price_changes.loc[~price_changes['MAC ID'].isin(cvs_r90_vcml), 'MACPRC_MINRETAIL_90'] = np.inf
            else:
                price_changes.loc[price_changes['MAC ID'] != f'MAC{customer_id}3', 'MACPRC_MINRETAIL_90'] = np.inf

        if customer_id in MEDD_client:
            price_changes['MACPRC_MINRETAIL_90'] *= medd_cvs_over_indy_ratio[customer_id]

        
        
        
        # The effective cap is the lower of the R30 and R90 caps
        price_changes['MACPRC_MINRETAIL'] = price_changes[['MACPRC_MINRETAIL', 'MACPRC_MINRETAIL_90']].min(axis=1)



    # Missing or zero caps are treated as "no cap" so they can never produce a violation
    price_changes['MACPRC_MINRETAIL'] = price_changes['MACPRC_MINRETAIL'].round(4)
    price_changes['MACPRC_MINRETAIL'] = price_changes['MACPRC_MINRETAIL'].fillna(np.inf)
    price_changes.loc[price_changes['MACPRC_MINRETAIL'] == 0, 'MACPRC_MINRETAIL'] = np.inf

    # Violation: a changed, mutable price from the run (AT_RUN_ID present) that is above
    # the cap by more than the 0.0001 tolerance, excluding values that are numerically
    # equal to cap + 0.0001
    violations_retail_over_CVS = price_changes[(~price_changes['AT_RUN_ID'].isna())\
                                               & (price_changes['CHANGED'])\
                                               & (price_changes['PRICE_MUTABLE']==1)
                                               & (price_changes['MACPRC'] > price_changes['MACPRC_MINRETAIL']+0.0001)
                                               &  ~(np.isclose(price_changes['MACPRC'], price_changes['MACPRC_MINRETAIL'] + 0.0001, atol=1e-10))
                                               ].reset_index(drop=True)
    
    print(f"Violations of CVS Over Independents for {customer_id}: {len(violations_retail_over_CVS)}")

    return violations_retail_over_CVS




In [None]:
# Pull the BigQuery inputs used by the checks for the selected customers
(
    raw_prices,
    gpi_exclusions,
    mac_price_overrides,
    wmt_price_overrides,
    mac1026,
) = input_read(customer_id_list)

In [None]:
# Remove from raw_prices every MAC built from a selected customer ID and an excluded suffix
raw_prices = raw_prices.loc[
    ~raw_prices['MAC'].isin(
        ['MAC' + c_id + s for c_id in customer_id_list for s in exclude_list]
    )
]

In [None]:
# Make sure client_name holds strings so it can be compared with the customer IDs
output_prices = output_prices.assign(client_name=output_prices['client_name'].astype(str))

In [None]:
# R30/R90 suffix pairs ([R30 suffix, R90 suffix]), grouped by chain group.
# The order of the pairs is the same as in the original one-pair-per-line listing.
R30_R90_Pairs = [
    # AHD
    ['16', '45'],
    # ALB
    ['15', '43'], ['15', '45'], ['15', 'E5'], ['P5', 'E5'], ['P5', '45'], ['P5', '43'],
    # CHD
    ['C4', 'C9'],
    # CRD
    ['33', '30'],
    # CSC
    ['P24', 'E24'], ['P24', '47'], ['17', '47'], ['17', 'E24'],
    # CVS
    ['M78', 'E78'], ['4', '34'], ['4', '91'], ['P78', 'E78'], ['P78', '34'], ['P78', '91'],
    ['41', '91'], ['41', '34'], ['41', 'E78'], ['4', 'E78'], ['M78', '34'], ['M78', '91'],
    ['24', 'E78'], ['24', '34'], ['24', '91'],
    ['M78', '91SP'], ['24', '91SP'], ['P78', '91SP'], ['41', '91SP'], ['4', '91SP'],
    # ELV
    ['44', '40'],
    # EPC
    ['55', '50'],
    # GIA
    ['90', 'E90'],
    # HAD
    ['P93', 'E93'],
    # HMA
    ['11', '20'],
    # HRT
    ['61', 'E61'],
    # HVD
    ['P48', 'E48'],
    # HYV
    ['H1', 'H3'], ['H1', 'E53'], ['H1', 'E96'],
    ['53', 'H3'], ['53', 'E53'], ['53', 'E96'],
    ['P96', 'H3'], ['P96', 'E53'], ['P96', 'E96'],
    # ING
    ['65', 'E65'],
    # KGR
    ['P40', '38'], ['8', 'E40'], ['8', '38'], ['P40', 'E40'],
    # KIN
    ['P58', 'E58'],
    # LWD
    ['54', 'E54'], ['54', 'E63'], ['P63', 'E54'], ['P63', 'E63'],
    # MDP
    ['177', '377'],
    # MJR
    ['18', '46'], ['18', '48'],
    # MPB
    ['81', 'E81'], ['81', 'E94'], ['P95', 'E95'], ['P95', 'E94'], ['P94', 'E81'],
    ['P94', 'E95'], ['P94', 'E94'], ['P95', 'E81'], ['81', 'E95'],
    # MPF
    ['P38', 'E38'],
    # MPI (IGD)
    ['P55', 'E55'],
    # MPM
    ['28', 'E28'],
    # MPW
    ['P11', 'E11'],
    # NAT
    ['1', '9'], ['1', '3'],
    # PCD
    ['P88', 'E88'],
    # PUB
    ['P89', '42'], ['P89', 'E67'], ['P89', 'E89'], ['67', 'E67'], ['14', '42'],
    ['14', 'E67'], ['14', 'E89'], ['67', 'E89'], ['67', '42'],
    # PUR
    ['62', 'E62'],
    # RAD
    ['6', '36'],
    # RBS
    ['16', 'E91'], ['16', '46'], ['P91', 'E91'], ['P91', '46'],
    # RUR
    ['R1', 'R3'],
    # STR
    ['XR', 'XT'], ['XA', 'XT'],
    # THF
    ['59', 'E59'],
    # TPS
    ['66', '60'],
    # WAD
    ['29', 'E29'],
    # WAG
    ['5', '35'],
    # WMT
    ['7', '37'], ['P37', '37'], ['7', 'E37'], ['P37', 'E37'],
    # KSD: Still active on vcml reference table
    ['P57', 'E57'],
    # HBD: Still active on vcml reference table
    ['P49', 'E49'],
    # LEWIS : Still active on vcml reference table
    ['19', '39'],
    # AMZ
    ['64', 'E64'],
    # ART
    ['22', '322'], ['227', '322'], ['229', '322'],
    ['22', '3227'], ['227', '3227'], ['229', '3227'],
    ['22', '3229'], ['227', '3229'], ['229', '3229'],
]

# Every distinct VCML suffix that appears in at least one pair
unique_vcmls_w_mapping = list({vcml for pair in R30_R90_Pairs for vcml in pair})


In [None]:
# Table of R30/R90 MAC combinations for every customer; stays empty when there are no customers
R30_R90_combo_df = pd.DataFrame(columns=['R30', 'R90', 'CUSTOMER_ID'])
combo_frames = []

for customer_id in customer_id_list:
    # VCML suffixes of this customer: MAC_LIST with the leading customer ID removed.
    # Only the prefix is stripped; replacing the ID everywhere would also eat a
    # suffix that contains it (e.g. customer '30' with suffix '30').
    customer_mac_lists = raw_prices.loc[raw_prices['CUSTOMER_ID'] == customer_id, 'MAC_LIST']
    unique_vcml_suffix = list({
        mac_list[len(customer_id):] if mac_list.startswith(customer_id) else mac_list
        for mac_list in customer_mac_lists
    })

    # Customers with an 'OK' VCML also get the R30/'OK' combinations
    OK_vcml = 'OK' in unique_vcml_suffix

    # Generate R30 and R90 MAC combinations for the current customer
    R30_R90_combo = generate_r30_r90_MAC_combo(customer_id, R30_R90_Pairs, OK_vcml)

    # Every suffix must be covered by R30_R90_Pairs; '2' and 'OK' are accepted without a pair
    known_suffixes = set(unique_vcmls_w_mapping) | {'2', 'OK'}
    missing_vcml_mapping = [vcml for vcml in unique_vcml_suffix if vcml not in known_suffixes]
    assert not missing_vcml_mapping, f"There are {len(missing_vcml_mapping)} VCMLs for {customer_id} that do not have a mapping in R30_R90_Pairs. Missing VCMLs: {missing_vcml_mapping}"

    temp_df = pd.DataFrame(R30_R90_combo, columns=['R30', 'R90'])
    temp_df['CUSTOMER_ID'] = customer_id
    combo_frames.append(temp_df)

# Concatenate once at the end instead of growing the frame from an empty one
if combo_frames:
    R30_R90_combo_df = pd.concat(combo_frames, ignore_index=True)

In [None]:
# Spot check: number of price changes on MAC30252, and the run IDs present in the file
print((output_prices['MACLIST'] == 'MAC30252').sum())
print(output_prices['AT_RUN_ID'].drop_duplicates())

In [None]:
# Work on a separate copy of the price changes and display it
tester = output_prices.copy(deep=True)
tester

In [None]:
# Fresh copy of the price changes with the new/old price ratio in 'diff'
tester = output_prices.copy()
tester['client_name'] = tester['client_name'].map(str)
tester['diff'] = tester['MACPRC'].div(tester['CurrentMAC'])
# tester[(tester["diff"] < 0.74) & (tester["diff"] > 0.1) ]

In [None]:
# Keep only the customers under review
tester_subset = tester[tester["client_name"].isin(subset_list)]
print("len of subset is ", len(tester_subset))

# Per customer: total rows and how many prices went up, went down or stayed the same
for cid in customer_id_list:
    print("Client_id : {}".format(cid))
    client_subset = tester_subset[tester_subset['client_name']==cid]
    print("len is : ", len(client_subset))

    client_ratio = client_subset["diff"]
    for label, selected in (
        ("count with increases : ", client_ratio > 1),
        ("count with decreases : ", client_ratio < 1),
        ("count with price remaining same : ", client_ratio == 1),
    ):
        print(label, int(selected.sum()))

    print("----------------------------------------------------")

In [None]:
# THRESHOLD CHECK - Very Important
# For every threshold and client: share of prices whose new/old ratio is above the threshold
thresholds = [0.5, 1, 1.25, 2, 3, 4, 5, 10, 20, 100]

threshold_dicts = {}
for threshold in thresholds:
    unique_clients = tester["client_name"].unique()

    share_by_client = {}
    for client in unique_clients:
        client_rows = tester[tester["client_name"] == client]
        total_count = len(client_rows)
        count_above_threshold = len(client_rows[client_rows["diff"] > threshold])
        # A client without rows gets a share of 0
        share_by_client[client] = count_above_threshold / total_count if total_count > 0 else 0

    threshold_dicts[threshold] = share_by_client


In [None]:
# Price decrease Threshold checking ---- confirm with real example... not tested
# THRESHOLD CHECK - Very Important
# For every threshold and client: share of prices whose new/old ratio is below the threshold
thresholds = [0.9, 0.75, 0.5, 0.25, 0.1]

threshold_dicts_reduce = {}
for threshold in thresholds:
    unique_clients = tester["client_name"].unique()

    share_by_client = {}
    for client in unique_clients:
        client_rows = tester[tester["client_name"] == client]
        total_count = len(client_rows)
        count_below_threshold = len(client_rows[client_rows["diff"] < threshold])
        # A client without rows gets a share of 0
        share_by_client[client] = count_below_threshold / total_count if total_count > 0 else 0

    threshold_dicts_reduce[threshold] = share_by_client


In [None]:
# Display, per threshold and client, the share of prices whose new/old ratio is above the threshold
threshold_dicts


In [None]:
# Display, per threshold and client, the share of prices whose new/old ratio is below the threshold
threshold_dicts_reduce

In [None]:
# Overall counts across all customers under review
tester_subset = tester[tester["client_name"].isin(subset_list)]
subset_ratio = tester_subset["diff"]
print("len of subset is ", len(tester_subset))
print("count with increases : ", int((subset_ratio > 1).sum()))
print("count with decreases : ", int((subset_ratio < 1).sum()))
print("count with price remaining same : ", int((subset_ratio == 1).sum()))

In [None]:
# Merge the raw prices with the price changes, apply exclusions and overrides,
# and tag changed prices and measurements
combo_prices = create_combo_prices(
    customer_id_list=customer_id_list,
    raw_prices=raw_prices,
    output_prices=output_prices,
    gpi_exclusions=gpi_exclusions,
    mac_price_overrides=mac_price_overrides,
    wmt_price_overrides=wmt_price_overrides,
    R30_R90_Pairs=R30_R90_Pairs,
)

In [None]:
# Check distribution of changes for retail
for cid in customer_id_list:
    cid = str(cid)
    print("Customer ID", cid)
    mailmac = "MAC" + cid + "2"

    # Changed prices of this customer, outside the mail MAC, whose value actually moved
    keep = (
        (combo_prices['CUSTOMER_ID'] == cid)
        & combo_prices['CHANGED']
        & (combo_prices['MACLIST'] != mailmac)
        & (combo_prices['MACPRC'] != combo_prices['CurrentMAC'])
    )
    # & (combo_prices['chnl_ind']!='R90')
    cp_r = combo_prices[keep].copy()
    cp_r['ratio'] = cp_r['MACPRC'] / cp_r['CurrentMAC']
    print(cp_r['ratio'].describe())

    # Histogram of new/old ratios with the desired R30 (red) and R90 (green) factors marked
    fig, ax = plt.subplots()
    ax.hist(cp_r['ratio'], bins=20)
    r30_target = desired_r30[cid]
    ax.axvline(r30_target, color='r')
    r90_target = desired_r90[cid]
    ax.axvline(r90_target, color='g')
    ax.text(r30_target, -max(ax.get_ylim()) * 0.1, f'Desired_r30  ={r30_target:.2f}', ha='right', color='r')
    ax.text(r90_target, -max(ax.get_ylim()) * 0.15, f'Desired_r90 ={r90_target:.2f}', ha='right', color='g')
    # Extra label padding leaves room for the annotations drawn below the axis
    ax.set_xlabel("New price/old price", labelpad=30)
    ax.set_ylabel("Number of prices")
    ax.set_title("Retail "+cid)


In [None]:
# Check distribution of changes for retail R30
# For every customer: take the changed, non-mail prices measured as R30, summarise the
# new/old price ratio and plot its histogram with the desired R30 target marked in red
for cid in customer_id_list:
    cid = str(cid)
    print("Customer ID", cid)
    # The mail MAC list is "MAC<customer id>2"; it is excluded from the retail view
    mailmac = "MAC" + cid + "2"
    cp_r30 = combo_prices[
        (combo_prices['CUSTOMER_ID'] == cid)
        & combo_prices['CHANGED']
        & (combo_prices['MACLIST'] != mailmac)
        & (combo_prices['MACPRC'] != combo_prices['CurrentMAC'])
        & (combo_prices['MEASUREMENT'] == 'R30')
    ].copy()
    # Optional extra filter to drop R90 rows: & (combo_prices['chnl_ind'] != 'R90')
    cp_r30['ratio'] = cp_r30['MACPRC'] / cp_r30['CurrentMAC']
    print(cp_r30['ratio'].describe())

    fig, ax = plt.subplots()
    ax.hist(cp_r30['ratio'], bins=20)
    ax.axvline(desired_r30[cid], color='r')
    # Target label is placed below the x-axis (negative y in data coordinates)
    ax.text(desired_r30[cid], -max(ax.get_ylim()) * 0.1, f'Desired_r30  ={desired_r30[cid]:.2f}', ha='right', color='r')
    # labelpad leaves room for the target label drawn under the axis
    ax.set_xlabel("New price/old price", labelpad=30)
    ax.set_ylabel("Number of prices")
    ax.set_title("R30 " + cid)

In [None]:
# Check distribution of changes for retail R90
# For every customer: take the changed, non-mail prices measured as R90, summarise the
# new/old price ratio and plot its histogram with the desired R90 target marked in green
for cid in customer_id_list:
    cid = str(cid)
    print("Customer ID", cid)
    # The mail MAC list is "MAC<customer id>2"; it is excluded from the retail view
    mailmac = "MAC" + cid + "2"
    cp_r90 = combo_prices[
        (combo_prices['CUSTOMER_ID'] == cid)
        & combo_prices['CHANGED']
        & (combo_prices['MACLIST'] != mailmac)
        & (combo_prices['MACPRC'] != combo_prices['CurrentMAC'])
        & (combo_prices['MEASUREMENT'] == 'R90')
    ].copy()
    # Optional extra filter on the channel indicator: & (combo_prices['chnl_ind'] != 'R90')
    cp_r90['ratio'] = cp_r90['MACPRC'] / cp_r90['CurrentMAC']
    print(cp_r90['ratio'].describe())

    fig, ax = plt.subplots()
    ax.hist(cp_r90['ratio'], bins=20)
    ax.axvline(desired_r90[cid], color='g')
    # Target label is placed below the x-axis (negative y in data coordinates)
    ax.text(desired_r90[cid], -max(ax.get_ylim()) * 0.15, f'Desired_r90 ={desired_r90[cid]:.2f}', ha='right', color='g')
    # labelpad leaves room for the target label drawn under the axis
    ax.set_xlabel("New price/old price", labelpad=30)
    ax.set_ylabel("Number of prices")
    ax.set_title("R90 " + cid)

In [None]:
# Check distribution of changes for mail
# For every customer: take the changed prices on the mail MAC list, summarise the
# new/old price ratio and plot its histogram with the desired mail target marked in red
for cid in customer_id_list:
    cid = str(cid)
    print("Customer ID", cid)
    # The mail MAC list is "MAC<customer id>2"
    mailmac = "MAC" + cid + "2"
    cp_m = combo_prices[
        (combo_prices['CUSTOMER_ID'] == cid)
        & combo_prices['CHANGED']
        & (combo_prices['MACLIST'] == mailmac)
        & (combo_prices['MACPRC'] != combo_prices['CurrentMAC'])
    ].copy()
    cp_m['ratio'] = cp_m['MACPRC'] / cp_m['CurrentMAC']
    # Print the same summary statistics as the retail / R30 / R90 cells
    print(cp_m['ratio'].describe())

    fig, ax = plt.subplots()
    ax.hist(cp_m['ratio'])
    ax.axvline(desired_mail[cid], color='r')
    # Target label is placed below the x-axis (negative y in data coordinates)
    ax.text(desired_mail[cid], -max(ax.get_ylim()) * 0.1, f'Desired_mail ={desired_mail[cid]:.2f}', ha='right', color='r')
    # labelpad leaves room for the target label drawn under the axis
    ax.set_xlabel("New price/old price", labelpad=30)
    ax.set_ylabel("Number of prices")
    ax.set_title("Mail " + cid)


In [None]:
# Check for violations of price overrides for the given combo prices and customer IDs
# The helper receives the same gpi_exclusions, mac_price_overrides and wmt_price_overrides
# that were used to build combo_prices; per the notebook description it flags violations of
# the specialty exclusions, MAC price overrides and Walmart UNC overrides
override_violations = check_overrides_violations(combo_prices, customer_id_list, gpi_exclusions, mac_price_overrides, wmt_price_overrides)

In [None]:
# Display the override violations returned by check_overrides_violations as the cell output
# (expected to be a DataFrame; its rows are reviewed manually -- TODO confirm an empty result means "no violations")
override_violations

In [None]:
# Check the raw prices for mail prices that exceed the cheapest retail (R30/R90) price
# of the same GPI/NDC

# Flag mail (M30) and R90 rows once, up front: the flags do not depend on the customer.
# IS_R90 is not read in this cell; it is kept on combo_prices in case later code relies on it.
combo_prices['IS_MAIL'] = combo_prices['MEASUREMENT'] == 'M30'
combo_prices['IS_R90'] = combo_prices['MEASUREMENT'] == 'R90'

for cid in customer_id_list:
    cid = str(cid)

    # All rows of the current customer, split below into non-mail and mail
    customer_rows = combo_prices[combo_prices['CUSTOMER_ID'] == cid]
    customer_is_mail = customer_rows['IS_MAIL']

    # Non-mail rows for the current customer
    cp = customer_rows[~customer_is_mail]

    # Lowest non-mail price per GPI/NDC, used as the ceiling for the mail price
    min_retail = (
        cp.groupby(['GPI', 'NDC'], as_index=False)
        .agg({'PRICE': "min"})
        .rename(columns={'PRICE': 'COMPARISON_PRICE'})
    )

    # Mail rows paired with that ceiling; the default inner merge keeps only
    # GPI/NDC pairs that have both a mail and a non-mail price
    cp_check = customer_rows[customer_is_mail].merge(min_retail, on=['GPI', 'NDC'])

    # Print the number of violations where the mail price is greater than the comparison price
    print("Violations in RAW PRICES for", cid, "MAIL:", (cp_check['PRICE'] > cp_check['COMPARISON_PRICE']).sum())

In [None]:
# Check violations where CVS prices are higher than independent pharmacy prices
# One violations frame is collected per customer and the frames are concatenated once at
# the end, instead of re-copying a growing DataFrame on every iteration
violation_frames = []

# Iterate over each customer ID in the customer_id_list
for customer_id in customer_id_list:
    # Check for violations where CVS prices are higher than independent pharmacy prices
    violations = check_cvs_indy_violations(combo_prices, customer_id, R30_R90_combo_df, MEDD_client, state_parity, medd_cvs_over_indy_ratio, R90_exists)
    violation_frames.append(violations)

# pd.concat raises on an empty list, so fall back to an empty DataFrame when there are no customers
violations_retail_over_CVS = pd.concat(violation_frames) if violation_frames else pd.DataFrame()

In [None]:
# Display the combined per-customer results of check_cvs_indy_violations
# (violations where CVS prices are higher than independent pharmacy prices) as the cell output
violations_retail_over_CVS