Import the libraries needed

In [1]:
import pandas as pd
import numpy as np
import os
import pyodbc
pyodbc.drivers()

from datetime import datetime
import psutil

# For the Yahoo Finance api
from pandas_datareader import data as pdr
import yfinance as yf
yf.pdr_override() # <== that's all it takes :-)



Accessing the legacy Oracle database through a SQL Server linked server.  This is to cut down on intergration points.  The recent switch data will exist on SQl Server and a linked server exists on the same instance to the legacy Oracle data.

### Database connections and SQL to retrieve the various data needed.

In [2]:
SQL_server = 'xxx\yyy'
Reporting_database = 'zzz'

pd.set_option('max_row', None)

# Set a value for the home folder.
home_folder = "."

# Set values for the various paths.
input_path = home_folder + "\data"

print('==========================')
now = datetime.now()
print(now)
print('==========================')
print(psutil.virtual_memory())
print('==========================')

filename = input_path + "\iSuite_Product_list.csv"
print(filename)

2024-08-05 22:07:42.160814
svmem(total=16756752384, available=8681152512, percent=48.2, used=8075599872, free=8681152512)
.\data\iSuite_Product_list.csv


# 1. Get all iSuite policies that can have switchable funds.
## - Start by determining what's the population of all policies that can possibly have a switch.
##  - Get a list of products that have had a switch at some point.  
## - This allows us to look for a list of all policies that can ever have, or may have had, a fund switch.
## - This determines the list of products that allows us to determine the full switchable population at any time by identifying policies in force for any of the products found.

In [3]:
# READ IN directly from DW
def iSuite_Product_list(server, database):
    conn = pyodbc.connect(
         'DRIVER={ODBC Driver 17 for SQL Server};'
         'SERVER='+server+';'
         'DATABASE='+database+';'
         'Trusted_Connection=yes;')
  
    query = """SELECT DISTINCT pol.Product_Code
		    FROM [CLE_Reporting].[user].[CLV_GBS_Unit_Transactions] UT
	        INNER JOIN CLE_Reporting.[user].CLV_Fund F ON F.Fund_Code = UT.[Fund_Code]
	        INNER JOIN CLE_Reporting.dbo.CLT_Policy pol ON pol.Policy_Number = UT.Policy_Number
			WHERE Transaction_Type = 'Funds Switch/Shift'
			AND	((F.Asset_Type = 'UWP-Fonds' AND UWP_Type_Indicator = 'S') OR (F.Asset_Type <> 'UWP-Fonds' AND UWP_Type_Indicator = 'A')) 
			AND Reverse_Indicator = 'N'"""    
        
    return pd.read_sql(query, conn)

# Call the SQL proc to get a list of distinct iSuite product codes, 
# of products that have ever had a switch.
df_iSuite_Product_list = iSuite_Product_list(SQL_server, Reporting_database)

# Strip all blanks.
df_iSuite_Product_list = df_iSuite_Product_list.applymap(lambda x: " ".join(x.split()) if isinstance(x, str) else x)

df_iSuite_Product_list.head(10)

# Write out to a comma separated values file.
filename = input_path + "\iSuite_Product_list.csv"
print(filename)

df_iSuite_Product_list.to_csv(filename, encoding='utf-8', index=False)        

# Turn off dislays to protect PII
#df_iSuite_Product_list.head(5)

Error: ('HY000', '[HY000] [Microsoft][ODBC Driver 17 for SQL Server]SQL Server Network Interfaces: The system cannot contact a domain controller to service the authentication request. Please try again later.\r\n (-2146892976) (SQLDriverConnect); [HY000] [Microsoft][ODBC Driver 17 for SQL Server]Cannot generate SSPI context (-2146892976); [HY000] [Microsoft][ODBC Driver 17 for SQL Server]SQL Server Network Interfaces: The system cannot contact a domain controller to service the authentication request. Please try again later.\r\n (-2146892976); [HY000] [Microsoft][ODBC Driver 17 for SQL Server]Cannot generate SSPI context (-2146892976)')

# 2. Using the list of products which have had a switch, get details for all policies that can have switchable funds.
## This is the population of policies that can possibly switch funds.

In [4]:
conn = pyodbc.connect(
         'DRIVER={ODBC Driver 17 for SQL Server};'
         'SERVER='+SQL_server+';'
         'DATABASE='+Reporting_database+';'
         'Trusted_Connection=yes;')

main_query = f"""SELECT pol.Policy_Number         AS ContractNumber
                       ,prd.Product_Category_Code AS ProdCat
                       ,prd.Product_Code          AS ProdCode
                       ,psh.Policy_Status_Code    AS PolicyStatus
                       ,pol.Cover_End_Date        AS CoverEndDt
                       ,psh.Snapshot_Date         AS SnapshotDt
                 FROM CLT_Policy pol
                 INNER JOIN CLE_Reporting.dbo.CLT_Product prd ON prd.Product_Id = pol.Product_Id
                 INNER JOIN CLT_Policy_Snapshot_Hist psh ON pol.Policy_Number = psh.Policy_Number 
                 WHERE prd.Product_Id = pol.Product_Id
                 AND   prd.Product_Code IN ('FKP_2006V1','FKP_2006V2','FKP_2008V1','FKP_2010V1','FKP_2012V1','FKP_2017V1',
                                            'GNPP_2016V1','GNPP_2017V1','GNPP_2022V1','GNPP_2023V1',
                                            'GNRP_2008V1','GNRP_2010V1','GNRP_2012V1','GNRP_2017V1','GNRP_2022V1',
                                            'XRPP_2005V1','XRPP_2008V1','XRPP_2010V1','XRPP_2012V1','XRPP_2015V1')
                 ORDER BY psh.Snapshot_Date,pol.Policy_Number;"""



df_main = pd.read_sql(main_query, conn)

df_main['SnapshotDt'] = pd.to_datetime(df_main['SnapshotDt'])
df_main['SnapshotYrMth'] = df_main['SnapshotDt'].dt.strftime('%Y%m')
#df_main = df_main.drop(['SnapshotDt'],axis=1)

# Types and row count
print(len(df_main.index))
print(df_main.dtypes)

# Write out to a comma separated values file.
filename = input_path + '\Full_Population.csv'
df_main.to_csv(filename, encoding='utf-8', index=False)        
print(filename)

# Turn off dislays to protect PII
#df_main.head(10)

Error: ('HY000', '[HY000] [Microsoft][ODBC Driver 17 for SQL Server]SQL Server Network Interfaces: The system cannot contact a domain controller to service the authentication request. Please try again later.\r\n (-2146892976) (SQLDriverConnect); [HY000] [Microsoft][ODBC Driver 17 for SQL Server]Cannot generate SSPI context (-2146892976); [HY000] [Microsoft][ODBC Driver 17 for SQL Server]SQL Server Network Interfaces: The system cannot contact a domain controller to service the authentication request. Please try again later.\r\n (-2146892976); [HY000] [Microsoft][ODBC Driver 17 for SQL Server]Cannot generate SSPI context (-2146892976)')

# 3. Get the fund switch data.

## Get the Paxus fund switches¶

In [5]:
# READ IN directly from Paxus
def Get_All_Paxus_Switches(server, database):
    conn = pyodbc.connect(
         'DRIVER={ODBC Driver 17 for SQL Server};'
         'SERVER='+server+';'
         'DATABASE='+database+';'
         'Trusted_Connection=yes;'
         )

    query = """SELECT * FROM OPENQUERY(GERPDB,'
    SELECT  p.policy            AS "POLICY_NUMBER",
            u.fund              AS "LEGACY_FUND_CODE",
            p.cov_cobe          AS "PRODUCT",
            to_date(u.proc_date,''YYYYMMDD'') AS "PROCESSING_DATE",
            to_date(u.eff_date,''YYYYMMDD'')  AS "EFFECTIVE_DATE",
            u.unit_type                       AS "UNIT_TYPE",
            u.charge_type                     AS "CHARGE_TYPE",
            u.units_posted_cash               AS "CASH_VALUE",
            to_date(p.rcd,''YYYYMMDD'')       AS "RISK_COMMENCEMENT_DATE",
            p.agent                             AS "Servicing_Broker_Code",
            to_date(p.issued_date,''YYYYMMDD'') AS "Issue_Date",
            to_date(''19000101'',''YYYYMMDD'')  AS "Cover_End_Date",
            to_date(''19000101'',''YYYYMMDD'')  AS "Anniversary_Date",
            ''P''                               AS "Source"
    FROM GERPDBA.UNIT u,gerpdba.policy p 
    WHERE p.policy = u.policy 
    AND u.trans_code = ''P135'' 
    AND to_date(u.proc_date,''YYYYMMDD'') > ''2018-12-31''
    order by PROC_DATE, p.policy, u.fund
    ')"""    
    
    
    return pd.read_sql(query, conn)

# Call the SQL process to retrieve the Paxus (old system) fund switch data.
df_All_Paxus_Switches = Get_All_Paxus_Switches(SQL_server, Reporting_database)

# Types and row count
print(len(df_All_Paxus_Switches.index))
print(df_All_Paxus_Switches.dtypes)

# Write out to a comma separated values file.
filename = input_path + '\All_Paxus_Switches.csv'
df_All_Paxus_Switches.to_csv(filename, encoding='utf-8', index=False)        
print(filename)

# Turn off dislays to protect PII
#df_All_Paxus_Switches.head(10)

Error: ('HY000', '[HY000] [Microsoft][ODBC Driver 17 for SQL Server]SQL Server Network Interfaces: The system cannot contact a domain controller to service the authentication request. Please try again later.\r\n (-2146892976) (SQLDriverConnect); [HY000] [Microsoft][ODBC Driver 17 for SQL Server]Cannot generate SSPI context (-2146892976); [HY000] [Microsoft][ODBC Driver 17 for SQL Server]SQL Server Network Interfaces: The system cannot contact a domain controller to service the authentication request. Please try again later.\r\n (-2146892976); [HY000] [Microsoft][ODBC Driver 17 for SQL Server]Cannot generate SSPI context (-2146892976)')

# 2. Get the iSuite fund switches.

In [6]:
def Get_All_iSuite_Switches(server, database):
    conn = pyodbc.connect(        'DRIVER={ODBC Driver 17 for SQL Server};'
        'SERVER='+server+';'
        'DATABASE='+database+';'
        'Trusted_Connection=yes;'  )

    query_201 = """Select 
	FS_List.Policy_Number         AS "POLICY_NUMBER",
	Legacy_Fund_Code              AS "LEGACY_FUND_CODE",
    PC.Product_Code               AS "PRODUCT",
	FS_List.Processing_Date       AS "PROCESSING_DATE",
	FS_List.Effective_Date        AS "EFFECTIVE_DATE",
	FS_List.Unit_Type             AS "UNIT_TYPE",
    FS_List.Charge_Type           AS "CHARGE_TYPE",
    Cash_Value                    AS "CASH_VALUE",
	PC.Risk_Commencement_Date     AS "RISK_COMMENCEMENT_DATE",
    Servicing_Broker_Code,
	Issue_Date,
    Cover_End_Date,
    Anniversary_Date,
    'I'                         AS "Source"

FROM [CLE_Reporting].[user].[CLV_Policy_Core] PC 
INNER JOIN (SELECT Policy_Number,
		           Transaction_Type,
				   UT.Processing_Date,
				   UT.Effective_Date,
				   UT.Unit_Type, 
				   UT.Charge_Type,                    
				   Cash_Value,
				   F.Fund_Code,
				   F.Legacy_Fund_Code
		    FROM [CLE_Reporting].[user].[CLV_GBS_Unit_Transactions] UT
	        INNER JOIN CLE_Reporting.[user].CLV_Fund F 
			ON F.Fund_Code = UT.[Fund_Code]
			WHERE Transaction_Type = 'Funds Switch/Shift'
			AND	((F.Asset_Type = 'UWP-Fonds' AND UWP_Type_Indicator = 'S') OR (F.Asset_Type <> 'UWP-Fonds' AND UWP_Type_Indicator = 'A')) 
			AND Reverse_Indicator = 'N') FS_LIST
ON FS_LIST.[Policy_Number] = PC.Policy_Number
WHERE FS_List.Processing_Date > '2018-12-31'
ORDER BY Processing_Date,Policy_Number
    """
    return pd.read_sql(query_201, conn)

# Call the SQL process to retrieve the Paxus (old system) fund switch data.
df_All_iSuite_Switches = Get_All_iSuite_Switches(SQL_server, Reporting_database)

# Types and row count
print(len(df_All_iSuite_Switches.index))
print(df_All_iSuite_Switches.dtypes)

# Write out to a comma separated values file.
filename = input_path + '\All_iSuite_Switches.csv'
df_All_iSuite_Switches.to_csv(filename, encoding='utf-8', index=False)      
print(filename)

# Turn off dislays to protect PII
#df_All_iSuite_Switches.head(10)

Error: ('HY000', '[HY000] [Microsoft][ODBC Driver 17 for SQL Server]SQL Server Network Interfaces: The system cannot contact a domain controller to service the authentication request. Please try again later.\r\n (-2146892976) (SQLDriverConnect); [HY000] [Microsoft][ODBC Driver 17 for SQL Server]Cannot generate SSPI context (-2146892976); [HY000] [Microsoft][ODBC Driver 17 for SQL Server]SQL Server Network Interfaces: The system cannot contact a domain controller to service the authentication request. Please try again later.\r\n (-2146892976); [HY000] [Microsoft][ODBC Driver 17 for SQL Server]Cannot generate SSPI context (-2146892976)')

# Convert Paxus Cobes to Product Name

In [7]:
def Get_Product_Mapping(server, database):
    conn = pyodbc.connect(
        'DRIVER={ODBC Driver 17 for SQL Server};'
        'SERVER='+server+';'
        'DATABASE='+database+';'
        'Trusted_Connection=yes;'
        )

    query_201 = "Select * FROM CLT_Product_Mapping"
    return pd.read_sql(query_201, conn)

# Call the SQL to get Paxus to iSuite product mappings.
df_Product_Mapping = Get_Product_Mapping(SQL_server, Reference_Data_database)  

# Strip all blanks.
df_Product_Mapping = df_Product_Mapping.applymap(lambda x: " ".join(x.split()) if isinstance(x, str) else x)

# Turn off dislays to protect PII
#df_Product_Mapping.head(99)

NameError: name 'Reference_Data_database' is not defined

## Write out to a comma separated values file.

In [8]:
# Write out to a comma separated values file.
filename = input_path + '\df_Product_Mapping.csv'
df_Product_Mapping.to_csv(filename, encoding='utf-8', index=False)       
print(filename)

# Turn off dislays to protect PII
#df_Product_Mapping.head(10)

NameError: name 'df_Product_Mapping' is not defined

## Join the Paxus switch to teh Paxus product mapping to make it have the same mapping as iSuite.

In [9]:
# Strip the blanks from the product to allow them to join.
df_All_Paxus_Switches['PRODUCT'] = df_All_Paxus_Switches['PRODUCT'].str.strip()

# Join them
df_All_Paxus_Switches_With_Product_Name = pd.merge(df_All_Paxus_Switches, df_Product_Mapping, left_on = ['PRODUCT'], right_on = ['Cobe_Code'] , how = 'left') 

# Turn off dislays to protect PII
#df_All_Paxus_Switches_With_Product_Name.head(5)

NameError: name 'df_All_Paxus_Switches' is not defined

## Clean the data and write to file.

In [10]:
# Remove unused columns after merge.
df_All_Paxus_Switches_With_Product_Name = df_All_Paxus_Switches_With_Product_Name.drop(['PRODUCT','Cobe_Code'], axis=1)

# Rename columns to match.
df_All_Paxus_Switches_With_Product_Name = df_All_Paxus_Switches_With_Product_Name.rename(columns={"Product_Code":"PRODUCT"})
df_All_Paxus_Switches_With_Product_Name = df_All_Paxus_Switches_With_Product_Name.rename(columns={"Source"      :"SOURCE"})

# Reorder the data before the merge.
neworder = ['POLICY_NUMBER','LEGACY_FUND_CODE','PRODUCT','PROCESSING_DATE','EFFECTIVE_DATE','UNIT_TYPE','CHARGE_TYPE','CASH_VALUE','RISK_COMMENCEMENT_DATE','Servicing_Broker_Code','Issue_Date','Cover_End_Date','Anniversary_Date','SOURCE']
df_All_Paxus_Switches_With_Product_Name=df_All_Paxus_Switches_With_Product_Name.reindex(columns=neworder)

# Example 1: Convert datetype to string
df_All_Paxus_Switches_With_Product_Name['PROCESSING_DATE']=df_All_Paxus_Switches_With_Product_Name['PROCESSING_DATE'].astype(str)
df_All_Paxus_Switches_With_Product_Name['EFFECTIVE_DATE']=df_All_Paxus_Switches_With_Product_Name['EFFECTIVE_DATE'].astype(str)
df_All_Paxus_Switches_With_Product_Name['RISK_COMMENCEMENT_DATE']=df_All_Paxus_Switches_With_Product_Name['RISK_COMMENCEMENT_DATE'].astype(str)
df_All_Paxus_Switches_With_Product_Name['Issue_Date']=df_All_Paxus_Switches_With_Product_Name['Issue_Date'].astype(str)
df_All_Paxus_Switches_With_Product_Name['Cover_End_Date']=df_All_Paxus_Switches_With_Product_Name['Cover_End_Date'].astype(str)
df_All_Paxus_Switches_With_Product_Name['Anniversary_Date']=df_All_Paxus_Switches_With_Product_Name['Anniversary_Date'].astype(str)


# Types and row count
print(len(df_All_Paxus_Switches_With_Product_Name.index))
print(df_All_Paxus_Switches_With_Product_Name.dtypes)

# Write out to a comma separated values file.
filename = input_path + '\All_Paxus_Switches_With_Product_Name.csv'
print(filename)
df_All_Paxus_Switches_With_Product_Name.to_csv(filename, encoding='utf-8', index=False)        

# Turn off dislays to protect PII
#df_All_Paxus_Switches_With_Product_Name.head(10)


NameError: name 'df_All_Paxus_Switches_With_Product_Name' is not defined

# Merge all switches into one dataframe and write to file.

In [12]:
# Merge the Paxus and iSuite switch data.
df_All_Switches= df_All_iSuite_Switches.append(df_All_Paxus_Switches_With_Product_Name)

# Soryt by policy number and processing date.
df_All_Switches = df_All_Switches.sort_values(['PROCESSING_DATE','POLICY_NUMBER','SOURCE']).reset_index(drop=False)

# Types and row count
print(len(df_All_Switches.index))
print(df_All_Switches.dtypes)

# Write out to a comma separated values file.
filename = input_path + '\All_Switches.csv'
df_All_Switches.to_csv(filename, encoding='utf-8', index=False)       
print(filename)

# Turn off dislays to protect PII
#df_All_Switches.head(10)

NameError: name 'df_All_iSuite_Switches' is not defined

In [13]:
%who_ls DataFrame 

[]

In [14]:
del df_All_Paxus_Switches
del df_All_Paxus_Switches_With_Product_Name
del df_All_Switches
del df_All_iSuite_Switches
del df_Product_Mapping
del df_iSuite_Product_list
del df_main



NameError: name 'df_All_Paxus_Switches' is not defined

In [15]:
print('==========================')
now = datetime.now()
print(now)
print('==========================')
print(psutil.virtual_memory())
print('==========================')

2024-08-05 22:09:57.197903
svmem(total=16756752384, available=8700919808, percent=48.1, used=8055832576, free=8700919808)
