In [1]:
import glob
import os
import random
import string
from urllib.parse import quote_plus

import pandas as pd
import psycopg2
from sqlalchemy import create_engine


# 0. Define Random String Generator

In [2]:
def random_string_generator(length, character_set=string.ascii_uppercase+' '):
    """Return a random string made of ``length`` characters.

    Each character is drawn independently (with replacement) from
    ``character_set``, which defaults to the uppercase ASCII letters plus
    a space.
    """
    picked_characters = random.choices(character_set, k=length)
    return ''.join(picked_characters)

# 1. Load Data

In [3]:
# List every entry of the data directory as a full path.
# The data directory is the current working directory with every occurrence
# of 'Script' in its path replaced by 'Data'.
dataDir = os.getcwd().replace('Script','Data')
# os.listdir returns entries in arbitrary order; sorting keeps fileDir[0]
# pointing at the same file on every run and every machine.
files = sorted(os.listdir(dataDir))
# os.path.join uses the platform's path separator instead of a hard-coded
# backslash, so the paths are also valid outside Windows.
fileDir = [os.path.join(dataDir, file) for file in files]

In [4]:
# Load the first listed data file into the working DataFrame.
first_file = fileDir[0]
data = pd.read_csv(first_file)

In [5]:
# Remove the ACCT_NO and GCIF_NO columns from the working frame.
data = data.drop(columns=['ACCT_NO', 'GCIF_NO'])

In [6]:
# One-column lookup frame holding each distinct AGREE_ID value once;
# it is the container the dummy agreement IDs get attached to.
AGREE_ID = data.loc[:, ['AGREE_ID']].drop_duplicates()

In [7]:
# Generate one unique dummy identifier for every row of AGREE_ID.
# Random 16-character strings over A-Z and 0-9 are collected in a set, which
# silently discards collisions; the loop keeps topping the set up with as
# many new draws as are still missing until there is exactly one per row.
dummLength = len(AGREE_ID.index)
uniqueDummAgreeID = set()
while len(uniqueDummAgreeID) != dummLength:
    missing = dummLength - len(uniqueDummAgreeID)
    uniqueDummAgreeID.update(
        random_string_generator(16, string.ascii_uppercase + string.digits)
        for _ in range(missing)
    )
dummAgreeID = list(uniqueDummAgreeID)
# .assign builds a new frame rather than writing into one derived from a
# selection of `data`, which can trigger pandas' SettingWithCopyWarning.
AGREE_ID = AGREE_ID.assign(AGREE_DUMM=dummAgreeID)

In [8]:
# Lookup frame with each distinct (CIF_NO, GCIF_NAME) combination once.
CIF = data.loc[:, ['CIF_NO', 'GCIF_NAME']].drop_duplicates()

In [9]:
# Generate one unique dummy CIF number for every row of CIF.
# Random 10-digit strings are collected in a set, which silently discards
# collisions; the loop keeps topping the set up with as many new draws as
# are still missing until there is exactly one per row.
dummLength = len(CIF.index)
uniqueDummCIF = set()
while len(uniqueDummCIF) != dummLength:
    missing = dummLength - len(uniqueDummCIF)
    uniqueDummCIF.update(
        random_string_generator(10, string.digits) for _ in range(missing)
    )
dummCIF = list(uniqueDummCIF)
# .assign builds a new frame rather than writing into one derived from a
# selection of `data`, which can trigger pandas' SettingWithCopyWarning.
CIF = CIF.assign(CIF_DUMM=dummCIF)

In [10]:
# Generate one unique dummy name for every row of CIF.
# Each candidate is a random string of 5 to 20 characters (uppercase letters
# and spaces). Candidates are collected in a set, which silently discards
# collisions; the loop keeps topping the set up with as many new draws as
# are still missing until there is exactly one per row.
dummLength = len(CIF.index)
uniqueDummNAME = set()
while len(uniqueDummNAME) != dummLength:
    missing = dummLength - len(uniqueDummNAME)
    uniqueDummNAME.update(
        random_string_generator(random.randint(5, 20), string.ascii_uppercase + ' ')
        for _ in range(missing)
    )
dummNAME = list(uniqueDummNAME)
# .assign builds a new frame rather than writing into one derived from a
# selection of `data`, which can trigger pandas' SettingWithCopyWarning.
CIF = CIF.assign(NAME_DUMM=dummNAME)

In [11]:
# Attach the dummy columns to every row: first the AGREE_ID lookup,
# then the CIF lookup keyed on (CIF_NO, GCIF_NAME). Left joins keep all
# rows of the working frame.
data = (
    data
    .merge(AGREE_ID, how='left', on='AGREE_ID')
    .merge(CIF, how='left', on=['CIF_NO', 'GCIF_NAME'])
)

In [12]:
# Drop the original identifier columns from the working frame.
data = data.drop(columns=['AGREE_ID', 'CIF_NO', 'GCIF_NAME'])

In [13]:
# Relabel all columns by position. The list must contain exactly one name
# per existing column, in the frame's current column order; the last three
# labels are AGREE_ID, CIF_NO and GCIF_NAME.
fixed_column_names = [
    'BASE_DT', 'BASE_DT_PARSED', 'BASE_YM', 'FLAG', 'REGION', 'AREA',
    'BRANCH', 'CUST_TYPE', 'PROD_NM', 'SUB_PROD_NM', 'SEGMENT', 'PROD_TYPE',
    'CURR_CODE', 'COLT', 'RATE_DPK', 'BASE_AMT_FIX', 'MTD_AVG_AMT_FIX',
    'DTD', 'MTD', 'YTD', 'DIVISION', 'SOURCE', 'SEGMENT_FIX',
    'BASE_AMT_ACCUM_MTD', 'INT_EXP_ACCUM_MTD', 'COF_MTD', 'HIGH_COF_FLAG',
    'LOB_SORT', 'CASA_TD', 'DTD_10B', 'MTD_10B', 'BLOCK',
    'AGREE_ID', 'CIF_NO', 'GCIF_NAME',
]
data.columns = fixed_column_names

In [14]:
# Put the columns into the final order: AGREE_ID, CIF_NO and GCIF_NAME move
# from the end of the frame to their positions among the other columns.
final_column_order = [
    'BASE_DT', 'BASE_DT_PARSED', 'BASE_YM', 'AGREE_ID', 'FLAG', 'REGION',
    'AREA', 'BRANCH', 'CIF_NO', 'CUST_TYPE', 'PROD_NM', 'SUB_PROD_NM',
    'SEGMENT', 'GCIF_NAME', 'PROD_TYPE', 'CURR_CODE', 'COLT', 'RATE_DPK',
    'BASE_AMT_FIX', 'MTD_AVG_AMT_FIX', 'DTD', 'MTD', 'YTD', 'DIVISION',
    'SOURCE', 'SEGMENT_FIX', 'BASE_AMT_ACCUM_MTD', 'INT_EXP_ACCUM_MTD',
    'COF_MTD', 'HIGH_COF_FLAG', 'LOB_SORT', 'CASA_TD', 'DTD_10B', 'MTD_10B',
    'BLOCK',
]
data = data[final_column_order]

# 2. Save to Postgres

In [15]:
# Connection settings for the PostgreSQL server on localhost.
# The password is never written in the notebook: it is read from the PGPW
# environment variable, and a KeyError is raised if that variable is unset.
host, port = 'localhost', '5432'
database = username = 'postgres'
password = os.environ['PGPW']

In [16]:
# Build the SQLAlchemy engine for the target database.
# The credentials are percent-encoded before being placed in the URL: a
# password containing characters such as '@', ':' or '/' would otherwise be
# parsed as part of the host/port/database section and the connection would
# fail.
engine = create_engine(
    f"postgresql://{quote_plus(username)}:{quote_plus(password)}@{host}:{port}/{database}"
)

In [20]:
# Write the frame to the MASTER_FUNDING table.
# NOTE: if_exists='append' adds rows to an existing table, so re-running this
# cell inserts the same data again.
data.to_sql(
    name="MASTER_FUNDING",
    con=engine,
    index=False,
    if_exists='append',
    method='multi',
    # method='multi' packs many rows into a single INSERT; without a chunksize
    # the whole frame goes into one statement. Batching keeps each statement
    # (and the memory needed to build it) bounded.
    chunksize=1000,
)

10000