# In [1]:
import json
import os
import pandas as pd
import sqlite3

# Function to read JSON files
def read_json(file_path):
    """Load and return the parsed contents of a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's contents.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"The file {file_path} does not exist.")
    # Decode as UTF-8 explicitly: without it open() falls back to the
    # platform's default encoding (e.g. cp1252 on Windows), which can garble
    # or reject non-ASCII characters in the JSON text.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

# Locations of the three source extracts (one JSON file per dataset)
customer_file_path = (
    'C:/Users/Learner_9ZH3Z104/Development/Capstone/LatestDataSet/'
    'cdw_sapp_customer.json'
)
branch_file_path = (
    'C:/Users/Learner_9ZH3Z104/Development/Capstone/LatestDataSet/'
    'cdw_sapp_branch.json'
)
credit_card_file_path = (
    'C:/Users/Learner_9ZH3Z104/Development/Capstone/LatestDataSet/'
    'cdw_sapp_credit.json'
)

# Parse each extract into Python objects
customers = read_json(customer_file_path)
branches = read_json(branch_file_path)
credit_cards = read_json(credit_card_file_path)

# Function to format phone numbers
def format_phone_number(x):
    """Format a ten-digit phone number string as ``(XXX)XXX-XXXX``.

    Args:
        x: Candidate phone number.

    Returns:
        The formatted string when ``x`` is a ``str`` made of exactly ten
        digits; otherwise ``x`` itself, unchanged. The unchanged case covers
        ``None``/NaN, non-string values, strings of any other length and
        ten-character strings that contain non-digit characters.
    """
    # The type check runs first so that missing values and other non-string
    # inputs are handed back without being inspected any further. The digit
    # check stops already-punctuated or otherwise non-numeric text from being
    # sliced into a meaningless "(abc)def-ghij" shape.
    if not isinstance(x, str) or len(x) != 10 or not x.isdigit():
        return x
    return f"({x[:3]}){x[3:6]}-{x[6:]}"

# Transformation function for customers
def transform_customers(data):
    """Build the cleaned customer table from raw customer records.

    Args:
        data: Customer records in any form ``pd.DataFrame`` accepts. The
            columns FIRST_NAME, MIDDLE_NAME, LAST_NAME, STREET_NAME, APT_NO,
            CUST_PHONE, SSN and CREDIT_CARD_NO must be present.

    Returns:
        A DataFrame with title-cased first and last names, lower-cased
        middle names, an added FULL_STREET_ADDRESS column built as
        "<STREET_NAME>, <APT_NO>", phone numbers passed through
        ``format_phone_number``, and only the first row kept for each
        (SSN, CREDIT_CARD_NO) pair.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    df = pd.DataFrame(data)
    df['FIRST_NAME'] = df['FIRST_NAME'].str.title()
    df['MIDDLE_NAME'] = df['MIDDLE_NAME'].str.lower()
    df['LAST_NAME'] = df['LAST_NAME'].str.title()
    df['FULL_STREET_ADDRESS'] = df['STREET_NAME'] + ", " + df['APT_NO']
    # Convert each present value to text before formatting, but leave missing
    # values alone: a blanket astype(str) would turn them into the literal
    # strings "nan"/"None" instead of keeping them as missing.
    df['CUST_PHONE'] = df['CUST_PHONE'].map(
        lambda phone: phone if pd.isnull(phone) else format_phone_number(str(phone))
    )
    return df.drop_duplicates(subset=['SSN', 'CREDIT_CARD_NO'])

# Transformation function for branches
def transform_branches(data):
    """Build the cleaned branch table from raw branch records.

    Args:
        data: Branch records in any form ``pd.DataFrame`` accepts. The
            columns BRANCH_PHONE, BRANCH_ZIP and BRANCH_CODE must be present.

    Returns:
        A DataFrame with phone numbers passed through
        ``format_phone_number``, missing BRANCH_ZIP values replaced by the
        placeholder '999999', and only the first row kept for each
        BRANCH_CODE.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    df = pd.DataFrame(data)
    # Convert each present value to text before formatting, but leave missing
    # values alone: a blanket astype(str) would turn them into the literal
    # strings "nan"/"None" instead of keeping them as missing.
    df['BRANCH_PHONE'] = df['BRANCH_PHONE'].map(
        lambda phone: phone if pd.isnull(phone) else format_phone_number(str(phone))
    )
    # NOTE(review): the placeholder is a six-character string; if BRANCH_ZIP
    # holds five-digit ZIP codes a five-digit default may have been intended.
    # Confirm against the mapping specification before changing it.
    df['BRANCH_ZIP'] = df['BRANCH_ZIP'].fillna('999999')
    return df.drop_duplicates(subset=['BRANCH_CODE'])

# Transformation function for credit cards
def transform_credit_cards(data):
    """Build the cleaned credit-card transaction table.

    Args:
        data: Transaction records in any form ``pd.DataFrame`` accepts. The
            columns DAY, MONTH, YEAR and TRANSACTION_ID must be present.

    Returns:
        A DataFrame with an added TIMEID column holding each transaction
        date as a ``YYYYMMDD`` string, keeping only the first row for each
        TRANSACTION_ID.
    """
    transactions = pd.DataFrame(data)

    # Assemble real dates from the three component columns, then render them
    # in the compact YYYYMMDD form.
    date_parts = transactions[['DAY', 'MONTH', 'YEAR']]
    transaction_dates = pd.to_datetime(date_parts)
    transactions['TIMEID'] = transaction_dates.dt.strftime('%Y%m%d')

    unique_transactions = transactions.drop_duplicates(subset=['TRANSACTION_ID'])
    return unique_transactions

# Transform data: run each raw dataset through its cleaning function. The
# resulting DataFrames are kept at module level for the load step that follows.
transformed_customers = transform_customers(customers)
transformed_branches = transform_branches(branches)
transformed_credit_cards = transform_credit_cards(credit_cards)

# Load data into the database
def load_to_database(df, table_name, db_path='creditcard_capstone.db'):
    """Write a DataFrame to a table in a SQLite database file.

    An existing table with the same name is replaced, and the DataFrame
    index is not written.

    Args:
        df: DataFrame to store.
        table_name: Name of the destination table.
        db_path: Path of the SQLite database file. Defaults to
            'creditcard_capstone.db' in the current working directory.
    """
    conn = sqlite3.connect(db_path)
    # try/finally guarantees the connection is released even when to_sql
    # raises; using the connection as a context manager would only manage
    # the transaction, not close the connection.
    try:
        df.to_sql(table_name, conn, if_exists='replace', index=False)
    finally:
        conn.close()

# Write each transformed DataFrame to its own table. load_to_database passes
# if_exists='replace', so re-running these lines overwrites the tables.
load_to_database(transformed_customers, 'customers')
load_to_database(transformed_branches, 'branches')
load_to_database(transformed_credit_cards, 'credit_cards')
