In [7]:
from datetime import datetime
import json

# Multipliers applied to convert an amount in the keyed currency to USD.
# Currencies that are not listed are passed through unchanged.
_DEFAULT_USD_RATES = {"EUR": 1.2, "GBP": 1.4}

# Accepted layouts for the incoming "date_time" string, tried in order.
_INPUT_TIMESTAMP_FORMATS = ("%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S")


def _format_timestamp(date_time_str):
    """Reformat a 'T'-separated timestamp as 'YYYY-MM-DD HH:MM:SS'.

    Fractional seconds are optional on input and are dropped on output.

    Raises:
        ValueError: if the string matches none of the accepted layouts.
    """
    for fmt in _INPUT_TIMESTAMP_FORMATS:
        try:
            parsed = datetime.strptime(date_time_str, fmt)
        except ValueError:
            continue
        return parsed.strftime("%Y-%m-%d %H:%M:%S")
    raise ValueError(f"Unsupported date_time format: {date_time_str!r}")


def transaction_transformations(transactions, usd_rates=None):
    """Normalise raw transaction records for loading.

    For every record the amount is rounded to two decimals, the timestamp is
    rewritten as 'YYYY-MM-DD HH:MM:SS' (the layout the original comment
    describes as Hive-compatible), and amounts in a currency that has a
    conversion rate are converted to USD. The input records are never
    modified; each result is a new dictionary.

    Args:
        transactions: iterable of mappings with the keys "transaction_id",
            "date_time", "amount", "currency", "merchant_details",
            "customer_id", "transaction_type" and "location".
        usd_rates: optional mapping of currency code to USD multiplier.
            Defaults to EUR -> 1.2 and GBP -> 1.4.

    Returns:
        A list of new dictionaries, one per input record, in input order.

    Raises:
        KeyError: if a record lacks one of the required keys.
        ValueError: if "date_time" is not in an accepted layout.
    """
    rates = _DEFAULT_USD_RATES if usd_rates is None else usd_rates
    transformed_data = []

    for transaction in transactions:
        # Round first, then convert and round again: this keeps the results
        # identical to what earlier runs of this notebook produced.
        amount = round(transaction["amount"], 2)

        formatted_date_time = _format_timestamp(transaction["date_time"])

        # Work on a local copy of the currency so the caller's record keeps
        # its original value (the input must stay reusable across re-runs).
        currency = transaction["currency"]
        rate = rates.get(currency)
        if rate is not None:
            amount = round(amount * rate, 2)
            currency = "USD"

        transformed_data.append({
            "transaction_id": transaction["transaction_id"],
            "date_time": formatted_date_time,
            "amount": amount,
            "currency": currency,
            "merchant_details": transaction["merchant_details"],
            "customer_id": transaction["customer_id"],
            "transaction_type": transaction["transaction_type"],
            "location": transaction["location"],
        })

    return transformed_data

# Demo: transform two sample records (one already in USD, one in EUR)
# and pretty-print every result as indented JSON.
original_transactions = [
    dict(
        transaction_id="T1",
        date_time="2023-11-30T12:30:45.123456",
        amount=123.456,
        currency="USD",
        merchant_details="MerchantA",
        customer_id="C001",
        transaction_type="purchase",
        location="City1",
    ),
    dict(
        transaction_id="T2",
        date_time="2023-11-30T12:30:45.123456",
        amount=100.0,
        currency="EUR",
        merchant_details="MerchantB",
        customer_id="C002",
        transaction_type="purchase",
        location="City2",
    ),
]

transformed_transactions = transaction_transformations(original_transactions)
for transaction in transformed_transactions:
    print(json.dumps(transaction, indent=4))


{
    "transaction_id": "T1",
    "date_time": "2023-11-30 12:30:45",
    "amount": 123.46,
    "currency": "USD",
    "merchant_details": "MerchantA",
    "customer_id": "C001",
    "transaction_type": "purchase",
    "location": "City1"
}
{
    "transaction_id": "T2",
    "date_time": "2023-11-30 12:30:45",
    "amount": 120.0,
    "currency": "USD",
    "merchant_details": "MerchantB",
    "customer_id": "C002",
    "transaction_type": "purchase",
    "location": "City2"
}


In [4]:
def customers_transformation(customers):
    """Flatten nested customer records into single-level dictionaries.

    For each record, the "account_history" entries are joined into one
    comma-separated string, "age" and "location" are lifted out of
    "demographics", and "avg_transaction_value" is lifted out of
    "behavioral_patterns" under the name "behavioral_pattern_avg".

    Args:
        customers: iterable of mappings with the keys "customer_id",
            "account_history" (iterable of strings), "demographics"
            (with "age" and "location") and "behavioral_patterns"
            (with "avg_transaction_value").

    Returns:
        A list of flat dictionaries, one per input record, in input order.

    Raises:
        KeyError: if a record lacks one of the keys read above.
    """

    def _flatten(record):
        # Lookups happen in the same order as the output fields are derived.
        history = ",".join(record["account_history"])
        years = record["demographics"]["age"]
        place = record["demographics"]["location"]
        average = record["behavioral_patterns"]["avg_transaction_value"]
        return {
            "customer_id": record["customer_id"],
            "account_history": history,
            "age": years,
            "location": place,
            "behavioral_pattern_avg": average,
        }

    return [_flatten(record) for record in customers]

# Demo: flatten two sample customer records and pretty-print every
# result as indented JSON.
original_customers = [
    dict(
        customer_id="C001",
        account_history=["T1", "T2", "T3"],
        demographics=dict(age=30, location="City1"),
        behavioral_patterns=dict(avg_transaction_value=250.75),
    ),
    dict(
        customer_id="C002",
        account_history=["T4", "T5"],
        demographics=dict(age=25, location="City2"),
        behavioral_patterns=dict(avg_transaction_value=100.0),
    ),
]

transformed_customers = customers_transformation(original_customers)
for customer in transformed_customers:
    print(json.dumps(customer, indent=4))

{
    "customer_id": "C001",
    "account_history": "T1,T2,T3",
    "age": 30,
    "location": "City1",
    "behavioral_pattern_avg": 250.75
}
{
    "customer_id": "C002",
    "account_history": "T4,T5",
    "age": 25,
    "location": "City2",
    "behavioral_pattern_avg": 100.0
}
