In [4]:
import pandas as pd

# Names of the datasets to load; each one is read from data/<name>.csv.
datasets = [
    "debit_transactions",
    "credit_transactions",
    "customer_risk_markers",
]

# Sanity-check every dataset: announce it, read it, then show its shape and
# first rows. Any failure is reported and the loop continues with the next
# dataset, so one bad file does not stop the check of the others.
for ds in datasets:
    print(f"Loading dataset: {ds}")
    file_path = f"data/{ds}.csv"
    try:
        df = pd.read_csv(file_path)
        shape_line = f"{ds} - Shape: {df.shape}"
        print(shape_line)
        preview = df.head()  # first few rows, enough to confirm the columns
        print(preview)
    except Exception as err:
        # NOTE: on failure `df` still holds whatever was loaded previously.
        print(f"Error loading {ds}: {err}")

Loading dataset: debit_transactions
debit_transactions - Shape: (1000, 4)
   account_id transaction_type  transaction_amount            transaction_date
0          27            Debit             1680.49  2024-09-12 10:11:39.037889
1          15            Debit              913.53  2024-09-18 10:11:39.037889
2         150            Debit             4515.06  2024-10-10 10:11:39.037889
3          86            Debit             1604.86  2024-09-23 10:11:39.037889
4           7            Debit             4033.40  2024-09-11 10:11:39.037889
Loading dataset: credit_transactions
credit_transactions - Shape: (1000, 4)
   account_id transaction_type  transaction_amount            transaction_date
0          70           Credit              564.78  2024-09-29 10:11:39.037889
1         172           Credit              950.58  2024-11-01 10:11:39.037889
2          42           Credit               67.31  2024-10-07 10:11:39.037889
3          35           Credit              352.30  2024-09-

In [6]:
# Simulate combining several data frames into one by stacking them row-wise.
#
# Depends on `datasets` (the list of dataset names) defined in an earlier
# cell; each name is read from data/<name>.csv.
#
# NOTE(review): the inputs do not share one schema. The preview below shows
# `customer_id` / `risk_marker` as NaN on transaction rows and `account_id`
# displayed as float, i.e. the risk-marker table contributes columns the
# transaction tables lack (and vice versa). Treat the result as a
# demonstration of concat, not as an analysis-ready table.
data_frames = []
for ds in datasets:
    df = pd.read_csv(f"data/{ds}.csv")
    data_frames.append(df)

# pd.concat accepts a one-element list, so a single dataset goes through the
# same path as several: the result is always a new frame with a fresh
# 0..n-1 index, never an alias of data_frames[0]. Only the empty list needs a
# guard, because pd.concat raises ValueError when given no objects.
if data_frames:
    combined_data = pd.concat(data_frames, ignore_index=True, sort=False)
else:
    combined_data = pd.DataFrame()

print("Combined DataFrame:")
print(combined_data.head())  # Display combined data preview
print("Shape:", combined_data.shape)

Combined DataFrame:
   account_id transaction_type  transaction_amount  \
0        27.0            Debit             1680.49   
1        15.0            Debit              913.53   
2       150.0            Debit             4515.06   
3        86.0            Debit             1604.86   
4         7.0            Debit             4033.40   

             transaction_date  customer_id risk_marker  
0  2024-09-12 10:11:39.037889          NaN         NaN  
1  2024-09-18 10:11:39.037889          NaN         NaN  
2  2024-10-10 10:11:39.037889          NaN         NaN  
3  2024-09-23 10:11:39.037889          NaN         NaN  
4  2024-09-11 10:11:39.037889          NaN         NaN  
Shape: (2100, 6)


In [10]:
import pandas as pd

# Read each source table from its CSV file under data/.
credit_transactions = pd.read_csv("data/credit_transactions.csv")
debit_transactions = pd.read_csv("data/debit_transactions.csv")
customer_risk_markers = pd.read_csv("data/customer_risk_markers.csv")

# Show the first rows of every table, each under its own heading.
previews = (
    ("Credit Transactions:\n", credit_transactions),
    ("Debit Transactions:\n", debit_transactions),
    ("Customer Risk Markers:\n", customer_risk_markers),
)
for heading, frame in previews:
    print(heading, frame.head())

# Stack the credit and debit rows into one table. This is a row-wise concat
# with a fresh index, not a key-based merge on `account_id`.
transaction_frames = [credit_transactions, debit_transactions]
transaction_data = pd.concat(transaction_frames, ignore_index=True)

# `customer_risk_markers` is loaded and previewed but NOT joined here.
# NOTE(review): it appears to be keyed by `customer_id`, while transactions
# carry `account_id`; joining them would need a customer_id <-> account_id
# mapping, which this cell does not have or apply. Add that bridge (or a
# shared key) before attaching risk markers to transactions.

# Report the stacked transaction table.
print("Combined Transaction Data:\n", transaction_data.head())
print("Shape of Combined Transaction Data:", transaction_data.shape)

Credit Transactions:
    account_id transaction_type  transaction_amount            transaction_date
0          70           Credit              564.78  2024-09-29 10:11:39.037889
1         172           Credit              950.58  2024-11-01 10:11:39.037889
2          42           Credit               67.31  2024-10-07 10:11:39.037889
3          35           Credit              352.30  2024-09-19 10:11:39.037889
4          19           Credit             4185.79  2024-09-25 10:11:39.037889
Debit Transactions:
    account_id transaction_type  transaction_amount            transaction_date
0          27            Debit             1680.49  2024-09-12 10:11:39.037889
1          15            Debit              913.53  2024-09-18 10:11:39.037889
2         150            Debit             4515.06  2024-10-10 10:11:39.037889
3          86            Debit             1604.86  2024-09-23 10:11:39.037889
4           7            Debit             4033.40  2024-09-11 10:11:39.037889
Customer 