In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json

# Load Data
# Location of the raw transaction dump (presumably a JSON list of
# transaction records - confirm against the data file).
input_path = 'data/user-transactions.json'

# The path variable is `input_path`; the previous code opened an undefined
# name (`json_path`) and failed with NameError before any data was read.
# The encoding is explicit so decoding does not depend on the platform locale.
with open(input_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# One DataFrame row per loaded record.
df = pd.DataFrame(data)
print(f"Total Transactions: {len(df)}")
df.head()

# Action Distribution
# Bar chart of how many transactions were recorded for each action type.
action_fig, action_ax = plt.subplots(figsize=(8,5))
sns.countplot(data=df, x='action', ax=action_ax)
action_ax.set_title('Transaction Action Distribution')
# Tilt the category labels so longer action names do not overlap.
plt.xticks(rotation=45)
plt.show()

# Feature Engineering
from datetime import datetime

def extract_features(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate raw transactions into one feature row per user.

    Args:
        df: Transaction records with at least the columns ``user``,
            ``action`` and ``timestamp``. ``timestamp`` is parsed as Unix
            epoch seconds. The frame passed in is left unmodified.

    Returns:
        A DataFrame with one row per distinct ``user`` and the columns
        ``user``, ``total_tx``, ``repay_borrow_ratio``,
        ``liquidation_count``, ``deposit_count``, ``borrow_count``,
        ``active_days`` and ``action_variety``. An empty input yields an
        empty frame that still carries these columns.
    """
    feature_columns = [
        'user',
        'total_tx',
        'repay_borrow_ratio',
        'liquidation_count',
        'deposit_count',
        'borrow_count',
        'active_days',
        'action_variety',
    ]

    # Keep the schema stable for callers that drop/select columns afterwards.
    if df.empty:
        return pd.DataFrame(columns=feature_columns)

    # Parse timestamps on a derived frame so the caller's data is not
    # overwritten (re-running on the same frame must give the same result).
    tx = df.assign(timestamp=pd.to_datetime(df['timestamp'], unit='s'))

    features = []
    for user, group in tx.groupby('user'):
        actions = group['action'].value_counts().to_dict()
        repay_count = actions.get('repay', 0)
        borrow_count = actions.get('borrow', 0)
        first_seen = group['timestamp'].min()
        last_seen = group['timestamp'].max()

        features.append({
            'user': user,
            'total_tx': len(group),
            # Users who never borrowed get 0 rather than a division error.
            'repay_borrow_ratio': repay_count / borrow_count if borrow_count else 0,
            'liquidation_count': actions.get('liquidationcall', 0),
            'deposit_count': actions.get('deposit', 0),
            'borrow_count': borrow_count,
            # Whole days between first and last transaction, inclusive, so a
            # single-transaction user counts as active for 1 day.
            'active_days': (last_seen - first_seen).days + 1,
            # Number of distinct action types the user performed.
            'action_variety': len(actions),
        })
    return pd.DataFrame(features, columns=feature_columns)

# Build the per-user feature table from the loaded transactions.
features_df = extract_features(df)
# Preview the first rows (rendered only when this is the last expression of a
# notebook cell; it has no visible effect when run as a plain script).
features_df.head()

# Feature Distributions
# Each entry: (feature column, panel title, whether to overlay a KDE curve).
histogram_panels = [
    ('repay_borrow_ratio', 'Repay/Borrow Ratio', True),
    ('liquidation_count', 'Liquidation Count', False),
    ('total_tx', 'Total Transactions', True),
    ('active_days', 'Active Days', True),
    ('action_variety', 'Action Variety', True),
]

fig, axs = plt.subplots(2, 3, figsize=(16,8))
panel_axes = axs.ravel()

# Fill the 2x3 grid row by row, one histogram per feature.
for panel_ax, (column, title, with_kde) in zip(panel_axes, histogram_panels):
    sns.histplot(features_df[column], ax=panel_ax, kde=with_kde)
    panel_ax.set_title(title)

# Only five features are plotted; hide the unused sixth panel.
panel_axes[len(histogram_panels)].axis('off')

plt.tight_layout()
plt.show()

# Correlation Heatmap
plt.figure(figsize=(8,6))
# The user identifier is not a feature, so leave it out of the correlation.
feature_values = features_df.drop(columns=['user'])
correlation_matrix = feature_values.corr()
heatmap_ax = sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
heatmap_ax.set_title('Feature Correlation Heatmap')
plt.show()
