In [30]:
import numpy as np
import json
import pandas as pd
import matplotlib.pyplot as plt

# Ingest data
properties_data = []

## Read the export line by line; each non-blank line is parsed as one JSON document.
## The encoding is pinned to UTF-8 so decoding does not depend on the platform default.
with open('MixPanel_export-1691969806891.txt', 'r', encoding='utf-8') as f:
    for line in f:
        # Skip blank / whitespace-only lines, which json.loads would reject
        if not line.strip():
            continue
        # Load the JSON data from this line
        json_line = json.loads(line)
        # Add the JSON data to the list
        properties_data.append(json_line)

# Flatten nested objects into dotted column names (e.g. 'properties.version')
df = pd.json_normalize(properties_data)

## Columns needed for the analysis (still carrying the flattened 'properties.' prefix)
selected_columns = [
    'properties.sellAmountUsd',
    'properties.sellAssetId',
    'properties.buyAssetId',
    'properties.quoteMeta',
    'properties.version',
    'properties.distinct_id',
]

# Input arguments that identify a quote request; rows from the same user that agree on
# all of these are treated as duplicates (i.e. polling quote updates)
columns_to_check = ['sellAmountUsd', 'sellAssetId', 'buyAssetId', 'distinct_id']

df = (
    df[selected_columns]
    ## Clean up the nested headers by stripping the 'properties.' prefix
    .rename(columns=lambda col: col.replace('properties.', ''))
    # Use only the latest data schema and bug fixes
    .loc[lambda frame: frame['version'] == "20230823"]
    # Keep the first occurrence of each duplicated quote request
    .drop_duplicates(subset=columns_to_check)
)

# Handle infinite ratio bug, manifesting as "1" in the data, by removing these quotes
# TODO

# Create a new DataFrame containing only EVM trades: rows where both sellAssetId and
# buyAssetId start with 'eip155'.
# na=False makes a missing asset id count as "not EVM"; without it the mask would
# contain NaN and boolean indexing would raise.
is_evm_sell = df['sellAssetId'].str.startswith('eip155', na=False)
is_evm_buy = df['buyAssetId'].str.startswith('eip155', na=False)
df_evm = df.loc[is_evm_sell & is_evm_buy].copy()


# Extract the prefix before "/" (e.g. "eip155:1" from "eip155:1/slip44:60") for
# sellAssetId and buyAssetId so the two sides of a trade can be compared
df_evm['sellAssetPrefix'] = df_evm['sellAssetId'].str.split('/').str[0]
df_evm['buyAssetPrefix'] = df_evm['buyAssetId'].str.split('/').str[0]

# Keep only trades whose sell and buy prefixes match; copied (like df_evm above) so
# the result is an independent frame rather than a slice of df_evm
df_evm_same_chain = df_evm[df_evm['sellAssetPrefix'] == df_evm['buyAssetPrefix']].copy()


## Log the DF schema as a sanity check
df_evm_same_chain.info()

<class 'pandas.core.frame.DataFrame'>
Index: 351 entries, 30 to 2491
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   sellAmountUsd    351 non-null    object
 1   sellAssetId      351 non-null    object
 2   buyAssetId       351 non-null    object
 3   quoteMeta        351 non-null    object
 4   version          351 non-null    object
 5   distinct_id      351 non-null    object
 6   sellAssetPrefix  351 non-null    object
 7   buyAssetPrefix   351 non-null    object
dtypes: object(8)
memory usage: 24.7+ KB


In [28]:
# Log one sample row for reference

# Take the row at integer position 30 of the same-chain EVM frame and convert it to a
# dict keyed by column name (note: despite the variable name, this is not the first row)
sample_row = df_evm_same_chain.iloc[30]
first_row_dict = sample_row.to_dict()

# Serialize the dict as JSON with 4-space indentation for readability
formatted_json = json.dumps(first_row_dict, indent=4)

# Show the pretty-printed row
print(formatted_json)

{
    "sellAmountUsd": "0.52100000000000082018",
    "sellAssetId": "eip155:1/slip44:60",
    "buyAssetId": "eip155:1/erc20:0xc770eefad204b5180df6a14ee197d99d808ee52d",
    "quoteMeta": [
        {
            "differenceFromBestQuoteDecimalPercentage": 0,
            "quoteReceived": true,
            "swapperName": "LI.FI"
        },
        {
            "differenceFromBestQuoteDecimalPercentage": 1,
            "quoteReceived": true,
            "swapperName": "0x"
        },
        {
            "differenceFromBestQuoteDecimalPercentage": null,
            "quoteReceived": false,
            "swapperName": "1INCH"
        },
        {
            "differenceFromBestQuoteDecimalPercentage": null,
            "quoteReceived": false,
            "swapperName": "CoW Swap"
        },
        {
            "differenceFromBestQuoteDecimalPercentage": null,
            "quoteReceived": false,
            "swapperName": "Osmosis"
        },
        {
            "differenceFromBestQuoteDe