In [1]:
import sqlite3
import pandas as pd
import numpy as np
import json

# Connect to the SQLite database
conn = sqlite3.connect('iotaDB.db')
cursor = conn.cursor()

# Fetching data from the database
query = """
SELECT input_addresses_x, input_amounts_x, output_addresses_y, output_amounts_y FROM Transactions
"""
cursor.execute(query)
transactions = cursor.fetchall()

# Close the database connection
conn.close()

# Normalize data and create a DataFrame for input and output separately
inputs = []
outputs = []

for trans in transactions:
    input_addresses = json.loads(trans[0])
    input_amounts = json.loads(trans[1])
    output_addresses = json.loads(trans[2])
    output_amounts = json.loads(trans[3])
    
    for address, amount in zip(input_addresses, input_amounts):
        inputs.append({'address': address, 'amount': amount})
    
    for address, amount in zip(output_addresses, output_amounts):
        outputs.append({'address': address, 'amount': amount})

df_inputs = pd.DataFrame(inputs)
df_outputs = pd.DataFrame(outputs)

# Aggregate the sums of amounts per address for inputs and outputs
agg_inputs = df_inputs.groupby('address')['amount'].sum()
agg_outputs = df_outputs.groupby('address')['amount'].sum()

# Calculate net balances if needed (input - output)
# Here, combining them and calculating the net effect per address
all_addresses = pd.concat([agg_inputs, -agg_outputs]).groupby(level=0).sum()

# Sort the data by amount and normalize
sorted_balances = all_addresses.sort_values()
cumulative_values = (sorted_balances / sorted_balances.sum()).cumsum()

# Calculate the Gini coefficient
cumulative_base = np.arange(1, len(cumulative_values) + 1) / len(cumulative_values)
gini = (cumulative_base - cumulative_values).sum() / len(cumulative_values)
gini_coefficient = 1 - 2 * gini

print("Gini Coefficient for Net Balances:", gini_coefficient)


Gini Coefficient for Net Balances: 2.0417183619000294
