In [1]:


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the component/balance table from the Analysis directory
components_df = pd.read_csv("../Analysis/iota_h0_h1_components_with_balance_removing_0Values.csv")

# Keep every row except those whose 'component' value is 0
components_df = components_df.loc[components_df['component'].ne(0)]

# Pull the 'total_balance' column out as a raw array for the numeric code below
balances = components_df['total_balance'].values

# Function to calculate the Gini coefficient
def gini_coefficient(x):
    """Return the Gini coefficient of the values in ``x``.

    The result equals half the relative mean absolute difference,
    ``mean(|x_i - x_j|) / (2 * mean(x))`` taken over all ordered pairs, but it
    is evaluated from the sorted values so that no ``n x n`` matrix of
    pairwise differences has to be allocated.

    Parameters
    ----------
    x : array-like
        Numeric values; multi-dimensional input is flattened.

    Returns
    -------
    float
        The Gini coefficient, or ``nan`` when ``x`` is empty or its values
        sum to zero (the coefficient is undefined in both cases).
    """
    values = np.sort(np.asarray(x, dtype=float).ravel())
    n = values.size
    if n == 0:
        return np.nan
    total = values.sum()
    if total == 0:
        return np.nan
    # For ascending x_(1..n):  sum_{i,j} |x_i - x_j| = 2 * sum_i (2i - n - 1) * x_(i)
    rank_weights = 2 * np.arange(1, n + 1) - n - 1
    return np.dot(rank_weights, values) / (n * total)

# Gini coefficient of the component balances, reported on stdout
gini = gini_coefficient(balances)
print(f"Gini Coefficient for Balance Distribution (excluding component 0): {gini}")

# Histogram of the balances; bar heights are drawn on a logarithmic axis
hist_fig, hist_ax = plt.subplots(figsize=(10, 6))
hist_ax.hist(balances, bins=50, edgecolor='black', log=True)
hist_ax.set(
    title="Balance Distribution for Components (excluding component 0)",
    xlabel="Balance",
    ylabel="Frequency (log scale)",
)
hist_ax.grid(True)
plt.show()

# Plot the Lorenz curve
def lorenz_curve(x):
    """Compute the points of the Lorenz curve for the values in ``x``.

    The values are sorted ascending and accumulated, a leading zero is added
    so the curve starts at the origin, and the running totals are divided by
    the grand total.

    Returns
    -------
    tuple of numpy.ndarray
        ``(population_share, value_share)``: an evenly spaced grid on [0, 1]
        and the matching cumulative share of the total, both with one more
        element than ``x``.
    """
    running_total = np.cumsum(np.sort(x))
    # Prepend the origin while keeping the dtype of the running totals.
    origin = np.zeros(1, dtype=running_total.dtype)
    with_origin = np.concatenate((origin, running_total))
    value_share = with_origin / with_origin[-1]
    population_share = np.linspace(0, 1, value_share.size)
    return population_share, value_share

# Lorenz curve of the component balances, with the diagonal as the equality reference
lorenz_x, lorenz_y = lorenz_curve(balances)

lorenz_fig, lorenz_ax = plt.subplots(figsize=(10, 6))
lorenz_ax.plot(lorenz_x, lorenz_y, drawstyle='steps-post', label='Lorenz Curve')
lorenz_ax.plot([0, 1], [0, 1], linestyle='--', color='k', label='Line of Equality')
lorenz_ax.set(
    title="Lorenz Curve for Balance Distribution (excluding component 0)",
    xlabel="Cumulative Share of Components",
    ylabel="Cumulative Share of Balance",
)
lorenz_ax.legend()
lorenz_ax.grid(True)
plt.show()




FileNotFoundError: [Errno 2] No such file or directory: '../Analysis/iota_h0_h1_components_with_balance_removing_0Values.csv'

In [None]:


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sqlite3

# Connect to the SQLite database
conn = sqlite3.connect('iotaDB.db')

# Query the transaction data. The connection is closed in a `finally` clause so
# it is released even when the query fails (e.g. missing table or column).
try:
    query = "SELECT timestamp, input_addresses_x, input_amounts_x, output_addresses_y, output_amounts_y FROM Transactions"
    transactions_df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Convert the timestamp to datetime
# NOTE(review): if the column stores integer epoch values, pd.to_datetime needs
# an explicit `unit=` -- confirm the stored format.
transactions_df['timestamp'] = pd.to_datetime(transactions_df['timestamp'])

# Function to extract addresses and balances
def extract_balances(df):
    """Flatten transactions into one signed amount per address occurrence.

    Each row of ``df`` must provide the columns ``timestamp``,
    ``input_addresses_x``, ``input_amounts_x``, ``output_addresses_y`` and
    ``output_amounts_y``. The four address/amount columns hold text that
    evaluates to sequences; addresses and amounts are paired positionally
    (extra items on the longer side are ignored, as with ``zip``).

    Returns
    -------
    pandas.DataFrame
        Columns ``address``, ``balance`` and ``timestamp``. Input entries are
        recorded with a negated amount, output entries with the amount as
        given. For an empty ``df`` the frame is empty but still carries these
        three columns, so downstream column access keeps working.
    """
    columns = ['address', 'balance', 'timestamp']
    records = []

    # Iterate column-wise instead of df.iterrows(): same values, without
    # building a Series object for every row.
    rows = zip(
        df['timestamp'],
        df['input_addresses_x'],
        df['input_amounts_x'],
        df['output_addresses_y'],
        df['output_amounts_y'],
    )
    for timestamp, in_addrs_raw, in_amts_raw, out_addrs_raw, out_amts_raw in rows:
        # SECURITY NOTE(review): eval() executes arbitrary Python found in the
        # database text. If these columns are plain list literals,
        # ast.literal_eval would be a safe replacement -- confirm the stored
        # format before switching.
        input_addresses = eval(in_addrs_raw)
        input_amounts = eval(in_amts_raw)
        output_addresses = eval(out_addrs_raw)
        output_amounts = eval(out_amts_raw)

        # Inputs count as negative amounts for the address.
        for addr, amt in zip(input_addresses, input_amounts):
            records.append({'address': addr, 'balance': -amt, 'timestamp': timestamp})
        # Outputs count as positive amounts for the address.
        for addr, amt in zip(output_addresses, output_amounts):
            records.append({'address': addr, 'balance': amt, 'timestamp': timestamp})

    return pd.DataFrame(records, columns=columns)

# Flatten the transactions into one signed amount per address occurrence
balances_df = extract_balances(transactions_df)

# Keep strictly positive entries only.
# NOTE(review): this filter runs before any per-address sum, so negative (input)
# entries are discarded rather than netted against receipts -- confirm intended.
positive_balances_df = balances_df.loc[balances_df['balance'].gt(0)]

# Sum the remaining amounts per calendar month and address
monthly_balances = (
    positive_balances_df
    .set_index('timestamp')
    .groupby([pd.Grouper(freq='M'), 'address'])['balance']
    .sum()
    .reset_index()
)

# Function to calculate the Gini coefficient using a more memory efficient method
def gini_coefficient(x):
    """Return the Gini coefficient of ``x`` using sorted cumulative sums.

    Only O(n) extra memory is needed, unlike approaches that build every
    pairwise difference.

    Parameters
    ----------
    x : array-like
        One-dimensional numeric values.

    Returns
    -------
    float
        The Gini coefficient, or ``nan`` when ``x`` is empty or its values
        sum to zero (the coefficient is undefined in both cases).
    """
    x = np.sort(x)  # the cumulative-sum formula requires ascending order
    n = len(x)
    if n == 0:
        # Previously cumx[-1] raised IndexError on empty input.
        return np.nan
    cumx = np.cumsum(x, dtype=float)
    total = cumx[-1]
    if total == 0:
        # Avoid a division by zero; there is nothing to distribute.
        return np.nan
    return (n + 1 - 2 * np.sum(cumx) / total) / n

# One Gini coefficient per month, computed over that month's per-address sums
monthly_gini = (
    monthly_balances
    .groupby('timestamp')['balance']
    .apply(gini_coefficient)
    .reset_index()
)

# Line chart of the monthly Gini coefficient
gini_fig, gini_ax = plt.subplots(figsize=(10, 6))
gini_ax.plot(monthly_gini['timestamp'], monthly_gini['balance'], label='Gini Coefficient')
gini_ax.set(
    title="Gini Coefficient Over Time",
    xlabel="Time",
    ylabel="Gini Coefficient",
)
gini_ax.legend()
gini_ax.grid(True)
plt.show()

# Plot the Lorenz curve
def lorenz_curve(x):
    """Return ``(population_share, value_share)`` for the Lorenz curve of ``x``.

    ``value_share`` is the ascending cumulative sum of ``x`` with a leading
    zero, divided by the overall total; ``population_share`` is an evenly
    spaced grid on [0, 1] with the same number of points.
    """
    ordered = np.sort(x)
    # The leading zero makes the curve start at the origin.
    value_share = np.insert(np.cumsum(ordered), 0, 0)
    value_share = value_share / value_share[-1]
    population_share = np.linspace(0, 1, value_share.shape[0])
    return population_share, value_share

# positive_balances_df holds one row per transaction output, so an address that
# received several outputs appears several times. Sum per address first so the
# curve really is over addresses, as the x-axis label states (and as the
# monthly Gini series already does).
# NOTE(review): these totals are amounts received, not net balances -- negative
# entries were filtered out upstream.
address_totals = positive_balances_df.groupby('address')['balance'].sum()
lorenz_x, lorenz_y = lorenz_curve(address_totals.to_numpy())

plt.figure(figsize=(10, 6))
plt.plot(lorenz_x, lorenz_y, drawstyle='steps-post', label='Lorenz Curve')
plt.plot([0, 1], [0, 1], linestyle='--', color='k', label='Line of Equality')
plt.title("Lorenz Curve for Balance Distribution")
plt.xlabel("Cumulative Share of Addresses")
plt.ylabel("Cumulative Share of Balance")
plt.legend()
plt.grid(True)
plt.show()




Analysis and Interpretation of the Gini Coefficient Over Time
Introduction
The Gini coefficient is a measure of statistical dispersion intended to represent the income or wealth inequality within a nation or a group. It ranges from 0 to 1, where 0 indicates perfect equality (everyone has the same wealth or income) and 1 indicates maximal inequality (one person has all the wealth or income, and all others have none). In the context of the IOTA transaction dataset, the Gini coefficient has been calculated to understand the distribution of wealth among addresses over time.

Data Preparation
The analysis involves the following key steps:

Data Extraction: Transaction data was extracted from the iotaDB.db database, focusing on input and output addresses and their corresponding amounts.
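Data Cleaning: Entries with zero or negative amounts (the input, i.e. spending, side of each transaction) were filtered out before aggregation, so the "balance" analysed here is the total positive amount received by each address rather than a net balance.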
Monthly Aggregation: The balance data was aggregated on a monthly basis to observe trends over time.
Gini Coefficient Calculation: The Gini coefficient was computed for each month's balance distribution, providing a snapshot of wealth inequality at each point in time.
Findings
The plot of the Gini coefficient over time reveals several key trends:

General Trend:

The Gini coefficient remains consistently high throughout the observed period, indicating a significant level of inequality in the wealth distribution among IOTA addresses.
The values are close to 1, suggesting that a small number of addresses hold a disproportionately large share of the total wealth.
Initial Increase (May 2021 to September 2021):

The Gini coefficient shows a steep increase from May 2021 to September 2021. This could indicate that during this period, wealth became more concentrated in fewer addresses.
Possible reasons for this trend could include large transactions being directed to a few addresses, new wealth accumulation in specific addresses, or the movement of funds from many smaller addresses to fewer larger ones.
Stabilization and Slight Fluctuations (October 2021 to January 2022):

From October 2021 to January 2022, the Gini coefficient stabilizes with slight fluctuations. This period indicates a relative steadiness in the distribution of wealth, although at a high level of inequality.
This stabilization could suggest that the major addresses maintained their share of the wealth, with no significant new wealth entering the network and no significant redistribution occurring.
Decline (February 2022 to April 2022):

A noticeable decline in the Gini coefficient is observed starting in February 2022 and continuing through April 2022. This indicates a slight decrease in inequality.
The reasons for this decrease could include redistribution of wealth, with some large addresses transferring wealth to a larger number of smaller addresses. It could also suggest new, smaller addresses accumulating more wealth over this period.
Interpretation
The consistently high Gini coefficient suggests that the IOTA network has a significant concentration of wealth, with a few addresses holding a large portion of the total funds. This pattern is typical in many financial networks, where wealth accumulation follows a power-law distribution, leading to a small number of "rich" nodes (addresses) and many "poor" nodes.

The initial increase in the Gini coefficient may indicate periods of wealth concentration, possibly due to large investors consolidating their holdings. The later decline suggests some redistribution, possibly as larger addresses distribute funds or smaller addresses begin to accumulate wealth.

Overall, this analysis provides valuable insights into the wealth distribution within the IOTA network. High levels of inequality may have implications for the stability and security of the network, as well as for the behavior and motivation of participants. Understanding these dynamics is crucial for network governance and for designing mechanisms to promote a more equitable distribution of wealth.

Future Work
Further analysis could include:


Network Analysis: Investigating the network structure to identify key nodes and their roles in wealth distribution.
Temporal Analysis: Examining shorter time intervals to capture more granular changes in wealth distribution.
Comparison with Other Networks: Comparing the wealth distribution in IOTA with other cryptocurrency networks to identify unique patterns or common trends.
This comprehensive approach will provide deeper insights into the dynamics of wealth distribution and its implications for the IOTA network.