In [None]:
import hashlib

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from core import constants
from core.utils import *
from core.allocator import *

# Notebook-wide logger used by every "Check - ..." message below.
# `get_logger` is not defined in this notebook; presumably it arrives through
# one of the `core` star-imports above.
log = get_logger()

# Show floats with two decimals in every DataFrame/Series rendering.
pd.options.display.float_format = "{:.2f}".format

## RPGF 3 Data Check and Cleanup

In [None]:
# Raw ballot export. The cells below read its "Signature", "Address" and
# "Ballot" columns.
df = pd.read_csv("data/input/ballots_raw_final.csv")
# Alternative export, kept for reference (the commented-out "Has voted" /
# "Has published" checks further down refer to columns not used otherwise).
# df = pd.read_csv("data/input/vote_export_final.csv")
# Project metadata: "Approval Attestation ID" is joined against project ids
# later on to obtain "displayName".
df_project_names = pd.read_csv("data/input/project_names.csv")
# Badge-holder list; its "ETH address for voting" column is used to validate
# the ballot addresses.
df_badge_holders = pd.read_csv("data/input/rpgf3_badge_holders.csv")

In [None]:
# --- Sanity checks on the raw ballot export -------------------------------

# Number of ballots = number of non-null signatures (Series.count skips NaN).
log.info("Check - Num Ballots: " + str(df["Signature"].count()))
# log.info("Check - Num Submissions (Published): " + str(df["Has published"].sum()))

# Compare addresses lower-cased, exactly like the badge-holder check below.
# Comparing the raw strings would miss duplicates that differ only in the
# casing of the hex digits.
ballot_addresses = df["Address"].str.lower()

# Check if voter_address is unique
duplicate_count = df.shape[0] - ballot_addresses.nunique()
if duplicate_count == 0:
    log.info("Check - Address is unique.")
else:
    log.info(f"Check - Address is not unique. There are {duplicate_count} duplicates.")

# # Check if all voters have voted
# if df[df["Has voted"] == False].shape[0] > 0:
#     not_voted = df[df["Has voted"] == False].shape[0]
#     total = df["Address"].nunique()
#     log.info(f"Check - {not_voted} voters out of {total} have not voted.")
# else:
#     log.info("Check - All voters have voted.")

# # Check if all voters have published
# if df[df["Has published"] == False].shape[0] > 0:
#     not_voted = df[df["Has published"] == False].shape[0]
#     total = df["Address"].nunique()
#     log.info(f"Check - {not_voted} voters out of {total} have not published.")
# else:
#     log.info("Check - All voters have published.")

# Check if all address in df are in df_badge_holders
badge_holder_addresses = df_badge_holders["ETH address for voting"].str.lower()

# Boolean mask aligned with df: True where the ballot address is a badge holder.
voter_validity_check = ballot_addresses.isin(badge_holder_addresses)

invalid_count = int((~voter_validity_check).sum())
if invalid_count == 0:
    log.info("Check - All addresses in df are in df_badge_holders.")
else:
    log.info(f"Check - {invalid_count} addresses in voting are not Badge Holders.")
    # Show the offending ballots so they can be inspected.
    display(df[~voter_validity_check])

In [None]:
# Turn each ballot's JSON payload into its own DataFrame. `safe_json_loads`
# and `expand_json` are not defined in this notebook (presumably provided by
# the `core` star-imports); each ballot's index label is passed along so the
# rows can be matched back to `df` afterwards.
expanded_list = []
for ballot_idx, ballot_json in df["Ballot"].items():
    expanded_list.append(expand_json(safe_json_loads(ballot_json), ballot_idx))

# Stack all per-ballot frames into one long table.
expanded_df = pd.concat(expanded_list, ignore_index=True)

# Re-attach the ballot-level columns by aligning "original_index" with the
# index of `df`.
result_df = expanded_df.set_index("original_index").join(df)

In [None]:
# testing_address = "0x5e349eca2dc61aBCd9dD99Ce94d04136151a09Ee"
# print_df = result_df[result_df["Address"] == testing_address]
# print("Num Projects Voted : " + str(print_df["projectId"].count()))
# display(print_df.head(5))

In [None]:
# Put the per-vote columns last so that the positional rename below lines up.
trailing_columns = ["amount", "projectId"]
columns = [
    name for name in result_df.columns if name not in trailing_columns
] + trailing_columns
result_df = result_df[columns]

# The rename is positional: it relies on the frame having exactly these seven
# columns in this order (pandas raises on a length mismatch).
result_df.columns = [
    "voter_address",
    "signature",
    "ballot",
    "verification_message",
    "is_multisig",
    "amount",
    "project_id",
]

# Keep only what the allocation needs and make sure amounts are numeric.
result_df = result_df.drop(
    columns=["ballot", "verification_message", "is_multisig"]
).astype({"amount": float})

In [None]:
# # result_df.head()
# result_df[result_df["voter_address"] == testing_address].head(5)

## Calculate Voting Results

In [None]:
# Build the allocator from the project-wide settings in `core.constants`.
# `ProjectAllocator` is not defined in this notebook (presumably it comes from
# the `core.allocator` star-import); the meaning of each setting is defined
# there — TODO confirm against that module.
allocator = ProjectAllocator(
    total_amount=constants.TOTAL_AMOUNT,
    min_amount=constants.MIN_AMOUNT,
    quorum=constants.QUORUM,
)

In [None]:
# First pass over the long-format votes. The cells below read the columns
# "is_eligible", "votes_count", "step_amount" and "median_amount" from the
# result and join on "project_id".
initial_allocation = allocator.calculate_initial_allocation(result_df)

In [None]:
# Peek at a random subset of the initial allocation. The seed keeps the
# displayed rows stable across "Restart & Run All", and the size is capped so
# the cell does not raise when there are fewer than 10 projects.
display(
    initial_allocation.sample(
        n=min(10, len(initial_allocation)),
        random_state=100,
    )
)

In [None]:
# For each eligibility group, show the smallest and largest vote count,
# step amount and median amount.
range_spec = {
    column: ["min", "max"]
    for column in ("votes_count", "step_amount", "median_amount")
}
display(initial_allocation.groupby("is_eligible").agg(range_spec))

In [None]:
# Iteratively rescale the eligible projects until the rounded total equals
# constants.TOTAL_AMOUNT. The rescaling itself happens inside
# `allocator.scale_allocations`; presumably that is also where projects under
# the minimum amount are filtered out — TODO confirm in core.allocator.
eligible_mask = initial_allocation["is_eligible"] == True
allocation_iter = initial_allocation[eligible_mask].copy()

# Seed the iteration with each project's median amount.
allocation_iter["scaled_amount"] = allocation_iter["median_amount"]

# Hard cap on the number of passes so the loop always terminates.
max_iterations = 100
current_iteration = 0

while True:
    total_reached = (
        round(allocation_iter["scaled_amount"].sum()) == constants.TOTAL_AMOUNT
    )
    if total_reached or current_iteration >= max_iterations:
        break

    allocation_iter = allocator.scale_allocations(allocation_iter, "step_amount")
    current_iteration += 1

    log.info("Check - Current iteration: " + str(current_iteration))

In [None]:
# Report how the scaling loop ended.

final_total = allocation_iter["scaled_amount"].sum()

# Use the same rounded comparison as the scaling loop. An exact float
# comparison here would report a failure for a run that converged (up to
# rounding) on the very last allowed iteration.
converged = round(final_total) == constants.TOTAL_AMOUNT

if current_iteration >= max_iterations and not converged:
    log.info("Maximum iterations reached without meeting the total amount condition.")
else:
    log.info(
        f"Condition met with {final_total} OP allocated through {current_iteration} iteration(s) 🎉."
    )

In [None]:
# Bring the scaled amounts back onto the full project list. Projects that did
# not take part in the scaling have no match and get a scaled amount of 0.
scaled_amounts = allocation_iter["scaled_amount"]
final_allocation = initial_allocation.merge(
    scaled_amounts, how="left", on="project_id"
)
final_allocation = final_allocation.fillna({"scaled_amount": 0})

# Every project that received a vote must still be present after the merge.
voted_project_count = result_df["project_id"].nunique()
if final_allocation.index.nunique() != voted_project_count:
    log.info(
        "Check - Final allocation table has missing projects. Printing out the missing projects below."
    )
    log.info(
        result_df[~result_df["project_id"].isin(final_allocation.index)]["project_id"]
    )
else:
    log.info("Check - Final allocation table has included all the projects 🎉.")

# The merge must not lose or duplicate any OP: compare against the total that
# came out of the scaling loop (`final_total`), with a small float tolerance.
merged_total = final_allocation["scaled_amount"].sum()
if abs(merged_total - final_total) < 0.01:
    log.info(
        "Check - Final allocation table sums to the right amount of OP: "
        + str(final_total)
        + " 🎉"
    )
else:
    log.info(
        "Check - Final allocation table does not sum to the total OP. Printing out the missing amount below."
    )
    log.info(str(final_total - merged_total) + " OP")

In [None]:
# Look up each project's display name: "project_id" is matched against
# "Approval Attestation ID" and only the "displayName" column is pulled in,
# then exposed as "project_name".
display_names = df_project_names.set_index("Approval Attestation ID")["displayName"]
final_allocation = final_allocation.join(display_names, on="project_id").rename(
    columns={"displayName": "project_name"}
)

In [None]:
# Histogram of the scaled amounts of eligible projects (nbins=50).
eligible_allocation = final_allocation[final_allocation["is_eligible"] == True]

fig = px.histogram(eligible_allocation, x="scaled_amount", nbins=50)

fig.update_layout(
    title="Final Allocation Distribution",
    xaxis_title="Scaled Amount",
    yaxis_title="Number of Projects",
)

fig.show()

In [None]:
# Scatter of the eligible projects: votes on the x-axis, scaled amount on the
# y-axis (also used for the colour scale), project name on hover.

# reset_index() turns the index into a regular column of the plotted frame.
is_eligible_mask = final_allocation["is_eligible"] == True
eligible_projects = final_allocation[is_eligible_mask].reset_index()

fig = px.scatter(
    eligible_projects,
    x="votes_count",
    y="scaled_amount",
    color="scaled_amount",
    hover_data=["project_name"],
)

fig.update_layout(
    title="Eligible Projects by Scaled Amount and Votes",
    xaxis_title="Votes Count",
    yaxis_title="Scaled Amount",
)

fig.show()

In [None]:
# Scatter of the projects that are NOT eligible: votes on the x-axis, median
# amount on the y-axis, coloured by step amount, project name on hover.

# Use a dedicated name for this frame. Storing the ineligible rows in
# `eligible_projects` would leave a variable whose content contradicts its
# name. reset_index() turns the index into a regular column.
ineligible_projects = final_allocation[
    final_allocation["is_eligible"] == False
].reset_index()

fig = px.scatter(
    ineligible_projects,
    x="votes_count",
    y="median_amount",
    color="step_amount",
    hover_data=["project_name"],
)

fig.update_layout(
    xaxis_title="Votes Count",
    yaxis_title="Median Amount",
    title="Ineligible Projects by Median Amount and Votes",
)

fig.show()

In [None]:
# Per eligibility group: range of the scaled amounts and number of projects.
summary_spec = {
    "scaled_amount": ["min", "max"],
    "is_eligible": "count",
}
eligibility_summary = final_allocation.groupby("is_eligible").agg(summary_spec)
display(eligibility_summary)

In [None]:
# The ten ineligible projects with the highest median amount (ties broken by
# vote count, both descending).
not_eligible = final_allocation[final_allocation["is_eligible"] == False]
top_not_eligible = not_eligible.sort_values(
    by=["median_amount", "votes_count"], ascending=False
).head(10)
display(top_not_eligible)

## Export Results

In [None]:
# Write the full allocation table, then a second file restricted to the
# eligible projects.
final_allocation.to_csv("data/output/rpgf3_allocation_final.csv")

eligible_only = final_allocation[final_allocation["is_eligible"] == True]
eligible_only.to_csv("data/output/rpgf3_allocation_final_eligible_only.csv")

log.info("Results saved in data/output/rpgf3_allocation_final.csv.")

In [None]:
# Draw a reproducible sample of 55 ballots. The seed is handed straight to
# pandas: DataFrame.sample draws from NumPy's random state, so seeding the
# stdlib `random` module has no effect on it, and `random` is not imported
# explicitly in this notebook anyway.
random_ballots = df.sample(55, random_state=100)

# select rows in result_df where Address in random_ballots["Address"]
sampled_df = result_df[result_df["voter_address"].isin(random_ballots["Address"])]
sampled_df.to_csv("data/output/rpgf3_sampled_ballots.csv")

## Visual Exploration

In [None]:
# Preview the long-format vote table (one row per voter/project pair).
result_df.head()

In [None]:
# One row per voter: total OP they allocated and how many projects appear in
# their ballot.
voter_df = (
    result_df.groupby("voter_address")
    .agg(
        amount=("amount", "sum"),
        projects_in_ballot=("project_id", "count"),
    )
    .reset_index()
)

In [None]:
# Share of voters whose ballot adds up to exactly 30,000,000 OP.
# NOTE(review): 30000000 presumably mirrors constants.TOTAL_AMOUNT — confirm
# before replacing the literal.
int((voter_df["amount"] == 30000000).sum()) / voter_df.shape[0]

In [None]:
# Summary statistics of the numeric per-voter columns ("amount" and
# "projects_in_ballot").
voter_df.describe()

In [None]:
# Violin plot (with an embedded box plot and every voter drawn as a point) of
# the total OP each voter allocated; hovering a point shows the voter address.
amount_violin_options = dict(
    y="amount",
    box=True,
    points="all",
    hover_data=["voter_address"],
)
fig = px.violin(voter_df, **amount_violin_options)

fig.update_layout(
    title="Total OP Amount Distribution",
    yaxis_title="Total OP Amount Allocated",
)

fig.show()

In [None]:
# Violin plot (with an embedded box plot and every voter drawn as a point) of
# the number of projects per ballot; hovering a point shows the voter address.
ballot_size_violin_options = dict(
    y="projects_in_ballot",
    box=True,
    points="all",
    hover_data=["voter_address"],
)
fig = px.violin(voter_df, **ballot_size_violin_options)

fig.update_layout(
    title="Projects in Ballot Distribution",
    yaxis_title="Number of Projects in Ballot",
)

fig.show()

In [None]:
def hash_address(address):
    """Return the hexadecimal SHA-256 digest of ``address``.

    The string is encoded with ``str.encode()`` defaults (UTF-8) and hashed
    as-is: no case normalisation and no salt are applied.
    """
    hasher = hashlib.sha256()
    hasher.update(address.encode())
    return hasher.hexdigest()


# Replace each voter address by its SHA-256 digest before exporting.
# NOTE(review): the digest is unsalted and the badge-holder addresses are
# loaded from a CSV in this same notebook, so hashing that list would
# presumably allow the addresses to be recovered — confirm whether this is an
# acceptable level of pseudonymisation.
result_df["hashed_address"] = result_df["voter_address"].map(hash_address)

# Check for uniqueness (optional): there should be as many distinct hashes as
# distinct voter addresses.
distinct_hashes = result_df["hashed_address"].nunique()
distinct_voters = result_df["voter_address"].nunique()
if distinct_hashes != distinct_voters:
    print("There are duplicate hashes.")
else:
    print("All hashes are unique.")


# Each row is a single vote.
result_df["vote_count"] = 1

# Attach the project display name ("project_id" matched against
# "Approval Attestation ID"), drop the clear-text address and export.
names_by_attestation = df_project_names.set_index("Approval Attestation ID")[
    "displayName"
]
result_df = result_df.join(names_by_attestation, on="project_id").drop(
    columns="voter_address"
)
result_df.to_csv("data/output/rpgf3_voter_project.csv")

In [None]:
# Grouped bar chart comparing the RPGF rounds: number of recipient projects
# (left axis) and OP amount (right axis).
# `go` and `make_subplots` come from the import cell at the top of the
# notebook. Re-importing `plotly.graph_objs` here would only rebind the
# already-imported `go` name through the legacy module path.

# Data (hard-coded per round)
rounds = ["RPGF1", "RPGF2", "RPGF3"]
number_of_projects = [58, 195, 501]
amount = [1000000, 10000000, 30000000]

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add bar chart for Number of Projects
fig.add_trace(
    go.Bar(x=rounds, y=number_of_projects, name="Number of Projects"),
    secondary_y=False,
)

# Add bar chart for Amount
# The trailing space makes each label a distinct x category, so the amount
# bar is drawn next to the project-count bar instead of on top of it.
amount_x = [round_val + " " for round_val in rounds]
fig.add_trace(
    go.Bar(x=amount_x, y=amount, name="OP Amount"),
    secondary_y=True,
)

# Update layout
fig.update_layout(
    title_text="Number of Project Recipients and Amount by RPGF Round", barmode="group"
)

# Set x-axis title
fig.update_xaxes(title_text="RPGF Round")

# Set y-axes titles
fig.update_yaxes(title_text="Number of Projects", secondary_y=False)
fig.update_yaxes(title_text="OP Amount", secondary_y=True)

# Show the figure
fig.show()