# Voting Influence and Concentration Analysis (VICA)

## Process Data

In [1]:
import pandas as pd
from data_processor import process_data

# Optional: rebuild the processed CSVs from raw data (uncomment when the raw
# data has been updated). Kept commented out so a normal run reuses the
# snapshot loaded below.
# NOTE(review): these to_csv calls do not pass index=False, so the DataFrame
# index is written as an extra unnamed column; reading the files back then
# yields an 'Unnamed: 0' column. Confirm the downstream code tolerates that.
# # Process from raw data (if raw data has been updated)
# df_token_house_top_200, df_citizen_house, df_summary = process_data()
# df_token_house_top_200.to_csv('../data/token_house.csv')
# df_citizen_house.to_csv('../data/citizen_house.csv')
# df_summary.to_csv('../data/summary.csv')

# Use processed data from 2024-07-01
# The three frames loaded here are the inputs handed to
# process_data_and_convert_to_r in the "Run VICA" cell.
df_token_house_top_200 = pd.read_csv('../data/token_house.csv')
df_citizen_house = pd.read_csv('../data/citizen_house.csv')
df_summary = pd.read_csv('../data/summary.csv')

## Run VICA

In [None]:
# NOTE(review): process_data_and_convert_to_r, run_vica_all and run_stats are
# not defined in this notebook; presumably they come from this star import.
# Consider importing them by name so the dependency is explicit.
from vica import *

# Further process the loaded data and convert it to R objects
(prop_token_filter, prop_cit_filter,prop_token_filter_add, prop_cit_filter_add) = process_data_and_convert_to_r(df_token_house_top_200, df_citizen_house, df_summary)
# Run VICA on the four converted inputs; results_dict is indexed by entity
# (e.g. 'token_house') in the exploration cells further down.
results_dict = run_vica_all(prop_token_filter, prop_cit_filter,prop_token_filter_add, prop_cit_filter_add)
# Get stats: df_stats is what gets saved to CSV and plotted later.
df_stats = run_stats(results_dict)


## Save Results and Upload to Dune

In [3]:
import os

results_output_file_path = '../results/results.csv'

# Append this run's statistics to the cumulative results file.
# The header row is written only when the file does not exist yet or is empty,
# so repeated runs never inject a second header into the middle of the CSV and
# an empty placeholder file still gets its column names.
# os.path is used instead of pandas' undocumented pd.io.common.file_exists,
# which is an internal helper and not part of the public pandas API.
needs_header = (
    not os.path.exists(results_output_file_path)
    or os.path.getsize(results_output_file_path) == 0
)
df_stats.to_csv(
    results_output_file_path,
    mode='a',
    header=needs_header,
    index=False,
)


In [4]:
from dune_uploader import upload_data

# Upload the cumulative results CSV. results_output_file_path is set in the
# "save results" cell above, so that cell must have been run first.
upload_data(results_output_file_path)

## Explore and Experiment

In [9]:
# Inspect which result sections are stored under the 'token_house' entry.
results_dict['token_house'].keys()

dict_keys(['similarity_matrix', 'community_detection', 'louvain_member', 'logistic_regression'])

In [8]:
# Imported here so the cell runs on a fresh kernel in top-to-bottom order;
# no earlier cell in the notebook imports numpy explicitly.
import numpy as np

# Spot check: variance of the token-house factual adjusted logit effects,
# computed over the non-NaN entries only.
t = results_dict['token_house']['logistic_regression']['factual']['adjusted_logit_effect']
t = t[~np.isnan(t)]  # drop NaN entries before taking the variance
np.var(t)

np.float64(70.77467641365277)

In [None]:
import itertools

import numpy as np
import matplotlib.pyplot as plt

# Plot the 'Variance' statistic from df_stats against Date, one line per
# (Entity, Scenario_Type, Regression_Type) combination listed below.
entities = ['token_house', 'citizen_house', 'both_houses']
scenario_types = ['factual']
regression_types = ['logit_effect', 'adjusted_logit_effect']

# Only the variance rows are ever plotted, so filter them out once up front.
variance_rows = df_stats[df_stats['Statistic'] == 'Variance']

plt.figure(figsize=(12, 8))

for entity, scenario, regression in itertools.product(
    entities, scenario_types, regression_types
):
    subset = variance_rows[
        (variance_rows['Entity'] == entity)
        & (variance_rows['Scenario_Type'] == scenario)
        & (variance_rows['Regression_Type'] == regression)
    ]
    # Skip combinations that have no rows, or whose first recorded value is
    # exactly 0 (same criterion as before: only the first value is checked).
    if subset['Value'].empty or subset['Value'].values[0] == 0:
        continue
    plt.plot(
        subset['Date'],
        subset['Value'],
        marker='o',
        markersize=8,
        label=f'{entity}, {scenario}, {regression}',
    )

# Customize the plot
plt.title('Variance Over Time by Entity, Scenario_Type, and Regression_Type')
plt.xlabel('Date')
plt.ylabel('Variance')
plt.xticks(rotation=45)
# Place the legend outside the axes so it does not cover the lines.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small')
plt.tight_layout()
plt.show()
