In [1]:
import os
import json
import numpy as np
import matplotlib.pyplot as plt
from typing import Union
from metrics.metrics import nclusters, pwdist, lobbyist_performance

In [2]:
# Directory holding `config.json` and one sub-directory per (lambda, phi) pair.
# The default is the original author's local checkout; set the
# ALMONDO_RESULTS_DIR environment variable to run the notebook on another
# machine without editing this cell.
DEFAULT_BASEPATH = 'C:/Users/leona/PycharmProjects/almondo-tweets-retrieval/model/results/balanced_budgets/2_lobbyists_bottom_right/'
basepath = os.environ.get('ALMONDO_RESULTS_DIR', DEFAULT_BASEPATH)
filename = 'config.json'

# Read the experiment configuration; JSON is decoded as UTF-8 explicitly so the
# result does not depend on the platform's default encoding.
with open(os.path.join(basepath, filename), 'r', encoding='utf-8') as f:
    params = json.load(f)

In [3]:
# Dump the configuration loaded from config.json to inspect its keys and values.
print(params)

{'p_o': 0.01, 'p_p': 0.99, 'lambda_values': [0.8, 0.82, 0.84, 0.86, 0.88, 0.9, 0.92, 0.94, 0.96, 0.98, 1.0], 'phi_values': [0.8, 0.82, 0.84, 0.86, 0.88, 0.9, 0.92, 0.94, 0.96, 0.98, 1.0], 'T': 10000, 'n_lobbyists': 2, 'lobbyists_data': {'0': {'m': 0, 'B': 300000, 'c': 1, 'strategies': ['results\\balanced_budgets/2_lobbyists\\strategies\\300000\\strategy_100.txt', 'results\\balanced_budgets/2_lobbyists\\strategies\\300000\\strategy_114.txt', 'results\\balanced_budgets/2_lobbyists\\strategies\\300000\\strategy_93.txt', 'results\\balanced_budgets/2_lobbyists\\strategies\\300000\\strategy_118.txt', 'results\\balanced_budgets/2_lobbyists\\strategies\\300000\\strategy_48.txt', 'results\\balanced_budgets/2_lobbyists\\strategies\\300000\\strategy_58.txt', 'results\\balanced_budgets/2_lobbyists\\strategies\\300000\\strategy_53.txt', 'results\\balanced_budgets/2_lobbyists\\strategies\\300000\\strategy_146.txt', 'results\\balanced_budgets/2_lobbyists\\strategies\\300000\\strategy_62.txt', 'result

In [4]:
# Pull the configuration entries used by the rest of the notebook into
# module-level names.

# Probabilities attached to the optimistic / pessimistic models.
p_o, p_p = params['p_o'], params['p_p']

# Grids of lambda and phi values explored by the experiment.
lambda_values, phi_values = params['lambda_values'], params['phi_values']

# Number of lobbyists and number of runs per parameter pair.
n_lobbyists, nruns = params['n_lobbyists'], params['nruns']

# Per-lobbyist settings, keyed by lobbyist id.
lobbyists_data = params['lobbyists_data']

In [5]:
def get_data(trends: dict,
             p_o: float,
             p_p: float,
             iteration: Union[int, str] = -1,
             kind: str = "probabilities"):
    """
    Extract the agents' values and the iteration counter from one snapshot.

    Args:
        trends (dict): The computed simulation trends. Indexed by integer
            position; each snapshot must provide an 'iteration' entry and a
            'status' mapping whose values are numeric.
            (Presumably the list loaded from a run's status.json - confirm.)
        p_o (float): Probability of the optimistic model.
        p_p (float): Probability of the pessimistic model.
        iteration (int | str): Snapshot index in ``[-1, len(trends))``, or
            "last" for the final snapshot (default: -1, i.e. the final one).
        kind (str): The type of values to extract. "weights" returns the
            stored status values unchanged; "probabilities" returns
            ``p_o * w + p_p * (1 - w)`` for each stored value ``w``.

    Returns:
        tuple: ``(ops, it)`` where ``ops`` is a float numpy array with one
        entry per status value and ``it`` is the snapshot's 'iteration' entry.

    Raises:
        ValueError: If ``iteration`` is not a valid index (including when
            ``trends`` is empty) or ``kind`` is not a supported value.
    """
    # "last" is an alias for the final snapshot, as promised by the docstring.
    if isinstance(iteration, str) and iteration == "last":
        iteration = -1

    # An empty `trends` has no valid index at all: -1 would pass the range
    # check below and then fail with an IndexError instead of a ValueError.
    index_ok = (
        isinstance(iteration, int)
        and len(trends) > 0
        and -1 <= iteration < len(trends)
    )
    if not index_ok:
        raise ValueError(f"Invalid iteration index: {iteration}")

    snapshot = trends[iteration]
    it = snapshot['iteration']
    ops = np.array(list(snapshot['status'].values()), dtype=float)

    # Compute values based on type
    if kind == 'probabilities':
        ops = p_o * ops + p_p * (1 - ops)
    elif kind != 'weights':
        raise ValueError("`kind` must be either 'probabilities' or 'weights'.")

    return ops, it

In [10]:
import json
import os
import numpy as np
from tqdm.notebook import tqdm  # Use tqdm for Jupyter Notebook
from scipy.stats import t  # Import stud_t for the t-test p-value calculation

kinds = ['weights', 'probabilities']

# Value of the mean per-run average opinion under the null hypothesis.
NULL_MEAN_OPINION = 0.5


def _summarize(values):
    """Return the mean and population standard deviation (ddof=0) of `values`."""
    arr = np.array(values)
    return {'avg': arr.mean(), 'std': arr.std()}


def _one_sample_t_test(samples, mu_0):
    """Two-sided one-sample t-test of the mean of `samples` against `mu_0`.

    Returns `(t_statistic, p_value)` as plain floats. Both are NaN when fewer
    than two samples are available, because the sample standard deviation
    (ddof=1) is undefined in that case.
    """
    values = np.asarray(samples, dtype=float)
    n_obs = values.size
    if n_obs < 2:
        return float('nan'), float('nan')

    std_err = values.std(ddof=1) / np.sqrt(n_obs)
    # Identical runs give a zero standard error; let the division produce
    # inf/NaN silently instead of emitting a RuntimeWarning.
    with np.errstate(divide='ignore', invalid='ignore'):
        t_stat = (values.mean() - mu_0) / std_err
    p_value = 2 * t.sf(abs(t_stat), n_obs - 1)
    return float(t_stat), float(p_value)


# Every (lambda, phi) pair of the experiment grid, lambda varying slowest.
parameter_grid = [(l, p) for l in params['lambda_values'] for p in params['phi_values']]

# Total iterations for tqdm: one tick per (kind, lambda, phi, run).
total_iterations = len(kinds) * len(parameter_grid) * params['nruns']

with tqdm(total=total_iterations, desc="Processing", unit="iteration") as pbar:
    for kind in kinds:
        for lambda_v, phi_v in parameter_grid:
            path = os.path.join(basepath, f'{lambda_v}_{phi_v}/')

            # Per-run values of each metric; only runs whose status.json
            # could be loaded contribute an entry.
            metrics = {
                'effective_number_clusters': [],
                'number_iterations': [],
                'average_pairwise_distance': [],
                'average_opinions': [],
                'std_opinions': [],
                'lobbyists_performance': {k: [] for k in range(n_lobbyists)}
            }

            # Across-run summaries; -1 marks "no run could be loaded".
            avg_metrics = {
                'effective_number_clusters': {'avg': -1, 'std': -1},
                'number_iterations': {'avg': -1, 'std': -1},
                'average_pairwise_distance': {'avg': -1, 'std': -1},
                'average_opinions': {'avg': -1, 'std': -1},
                'std_opinions': {'avg': -1, 'std': -1},
                'lobbyists_performance': {k: {'avg': -1, 'std': -1} for k in range(n_lobbyists)}
            }

            for run in range(params['nruns']):
                json_path = os.path.join(path, str(run), 'status.json')

                try:
                    with open(json_path, 'r') as f:
                        trends = json.load(f)
                except json.JSONDecodeError:
                    print(f"Skipping run {run} for ({lambda_v}, {phi_v}) due to JSON decoding error.")
                    pbar.update(1)
                    continue
                except FileNotFoundError:
                    print(f"Skipping run {run} for ({lambda_v}, {phi_v}) - File not found.")
                    pbar.update(1)
                    continue

                # Values and iteration counter of the final snapshot of the run.
                ops, it = get_data(trends, p_o, p_p, kind=kind)

                metrics['effective_number_clusters'].append(nclusters(ops, 0.0001))
                metrics['number_iterations'].append(it)
                metrics['average_pairwise_distance'].append(pwdist(ops))
                metrics['average_opinions'].append(np.array(ops).mean())
                metrics['std_opinions'].append(np.array(ops).std())

                for lob_id, lob in lobbyists_data.items():
                    metrics['lobbyists_performance'][int(lob_id)].append(
                        lobbyist_performance(ops, lob['m'], p_o, p_p)
                    )

                pbar.update(1)

            # Summaries are computed once, after all runs have been collected,
            # and only when at least one run was loaded (otherwise the -1
            # placeholders are written unchanged).
            if metrics['number_iterations']:
                for name, values in metrics.items():
                    if name == 'lobbyists_performance':
                        for lob_id in range(n_lobbyists):
                            avg_metrics[name][lob_id].update(_summarize(values[lob_id]))
                    else:
                        avg_metrics[name].update(_summarize(values))

                # One-sample t-test of the per-run average opinions against
                # NULL_MEAN_OPINION. The sample size is the number of runs
                # actually loaded, and the standard error comes from the
                # spread of the per-run averages.
                t_stat, p_value = _one_sample_t_test(metrics['average_opinions'], NULL_MEAN_OPINION)
                avg_metrics['average_opinions']['t_statistic'] = t_stat
                avg_metrics['average_opinions']['p_value'] = p_value

            with open(os.path.join(path, f'{kind}_metrics_distributions.json'), 'w') as f:
                json.dump(metrics, f)

            with open(os.path.join(path, f'{kind}_average_metrics.json'), 'w') as f:
                json.dump(avg_metrics, f)


Processing:   0%|          | 0/36300 [00:00<?, ?iteration/s]

  (1 - p_lob) * np.log((1 - p_lob) / (1 - opinions))
  x = asanyarray(arr - arrmean)
  rel_entropy = p_lob * np.log((p_lob) / (opinions)) + \
  rel_entropy = p_lob * np.log((p_lob) / (opinions)) + \
