In [1]:
import json
import math
import networkx as nx
import pandas as pd
import altair as alt

from generator_v2 import Generator

60 2
80 4
100 6
60 2
80 4
100 6


In [2]:
alt.__version__

'5.1.1'

In [31]:
N = 500
N_coms = 2
# sampling_strat = "weighted"
sampling_strat = "max"

In [4]:
community_array = [0 for x in range(N//2)]  + [1 for x in range(N//2)]

# Node Degree

In [5]:
gen = Generator(N, N // 2, N_coms, 20/N, 3/N, community_array, sampling_strat)
gen.run()

In [6]:
degrees = dict(gen.degrees()).values()

In [7]:
degrees_df = pd.DataFrame(degrees, columns=["degree"])

In [8]:
degrees_df

Unnamed: 0,degree
0,7
1,7
2,5
3,5
4,5
...,...
495,5
496,4
497,6
498,10


In [9]:
alt.Chart(degrees_df).mark_bar().encode(
    alt.X("degree:Q"),
    y='count()',
)

# Node Degree Multi Simulation

In [10]:
N_sim = 2

In [11]:
df_sim = pd.DataFrame(columns=["count", "simNumber"], dtype=int)
for i in range(N_sim):
    gen = Generator(N, N // 2, N_coms, 20/N, 3/N, community_array, sampling_strat)
    gen.run()
    degrees = dict(gen.degrees()).values()
    degrees_df = pd.DataFrame(degrees, columns=["degree"])

    countdf = degrees_df.groupby(['degree'])['degree'].count()
    countdf = countdf.to_frame().rename(columns={"degree": "count"})
    countdf["simNumber"] = i
    
    df_sim = pd.concat([df_sim, countdf])

In [12]:
df_sim = df_sim.reset_index(names="degree")
# df_sim.rename(columns={}

In [13]:
df_sim

Unnamed: 0,degree,count,simNumber
0,0,3,0
1,1,2,0
2,2,14,0
3,3,25,0
4,4,58,0
5,5,69,0
6,6,62,0
7,7,72,0
8,8,70,0
9,9,46,0


In [14]:
bars = alt.Chart(df_sim).mark_bar().encode(
    alt.X("degree:Q"),
    alt.Y("mean(count):Q"),
)

In [15]:
# c = alt.Chart(df_sim).mark_errorband(extent="ci", borders=True).encode(
#     x="degree:Q",
#     y=alt.Y(
#         "count:Q",
#         scale=alt.Scale(zero=False),
#         title="Miles per Gallon (95% CIs)",
#     ),
# )

# alt.Chart(source).mark_errorbar(extent="ci", ticks=True).encode(
#     x="year(Year)",
#     y=alt.Y(
#         "Miles_per_Gallon:Q",
#         scale=alt.Scale(zero=False),
#         title="Miles per Gallon (95% CIs)",
#     ),
# )

error = alt.Chart(df_sim).mark_errorbar(extent="ci", ticks=True).encode(
    x="degree:Q",
    y=alt.Y(
        "count:Q",
        scale=alt.Scale(zero=False),
        title="Miles per Gallon (95% CIs)",
    ),
)

In [16]:
bars + error

# Hyperedge Size

In [17]:
hsizes = dict(gen.hyperedge_sizes()).values()

In [18]:
hsizes

dict_values([8, 12, 17, 14, 9, 10, 12, 9, 14, 10, 8, 11, 12, 13, 15, 19, 15, 14, 20, 18, 18, 10, 12, 10, 16, 12, 9, 13, 16, 14, 14, 9, 22, 9, 11, 15, 11, 9, 17, 14, 12, 14, 13, 20, 15, 11, 12, 16, 13, 18, 9, 13, 14, 17, 11, 17, 11, 21, 12, 12, 14, 19, 14, 14, 10, 16, 6, 20, 11, 13, 10, 11, 19, 15, 10, 15, 14, 11, 12, 19, 8, 11, 19, 12, 18, 13, 9, 7, 16, 17, 22, 16, 16, 10, 16, 16, 9, 11, 11, 15, 16, 22, 13, 10, 10, 9, 15, 15, 15, 18, 12, 17, 7, 10, 11, 17, 9, 12, 15, 15, 21, 10, 18, 16, 17, 15, 20, 16, 12, 13, 14, 15, 11, 14, 17, 17, 21, 13, 10, 9, 14, 18, 11, 10, 15, 13, 17, 13, 16, 14, 13, 14, 15, 15, 14, 19, 13, 8, 12, 15, 15, 13, 10, 12, 14, 17, 10, 14, 12, 15, 15, 16, 15, 7, 10, 12, 12, 13, 8, 11, 14, 12, 13, 17, 15, 16, 18, 12, 15, 18, 9, 14, 8, 13, 16, 14, 10, 13, 13, 11, 10, 18, 13, 7, 13, 15, 17, 13, 22, 14, 16, 18, 15, 12, 15, 11, 13, 14, 16, 15, 11, 13, 14, 9, 17, 24, 17, 10, 12, 12, 13, 16, 10, 8, 22, 18, 14, 12, 12, 18, 18, 16, 10, 15, 15, 11, 13, 13, 11, 21])

In [19]:
hsizes_df = pd.DataFrame(hsizes, columns=["hsize"])

In [20]:
alt.Chart(hsizes_df).mark_bar().encode(
    alt.X("hsize:Q"),
    y='count()',
)

# Fraction Dist

In [21]:
p_init = 10 / N
q_init = 10 / N

In [22]:
q_init

0.02

In [32]:
p = p_init
q = q_init 

df_sim = pd.DataFrame(columns=["sim", "type", "count"], dtype=int)

c = 0.001
N_sim = 20
for i in range(N_sim):
    # print(i)
    
    # p = p + c
    # q = q - c
    # q = format(q - p * 0.05, '.2f')
    q = round(q - p * 0.05, 4)
    print(i, p, q)
    
    gen = Generator(N, N, N_coms, p, q, community_array, sampling_strat)
    gen.run()
    # comp = gen.hyperedges_composition()
    comp = gen.hyperedges_types()
    n_pure = comp.count("pure")
    n_mixed = comp.count("mixed")
    
    df = pd.DataFrame({"sim": [i, i], "type": ["pure", "mixed"], "count": [n_pure, n_mixed]})
    # print(df)
    
    df_sim = pd.concat([df_sim, df])

0.02 0.019
0.02 0.018
0.02 0.017
0.02 0.016
0.02 0.015
0.02 0.014
0.02 0.013
0.02 0.012
0.02 0.011
0.02 0.01
0.02 0.009
0.02 0.008
0.02 0.007
0.02 0.006
0.02 0.005
0.02 0.004
0.02 0.003
0.02 0.002
0.02 0.001
0.02 0.0


In [28]:
df_sim

Unnamed: 0,sim,type,count
0,0,pure,1
1,0,mixed,499
0,1,pure,1
1,1,mixed,499
0,2,pure,1
1,2,mixed,499
0,3,pure,6
1,3,mixed,494
0,4,pure,4
1,4,mixed,496


In [33]:
alt.Chart(df_sim).mark_bar().encode(
    x='sim:O',
    y='count',
    color='type'
)