In [1]:
import json
import math
import networkx as nx
import pandas as pd
import altair as alt

from generator_v2 import Generator

In [2]:
# Fill color for the bar layers of the multi-simulation charts (passed to mark_bar(color=COLOR)).
COLOR = "#99d8c9"

In [3]:
# Show the installed Altair version so the chart rendering can be reproduced.
alt.__version__

'5.1.1'

In [4]:
# Generator configuration shared by every run in this notebook.
N = 100  # first Generator argument; community_array is built with N entries (presumably the node count — confirm in generator_v2)
N_coms = 2  # third Generator argument (presumably the number of communities — confirm in generator_v2)
sampling_strat = "weighted"  # last Generator argument
# Alternative strategy that was tried: sampling_strat = "max"

In [5]:
# Community label for every node position: N_coms contiguous, near-equal blocks labelled
# 0 .. N_coms - 1. Derived from N and N_coms so the list always has exactly N entries
# (the previous two-halves construction produced N - 1 labels for odd N and ignored N_coms).
# For N = 100, N_coms = 2 this is fifty 0s followed by fifty 1s, as before.
community_array = [node * N_coms // N for node in range(N)]

# Node Degree

In [6]:
# Build and run a single generator instance for the single-run plots below.
# The 4th and 5th arguments are the values called p and q in the "Fraction Dist" sweep.
gen = Generator(
    N,
    N // 2,
    N_coms,
    20 / N,
    3 / N,
    community_array,
    sampling_strat,
)
gen.run()

In [7]:
# Values of gen.degrees() after conversion to a dict (presumably node -> degree pairs; keys are discarded).
degrees = dict(gen.degrees()).values()

In [8]:
# One row per degree value from the single run, in a single "degree" column.
degrees_df = pd.DataFrame({"degree": list(degrees)})

In [9]:
# Display the single-run degree table.
degrees_df

Unnamed: 0,degree
0,8
1,5
2,5
3,8
4,3
...,...
95,4
96,7
97,10
98,5


In [10]:
# Bar chart of the single run: x = degree value, y = number of rows with that degree.
degree_hist = alt.Chart(degrees_df).mark_bar()
degree_hist.encode(
    x=alt.X("degree:Q"),
    y=alt.Y("count()"),
)

# Node Degree Multi Simulation

In [11]:
# Number of independent generator runs aggregated in the multi-simulation sections.
# NOTE: the "Fraction Dist" sweep later reassigns N_sim, so re-running the simulation
# cells after that section uses the sweep's value instead of this one.
N_sim = 2

In [12]:
# Repeat the generator N_sim times and record, for each run, how many nodes have each degree.
# The per-run tables are gathered in a list and concatenated once at the end: calling
# pd.concat inside the loop re-copies the accumulated frame on every iteration, and
# concatenating onto an empty placeholder frame is deprecated in recent pandas.
sim_tables = []
for i in range(N_sim):
    # `gen` stays bound to the last run; the "Hyperedge Size" section reads it.
    gen = Generator(N, N // 2, N_coms, 20/N, 3/N, community_array, sampling_strat)
    gen.run()

    # Loop-local name so the single-run `degrees` / `degrees_df` are not overwritten.
    sim_degrees = pd.DataFrame({"degree": list(dict(gen.degrees()).values())})
    countdf = (
        sim_degrees.groupby("degree")
        .size()
        .to_frame("count")
        .assign(simNumber=i)
    )
    sim_tables.append(countdf)

# Index = degree value; columns = count, simNumber.
# pd.concat raises on an empty list, so fall back to an empty frame when N_sim == 0.
df_sim = (
    pd.concat(sim_tables)
    if sim_tables
    else pd.DataFrame(columns=["count", "simNumber"], dtype=int)
)

In [13]:
# Move the degree values out of the index into a regular "degree" column for plotting.
# Guarded so that re-running this cell does not raise on an already-existing "degree" column.
if "degree" not in df_sim.columns:
    df_sim = df_sim.reset_index(names="degree")

In [14]:
# Display the combined table: one row per (degree, simNumber) with its node count.
df_sim

Unnamed: 0,degree,count,simNumber
0,1,1,0
1,2,4,0
2,3,7,0
3,4,14,0
4,5,11,0
5,6,18,0
6,7,20,0
7,8,9,0
8,9,8,0
9,10,5,0


In [15]:
# Bar layer: mean node count per degree across the simulations in df_sim.
# The x domain is fixed to [0, 18]; the error-bar layer uses the same domain.
bars = (
    alt.Chart(df_sim)
    .mark_bar(color=COLOR)
    .encode(
        x=alt.X("degree:Q").scale(domain=[0, 18]),
        y=alt.Y("mean(count):Q"),
    )
)

In [16]:
# Render the bar layer on its own.
bars

In [17]:
# Error-bar layer over the per-simulation counts (extent="ci"), on the same fixed x domain
# as the bar layer. The y title set here labels the shared axis of the layered chart.
# NOTE(review): zero=False on a y scale shared with the bars may drop the zero baseline —
# confirm against the rendered chart.
error = (
    alt.Chart(df_sim)
    .mark_errorbar(extent="ci", rule=True)
    .encode(
        x=alt.X("degree:Q").scale(domain=[0, 18]),
        y=alt.Y("count:Q").scale(zero=False).title("Absolute Frequency"),
    )
)

In [18]:
# Overlay the error bars on the mean-count bars.
alt.layer(bars, error)

# Hyperedge Size

In [19]:
# Values of gen.hyperedge_sizes() after conversion to a dict (keys are discarded).
# `gen` here is whichever generator was assigned last — after Run All, the final run of the
# degree simulation loop above.
hsizes = dict(gen.hyperedge_sizes()).values()

In [20]:
# Display the raw hyperedge sizes.
hsizes

dict_values([8, 11, 14, 15, 8, 11, 15, 13, 13, 10, 15, 4, 21, 13, 12, 11, 12, 10, 11, 16, 17, 10, 14, 16, 11, 10, 12, 15, 15, 17, 10, 13, 15, 11, 13, 12, 7, 14, 10, 11, 9, 13, 15, 12, 9, 12, 15, 12, 12, 12])

In [21]:
# One row per hyperedge size, in a single "hsize" column.
hsizes_df = pd.DataFrame({"hsize": list(hsizes)})

In [22]:
# Bar chart of the single run: x = hyperedge size, y = number of rows with that size.
hsize_hist = alt.Chart(hsizes_df).mark_bar()
hsize_hist.encode(
    x=alt.X("hsize:Q"),
    y=alt.Y("count()"),
)

## Hyperedge Size Multi Simulation

In [23]:
# Repeat the generator N_sim times and record, for each run, how many hyperedges have each size.
# The per-run tables are gathered in a list and concatenated once at the end: calling
# pd.concat inside the loop re-copies the accumulated frame on every iteration, and
# concatenating onto an empty placeholder frame is deprecated in recent pandas.
sim_tables = []
for i in range(N_sim):
    gen = Generator(N, N // 2, N_coms, 20/N, 3/N, community_array, sampling_strat)
    gen.run()

    # Loop-local name so the single-run `hsizes` / `hsizes_df` are not overwritten.
    sim_hsizes = pd.DataFrame({"hsize": list(dict(gen.hyperedge_sizes()).values())})
    countdf = (
        sim_hsizes.groupby("hsize")
        .size()
        .to_frame("count")
        .assign(simNumber=i)
    )
    sim_tables.append(countdf)

# Index = hyperedge size; columns = count, simNumber.
# pd.concat raises on an empty list, so fall back to an empty frame when N_sim == 0.
df_sim = (
    pd.concat(sim_tables)
    if sim_tables
    else pd.DataFrame(columns=["count", "simNumber"], dtype=int)
)

In [24]:
# Move the hyperedge sizes out of the index into a regular "hsize" column for plotting.
# Guarded so that re-running this cell does not raise on an already-existing "hsize" column.
if "hsize" not in df_sim.columns:
    df_sim = df_sim.reset_index(names="hsize")

In [25]:
# Preview the combined table: one row per (hsize, simNumber) with its hyperedge count.
df_sim.head()

Unnamed: 0,hsize,count,simNumber
0,7,3,0
1,8,1,0
2,9,10,0
3,10,4,0
4,11,6,0


In [26]:
# Two layers over the hyperedge-size simulation table, sharing a fixed x domain of [0, 22]:
#   bars  - mean hyperedge count per size across the simulations
#   error - error bars over the per-simulation counts (extent="ci")
hsize_scale = alt.Scale(domain=[0, 22])

bars = (
    alt.Chart(df_sim)
    .mark_bar(color=COLOR)
    .encode(
        x=alt.X("hsize:Q", scale=hsize_scale),
        y=alt.Y("mean(count):Q"),
    )
)

error = (
    alt.Chart(df_sim)
    .mark_errorbar(extent="ci", rule=True)
    .encode(
        x=alt.X("hsize:Q", scale=hsize_scale),
        y=alt.Y("count:Q").scale(zero=False).title("Absolute Frequency"),
    )
)

In [27]:
# Overlay the error bars on the mean-count bars.
alt.layer(bars, error)

# Fraction Distribution

In [28]:
# Starting values for the p/q sweep in the next code cell; both equal 10 / N.
p_init = 10 / N
q_init = 10 / N

In [29]:
# Display the starting q value.
q_init

0.1

In [30]:
# Sweep q from q_init down towards zero in steps of p * increment while p stays fixed, run one
# generator per step, and collect two tables:
#   df_sim      - number of "pure" and "mixed" hyperedges per step
#   df_fraction - for each step, the count reported for every fraction0 value by
#                 gen.mixed_he_fraction_to_count()
# p and q are passed as the 4th and 5th Generator arguments; their exact meaning is defined in
# generator_v2 (presumably connection probabilities — TODO confirm).
p = p_init
q = q_init

increment = 0.05
# NOTE: this reassigns the notebook-wide N_sim used by the earlier multi-simulation cells;
# kept for compatibility, but re-running those cells afterwards will use this value.
N_sim = int(1 / increment) + 1

q_frac_order = []  # x-axis labels in sweep order, used to sort the charts below
type_records = []
fraction_records = []
for i in range(N_sim):
    # Clamp at zero: floating-point error makes the last step slightly negative, which
    # previously rounded to -0.0.
    q = max(0.0, round(q_init - (p * increment * i), 4))
    print(i, p, q)

    gen = Generator(N, N, N_coms, p, q, community_array, sampling_strat)
    gen.run()

    # Label of this step on the charts' x axis, e.g. "0.95p".
    q_frac = f"{round(1 - (increment * (i)), 3)}p"
    q_frac_order.append(q_frac)

    comp = gen.hyperedges_types()
    for he_type in ("pure", "mixed"):
        type_records.append(
            {"sim": i, "q": q_frac, "type": he_type, "count": comp.count(he_type)}
        )

    # Fraction distribution of the mixed hyperedges.
    for fraction, count in gen.mixed_he_fraction_to_count().items():
        fraction_records.append(
            {"sim": i, "q": q_frac, "count": count, "fraction0": fraction}
        )

# Build each frame once from the collected records instead of pd.concat-ing onto an empty frame
# inside the loop (quadratic, deprecated in recent pandas, and it left duplicate index values).
# Column order matches the tables the loop used to produce.
df_sim = pd.DataFrame(type_records, columns=["sim", "type", "count", "q"])
df_fraction = pd.DataFrame(fraction_records, columns=["sim", "count", "fraction0", "q"])
        
    

0 0.1 0.1
1 0.1 0.095
2 0.1 0.09
3 0.1 0.085
4 0.1 0.08
5 0.1 0.075
6 0.1 0.07
7 0.1 0.065
8 0.1 0.06
9 0.1 0.055
10 0.1 0.05
11 0.1 0.045
12 0.1 0.04
13 0.1 0.035
14 0.1 0.03
15 0.1 0.025
16 0.1 0.02
17 0.1 0.015
18 0.1 0.01
19 0.1 0.005
20 0.1 -0.0


In [31]:
# Display the mixed-hyperedge fraction table built by the sweep (one row per run and fraction0 value).
df_fraction

Unnamed: 0,sim,count,fraction0,q
0,0,9,60.000,1.0p
0,0,2,30.000,1.0p
0,0,6,46.154,1.0p
0,0,2,18.182,1.0p
0,0,1,87.500,1.0p
...,...,...,...,...
0,19,1,37.500,0.05p
0,19,1,90.000,0.05p
0,19,1,10.000,0.05p
0,20,44,100.000,0.0p


In [47]:
# Bar chart of hyperedge counts per swept q setting, colored by hyperedge type.
# The x axis follows q_frac_order, i.e. the order in which the sweep produced the labels.
type_chart = alt.Chart(df_sim).mark_bar().encode(
    x=alt.X('q:O', sort=q_frac_order),
    y=alt.Y('count').title("Number of hyperedges"),
    color=alt.Color('type').scale(
        domain=['mixed', "pure"],
        range=['#9ebcda', '#e0ecf4'],
    ),
)
type_chart.properties(width=800, height=300)

In [48]:
# Bar chart of the mixed-hyperedge counts per swept q setting, colored by fraction0.
# The x axis follows q_frac_order; bar segments are ordered by fraction0, descending.
fraction_chart = alt.Chart(df_fraction).mark_bar().encode(
    x=alt.X('q:O', sort=q_frac_order),
    y=alt.Y('count'),
    color=alt.Color('fraction0', scale=alt.Scale(scheme="lightgreyteal")),
    order=alt.Order('fraction0', sort='descending'),
)
fraction_chart.properties(width=800, height=300)