In [1]:
import pandas as pd
import numpy as np

In [2]:
# Example usage:
#k = 2  # Number of columns
#n = 3  # Maximum random number for column names
#m = 3   # Number of rows (less than 2**k)

In [4]:
def generate_random_dataframe(k, n, m):
    '''
    Build a random 0/1 table with k columns and at most m distinct rows.

    Parameters
    ----------
    k : int
        Number of columns. Must not exceed n, because the column labels are
        drawn without replacement (np.random.choice raises ValueError otherwise).
    n : int
        Largest admissible column label; labels are distinct integers in 1..n.
    m : int
        Number of rows drawn before duplicates are removed.

    Returns
    -------
    pandas.DataFrame
        Every entry is 0 or 1, each with probability 50%. Duplicate rows are
        dropped (the first occurrence is kept), so the result can have fewer
        than m rows and its index keeps the labels of the surviving rows
        (a subset of 0..m-1, not renumbered). Columns are sorted by label.
    '''
    # Draw k distinct column labels from 1..n.
    column_names = np.random.choice(range(1, n+1), k, replace=False)

    # Fill the whole m x k table with a single vectorised draw. randint's upper
    # bound is exclusive, so (0, 2) yields 0 or 1 with equal probability. This
    # replaces the element-wise DataFrame.applymap call, which is deprecated in
    # recent pandas and made one np.random.choice call per cell.
    values = np.random.randint(0, 2, size=(m, k))
    df = pd.DataFrame(values, columns=column_names, index=range(m))

    # Independent draws can repeat a row; keep only the first occurrence.
    df = df.drop_duplicates()

    # Sort the columns by label.
    df = df.sort_index(axis=1)

    return df

In [5]:
def generate_and_find_closest(avg, n):
    '''
    Draw random integers between 1 and n (inclusive) and return the one
    nearest to avg.

    The number of draws is int(n/3)+1, chosen only so that it scales with n;
    change it if you want a different sampling effort. When several draws are
    equally close to avg, the one drawn first is returned.

    This function is used twice in the notebook:
    1) To determine the number of inputs for the boolean functions
       in the updating rule.
    2) To determine the number of correct inputs in the truth table.
    '''
    sample_count = int(n / 3) + 1
    candidates = np.random.randint(1, n + 1, size=sample_count)

    # argmin reports the first position of the smallest distance, so ties are
    # resolved in favour of the earliest draw.
    distances = np.abs(candidates - avg)
    return candidates[np.argmin(distances)]

In [6]:
# This is the main function. The previous two are just meant to generate random truth tables.

def conditions_product(df1, df2):
    '''
    Combine the rows (conditions) of two tables into one table over the
    union of their columns.

    If the tables have column labels in common, a row of df1 is paired only
    with the rows of df2 that carry the same values in those shared columns,
    so conflicting pairs are discarded. If they have no column in common,
    every row of df1 is paired with every row of df2.

    The columns of the result are sorted by label.
    '''
    # Column labels present in both tables.
    common = list(set(df1.columns) & set(df2.columns))

    if common:
        # Overlapping inputs: keep only the non-conflicting combinations.
        joined = pd.merge(df1, df2, on = common)
    else:
        # Disjoint inputs: take all possible products of conditions.
        joined = pd.merge(df1, df2, how = 'cross')

    return joined.sort_index(axis=1)

In [172]:
# EXAMPLE: pick the parameters used to build the random truth tables below.

# n is the largest column label a table may use
# (it is passed as the second argument of generate_random_dataframe).
n = 10
# mean_kin is the target number of columns (inputs) per table
mean_kin = 3 

# Actual number of columns: the random draw from 1..n closest to mean_kin.
kin = generate_and_find_closest(mean_kin, n)

# Number of rows requested per table: the random draw from 1..2**n closest to 2*n.
# This can exceed 2**kin, the number of distinct 0/1 rows over kin columns; in that
# case generate_random_dataframe returns fewer rows because it drops duplicates.
m = generate_and_find_closest(2*n, 2**n)

In [176]:
# Build three independent random truth tables with the same parameters.
# The generator is consumed left to right, so the tables are drawn in the
# order tt1, tt2, tt3.
tt1, tt2, tt3 = (generate_random_dataframe(kin, n, m) for _ in range(3))
# To experiment with more tables (tt4, tt5, tt6), extend the unpacking targets
# and raise the range accordingly.

In [177]:
# Display the first random truth table.
tt1

Unnamed: 0,4,6,9
0,1,0,0
1,0,0,0
2,1,1,0
4,0,0,1
5,0,1,1
8,1,1,1
19,0,1,0


In [178]:
# Display the second random truth table.
tt2

Unnamed: 0,1,3,5
0,1,1,1
1,0,1,0
2,0,0,1
3,0,1,1
7,1,0,1
8,1,0,0
9,0,0,0
10,1,1,0


In [179]:
# Display the third random truth table.
tt3

Unnamed: 0,3,4,7
0,0,1,1
2,0,0,1
3,1,0,0
4,0,1,0
7,1,1,1
8,0,0,0
11,1,0,1


In [164]:
#tt4

In [165]:
#tt5

In [166]:
#tt6

In [182]:
# Combine the conditions of the first two tables.
# In the run shown here tt1 and tt2 have no column in common, so the result is
# the full cross product of their rows (7 x 8 = 56 rows).
tt12 = conditions_product(tt1,tt2)
# Print the number of combined conditions, then display the table itself.
print(len(tt12))
tt12

56


Unnamed: 0,1,3,4,5,6,9
0,1,1,1,1,0,0
1,0,1,1,0,0,0
2,0,0,1,1,0,0
3,0,1,1,1,0,0
4,1,0,1,1,0,0
5,1,0,1,0,0,0
6,0,0,1,0,0,0
7,1,1,1,0,0,0
8,1,1,0,1,0,0
9,0,1,0,0,0,0


In [168]:
# Fold the third table into the running product. When tt12 and tt3 share columns
# (3 and 4 in the tables displayed above), only row pairs that agree on those
# columns are kept.
# NOTE(review): the saved output of this cell carries execution count 168, which is
# older than the cells that built tt1..tt3 (176) and tt12 (182), and its columns
# (1, 2, 3) are not the union of the tt12 and tt3 columns. Re-run the notebook
# top to bottom to refresh it.
tt123 = conditions_product(tt12,tt3)
tt123

Unnamed: 0,1,2,3
0,1,1,0


In [169]:
#tt1234 = conditions_product(tt123,tt4)
#tt1234

In [170]:
#tt12345 = conditions_product(tt1234,tt5)
#tt12345

In [171]:
#tt123456 = conditions_product(tt12345,tt6)
#tt123456