#### 1 ) Program a function that given a causal graph (in matrix form) computes the basis set of independences to be checked to assert if that graph is a feasible causal structure.

In [125]:
import numpy as np
import scipy.stats as st

def generateMatrix(size):
    return np.random.randint(0,2,size=(size, size))

def getPairIndeps(matrix):
    i=0
    j=0
    pairOfindep = list(list())
    #First I go for every item in the matrix and get every pair of independent nodes
    while i < len(matrix):
        while j < len(matrix):
            if matrix[i][j] == 0 and matrix[j][i] == 0 and i != j:
                pairOfindep.append([chr(ord('A')+i),chr(ord('A')+j)])
                #The chr(ord..) is to get an int from a char and viceversa.
            j+=1
        i+=1
        j=0


    index = int(len(pairOfindep)/2)    #I cut the list in half because of the repeated pairs.

    return pairOfindep[:index]

In [154]:
#Function for geting all parents of a letter
def haveParent(letter, matrix):
    parents = list()
    number = ord(letter) - ord('A')
    i=0
    while i < len(matrix):
        if(matrix[i][number] == 1):
            parents.append(chr(ord('A')+i))
        i+=1
    return list(set(parents))                #In order to avoid repeated parents. (Messy but it does work)
 

def getBasisSet(sizeOfMatrix):
    matrix = generateMatrix(sizeOfMatrix)
    pairOfindep = getPairIndeps(matrix)
    basis_set = dict()
    
    for pair in pairOfindep:
        l = haveParent(pair[0],matrix) + haveParent(pair[1],matrix)
        basis_set[','.join(pair)] = l
    return basis_set
    


In [155]:
getBasisSet(8)

{'A,E': ['F', 'B', 'D', 'B', 'H', 'G'],
 'B,F': ['C', 'B', 'G', 'F', 'G', 'E', 'H', 'C'],
 'B,H': ['C', 'B', 'G', 'F', 'A', 'G', 'E', 'C', 'D'],
 'C,E': ['C', 'B', 'A', 'F', 'B', 'H', 'G'],
 'D,G': ['F', 'B', 'E', 'H', 'C', 'B', 'A', 'E', 'H', 'C']}

#### 2)Program a function that given a ​basis set and data for all the variables computes the independences given the data (using Pearson Correlation) and returns True if the independences are observed. The threshold of correlation to consider independent or dependent two variables is a parameter of the function.

In [156]:
def getIndependencies(basis_set, data, n):
    independencies = []
    print(basis_set)
    for p in basis_set:
        pearson = st.pearsonr(random_data[ord(p[0]) - ord('A')], random_data[ord(p[2]) - ord('A')])[0]
        tr = (pearson*(n-2)**1/2) * ((1-pearson**2)**1/2)
        pval = st.t.sf(np.abs(tr), n-1)*2  
        independencies.append([p[0],p[2],pval <= 0.0005])
    
    return independencies



A couple of examples for random data:

In [157]:
n = 200
numberOfNodes = 10
random_data = np.random.randint(0,n,size=(n, numberOfNodes))
getIndependencies(getBasisSet(numberOfNodes), random_data, n)

{'A,B': ['D', 'I', 'G', 'J', 'B', 'G', 'J', 'D', 'I'], 'A,C': ['D', 'I', 'G', 'J', 'C', 'G', 'I'], 'B,A': ['B', 'G', 'J', 'D', 'I', 'D', 'I', 'G', 'J'], 'B,C': ['B', 'G', 'J', 'D', 'I', 'C', 'G', 'I'], 'B,H': ['B', 'G', 'J', 'D', 'I', 'A', 'G', 'E', 'H', 'C'], 'C,A': ['C', 'G', 'I', 'D', 'I', 'G', 'J'], 'C,B': ['C', 'G', 'I', 'B', 'G', 'J', 'D', 'I'], 'C,E': ['C', 'G', 'I', 'F', 'B', 'A', 'G', 'E', 'H', 'I'], 'C,J': ['C', 'G', 'I', 'E', 'G', 'I'], 'D,E': ['F', 'B', 'H', 'C', 'I', 'F', 'B', 'A', 'G', 'E', 'H', 'I'], 'D,J': ['F', 'B', 'H', 'C', 'I', 'E', 'G', 'I']}


[['A', 'B', True],
 ['A', 'C', True],
 ['B', 'A', True],
 ['B', 'C', True],
 ['B', 'H', True],
 ['C', 'A', True],
 ['C', 'B', True],
 ['C', 'E', True],
 ['C', 'J', True],
 ['D', 'E', True],
 ['D', 'J', True]]

In [158]:
n = 100
numberOfNodes = 5
random_data = np.random.randint(0,n,size=(n, numberOfNodes))
getIndependencies(getBasisSet(numberOfNodes), random_data, n)

{'A,D': ['C', 'B', 'A', 'E', 'E', 'B', 'D'], 'C,D': ['E', 'B', 'A', 'E', 'B', 'D']}


[['A', 'D', False], ['C', 'D', True]]

In [159]:
n = 300
numberOfNodes = 5
random_data = np.random.randint(0,n,size=(n, numberOfNodes))
getIndependencies(getBasisSet(numberOfNodes), random_data, n)

{'A,B': ['C', 'D', 'E', 'C', 'D', 'E'], 'B,A': ['C', 'D', 'E', 'C', 'D', 'E']}


[['A', 'B', True], ['B', 'A', True]]