In [13]:
import os,sys
cwd = os.path.abspath(os.path.curdir)
sys.path.append(cwd)  # workplace
import time
import numpy as np
import pandas as pd
import torch
import argparse


In [14]:
import pandas as pd
from causallearn.graph.GraphNode import GraphNode
import copy
from causallearn.graph.Dag import Dag
import networkx as nx


def ini_nodes(adj_df):
    """Create one ``GraphNode`` per column of an adjacency matrix.

    Args:
        adj_df: 2-D NumPy array (despite the name, not a DataFrame). Only its
            column count is used.

    Returns:
        list: ``GraphNode`` objects named ``"0"``, ``"1"``, ... in column order.
    """
    # Take the column count from the shape instead of indexing row 0, so a
    # matrix without rows yields an empty node list rather than an IndexError.
    n_nodes = adj_df.shape[1]
    return [GraphNode(str(i)) for i in range(n_nodes)]


def adj2dag(adj_df):
    """Convert an adjacency matrix into a causal-learn ``Dag``.

    The matrix is loaded into a networkx ``DiGraph``; every edge ``(i, j)`` of
    that graph is then added to the Dag as a directed edge from node ``i`` to
    node ``j``.

    Args:
        adj_df: 2-D NumPy adjacency matrix.

    Returns:
        tuple: ``(dag, nodes)`` where ``nodes`` is the list produced by
        ``ini_nodes`` and ``dag`` is the ``Dag`` built over those nodes.
    """
    digraph = nx.from_numpy_array(adj_df, create_using=nx.DiGraph)
    graph_nodes = ini_nodes(adj_df)
    result = Dag(graph_nodes)
    for src, dst in digraph.edges():
        result.add_directed_edge(graph_nodes[src], graph_nodes[dst])
    return result, graph_nodes

def remove_edge(index_x, index_y,nodes, dag):
    """Return a deep copy of ``dag`` with the edge between two nodes removed.

    Args:
        index_x: Position in ``nodes`` of the first endpoint.
        index_y: Position in ``nodes`` of the second endpoint.
        nodes: Sequence of node objects, indexed by position.
        dag: Graph to copy; the original object is left untouched.

    Returns:
        The copied graph after ``remove_connecting_edge`` was applied to it.
    """
    pruned = copy.deepcopy(dag)
    endpoint_x = nodes[index_x]
    endpoint_y = nodes[index_y]
    pruned.remove_connecting_edge(endpoint_x, endpoint_y)
    return pruned

def get_all_xy_edges(dag,nodes):
    """Collect the endpoint indices of every edge in ``dag``.

    Node names are parsed with ``int`` to obtain the indices.

    Args:
        dag: Graph exposing ``get_graph_edges()``.
        nodes: Unused; kept so the signature matches ``get_eva_xy_dirs``.

    Returns:
        numpy.ndarray: One row ``[index_x, index_y]`` per edge. With no edges
        the result has shape ``(0, 2)``.
    """
    edges = list(dag.get_graph_edges())
    first_idx = [int(edge.get_node1().get_name()) for edge in edges]
    second_idx = [int(edge.get_node2().get_name()) for edge in edges]
    # Stack as two rows and transpose so the empty case keeps two columns.
    return np.array([first_idx, second_idx]).T

def get_eva_xy_dirs(dag,nodes):
    """Select edges whose endpoints are d-separated once the edge is removed.

    For every edge of ``dag``, a copy of the graph without that edge is built
    with ``remove_edge``; the edge is kept when its two endpoints are
    d-separated in the copy given the empty conditioning set.

    Args:
        dag: Graph exposing ``get_graph_edges()``.
        nodes: Sequence of node objects, indexed by the integer node names.

    Returns:
        numpy.ndarray: One row ``[index_x, index_y]`` per kept edge. With no
        kept edges the result has shape ``(0, 2)``.
    """
    kept_x, kept_y = [], []
    for edge in list(dag.get_graph_edges()):
        index_x = int(edge.get_node1().get_name())
        index_y = int(edge.get_node2().get_name())
        pruned = remove_edge(index_x, index_y, nodes, dag)
        if not pruned.is_dseparated_from(nodes[index_x], nodes[index_y], set()):
            continue
        kept_x.append(index_x)
        kept_y.append(index_y)
    return np.array([kept_x, kept_y]).T


In [15]:
dataname = 'lu'
for sim_seed in range(100,110):
    # Load the graph and derive the edge pairs BEFORE opening the output file:
    # mode 'w' truncates on open, so a failure while reading the CSV or
    # building the DAG would otherwise leave an empty questions file behind.
    adj_path = f'./data/sim_{dataname}/{sim_seed}/generated_graph_target.csv'
    adj_df = pd.read_csv(adj_path)
    dag,nodes = adj2dag(adj_df.to_numpy())
    # NOTE(review): xy_edges is not used for the output below; it is kept so
    # the notebook-level variable stays available for inspection.
    xy_edges = get_all_xy_edges(dag,nodes)
    eva_xy_dirs =  get_eva_xy_dirs(dag,nodes)

    # One question per selected edge, asked in the edge's own direction.
    with open(f'./eval_llms/data/table/cdir_{sim_seed}_questions.txt', 'w') as file:
        for pair in eva_xy_dirs:
            file.write(f'Between V{pair[0]} and V{pair[1]}, is V{pair[0]} the cause?\n')
            




In [30]:
def text_cz(cz):
    """Render a conditioning set as concatenated variable labels.

    Each element becomes ``'V' + str(element)`` and the labels are joined
    without any separator, e.g. ``[1, 2]`` -> ``'V1V2'``.

    Args:
        cz: Iterable of variable identifiers.

    Returns:
        str: The concatenated labels; empty string for an empty iterable.
    """
    return ''.join('V' + str(element) for element in cz)


def text_ci(ciset):
    """Phrase (conditional) independence statements as yes/no questions.

    Args:
        ciset: Iterable of entries where ``entry[0]`` and ``entry[1]`` are the
            two variables and ``entry[2]`` is the conditioning set.

    Returns:
        list[str]: One question per entry, in input order. An empty
        conditioning set gives a marginal-independence question; otherwise the
        set is rendered with ``text_cz`` and appended after "given".
    """
    questions = []
    for entry in ciset:
        var_x, var_y, cond = entry[0], entry[1], entry[2]
        if len(cond) > 0:
            given = text_cz(cond)
            questions.append(f'Are V{var_x} and V{var_y} conditionally independent given {given}?')
        else:
            questions.append(f'Are V{var_x} and V{var_y} independent from each other?')
    return questions


def text_dsep(ciset):
    """Phrase d-separation statements as yes/no questions.

    The emitted text uses the correct spelling "d-separated" (the earlier
    "d-seperated" was a typo in the generated questions).

    Args:
        ciset: Iterable of entries where ``entry[0]`` and ``entry[1]`` are the
            two variables and ``entry[2]`` is the conditioning set.

    Returns:
        list[str]: One question per entry, in input order. An empty
        conditioning set gives an unconditional question; otherwise the set is
        rendered with ``text_cz`` and appended after "given".
    """
    question_ls = []
    for cis in ciset:
        if len(cis[2]) == 0:
            ci_text = f'Are V{cis[0]} and V{cis[1]} d-separated?'
        else:
            cz_text = text_cz(cis[2])
            ci_text = f'Are V{cis[0]} and V{cis[1]} d-separated given {cz_text}?'
        question_ls.append(ci_text)
    return question_ls

from eval_llms.src.causal_eval.helper import get_sets


In [32]:
dataname = 'lu'
for sim_seed in range(100,110):
    # Read the target adjacency matrix for this simulation seed.
    adj_path = f'./data/sim_{dataname}/{sim_seed}/generated_graph_target.csv'
    adj_df = pd.read_csv(adj_path)
    _,_,conditional_independent_set,_,conditional_dependent_set = get_sets(adj_df.to_numpy())

    # d-separation questions: the independent set first, then the dependent set.
    ci_sets = text_dsep(conditional_independent_set)
    nci_set = text_dsep(conditional_dependent_set)

    with open(f'./eval_llms/data/graph/{sim_seed}_questions.txt', 'w') as file:
        for q in ci_sets + nci_set:
            file.write(q + '\n')

    # Answers follow the question order: 'yes' for each independent-set
    # question, 'no' for each dependent-set question.
    with open(f'./eval_llms/data/graph/{sim_seed}_answers.txt', 'w') as file:
        file.write('yes\n' * len(ci_sets))
        file.write('no\n' * len(nci_set))

In [33]:
dataname = 'lu'
for sim_seed in range(100,110):
    # Read the target adjacency matrix for this simulation seed.
    adj_path = f'./data/sim_{dataname}/{sim_seed}/generated_graph_target.csv'
    adj_df = pd.read_csv(adj_path)
    _,_,conditional_independent_set,_,conditional_dependent_set = get_sets(adj_df.to_numpy())

    # Independence questions: the independent set first, then the dependent set.
    ci_sets = text_ci(conditional_independent_set)
    nci_set = text_ci(conditional_dependent_set)

    with open(f'./eval_llms/data/table/{sim_seed}_questions.txt', 'w') as file:
        for q in ci_sets + nci_set:
            print(q, file=file)

    # One answer line per question, in the same order as the questions file.
    with open(f'./eval_llms/data/table/{sim_seed}_answers.txt', 'w') as file:
        file.writelines(['yes\n'] * len(ci_sets))
        file.writelines(['no\n'] * len(nci_set))

In [26]:
dataname = 'lu'
for sim_seed in range(100,110):
    # Read the target adjacency matrix for this simulation seed.
    adj_path = f'./data/sim_{dataname}/{sim_seed}/generated_graph_target.csv'
    adj_df = pd.read_csv(adj_path)
    _,_,conditional_independent_set,_,conditional_dependent_set = get_sets(adj_df.to_numpy())

    # Independence questions: the independent set first, then the dependent set.
    ci_sets = text_ci(conditional_independent_set)
    nci_set = text_ci(conditional_dependent_set)

    with open(f'./eval_llms/data/{sim_seed}_questions.txt', 'w') as file:
        for q in ci_sets + nci_set:
            file.write(f'{q}\n')

    # 'yes' lines for the independent-set questions, then 'no' lines for the
    # dependent-set questions, matching the order of the questions file.
    with open(f'./eval_llms/data/{sim_seed}_answers.txt', 'w') as file:
        file.write(''.join(['yes\n'] * len(ci_sets)))
        file.write(''.join(['no\n'] * len(nci_set)))