In [1]:
import pandas as pd
from ucimlrepo import fetch_ucirepo

# fetch dataset
wine_quality = fetch_ucirepo(id=186)

# data (as pandas dataframes)
X = wine_quality.data.features
y = wine_quality.data.targets

# metadata
print(wine_quality.metadata)

# variable information
print(wine_quality.variables)


{'uci_id': 186, 'name': 'Wine Quality', 'repository_url': 'https://archive.ics.uci.edu/dataset/186/wine+quality', 'data_url': 'https://archive.ics.uci.edu/static/public/186/data.csv', 'abstract': 'Two datasets are included, related to red and white vinho verde wine samples, from the north of Portugal. The goal is to model wine quality based on physicochemical tests (see [Cortez et al., 2009], http://www3.dsi.uminho.pt/pcortez/wine/).', 'area': 'Business', 'tasks': ['Classification', 'Regression'], 'characteristics': ['Multivariate'], 'num_instances': 4898, 'num_features': 11, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['quality'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2009, 'last_updated': 'Wed Nov 15 2023', 'dataset_doi': '10.24432/C56S3T', 'creators': ['Paulo Cortez', 'A. Cerdeira', 'F. Almeida', 'T. Matos', 'J. Reis'], 'intro_paper': {'ID': 252, 'type': 'NATIVE', 'title': 'Modeling wine preferences

In [2]:
wine_data = pd.DataFrame(wine_quality.data.features)
wine_data["quality"] = pd.DataFrame(wine_quality.data.targets)
wine_data

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.70,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.88,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6
4,7.4,0.70,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
6492,6.2,0.21,0.29,1.6,0.039,24.0,92.0,0.99114,3.27,0.50,11.2,6
6493,6.6,0.32,0.36,8.0,0.047,57.0,168.0,0.99490,3.15,0.46,9.6,5
6494,6.5,0.24,0.19,1.2,0.041,30.0,111.0,0.99254,2.99,0.46,9.4,6
6495,5.5,0.29,0.30,1.1,0.022,20.0,110.0,0.98869,3.34,0.38,12.8,7


In [10]:
from causallearn.search.ConstraintBased.PC import pc
from causallearn.search.ConstraintBased.FCI import fci
from causallearn.search.ConstraintBased.CDNOD import cdnod
from causallearn.utils.GraphUtils import GraphUtils

data_np = wine_data.to_numpy()
indep_tests = ['fisherz',
               "mv_fisherz", 
              # "chisq", #crashed memory loss
               #"kci",   #crashed too long execution
               "gsq"] 

for indep_test in indep_tests:
    print(f"Test: ", indep_test)
    print("PC")
    cg = pc(data_np, alpha=0.05, indep_test=indep_test)
    labels = wine_data.columns
    pyd = GraphUtils.to_pydot(cg.G, labels=labels)
    pyd.write_png(f"graphs\constraint_wine_quality_pc_{indep_test}.png")
    
    print("FCI")
    g, edges = fci(data_np ,alpha=0.05, independence_test_method=indep_test)
    labels = wine_data.columns
    pyd = GraphUtils.to_pydot(g,labels=labels )
    pyd.write_png(f"graphs\constraint_wine_quality_fci_{indep_test}.png")

    # cg = cdnod(data_np, c_indx = wine_data.index, alpha=0.05, indep_test=indep_test)
    # labels = wine_data.columns
    # pyd = GraphUtils.to_pydot(cg.G, labels=labels)
    # pyd.write_png(f"graphs\constaint_wine_quality_cdnod_{indep_test}.png")


Test:  fisherz
PC


  0%|          | 0/12 [00:00<?, ?it/s]

FCI


  0%|          | 0/12 [00:00<?, ?it/s]

X7 --> X6
Test:  mv_fisherz
PC


  0%|          | 0/12 [00:00<?, ?it/s]

FCI


  0%|          | 0/12 [00:00<?, ?it/s]

X7 --> X6
Test:  gsq
PC


  0%|          | 0/12 [00:00<?, ?it/s]

FCI


  0%|          | 0/12 [00:00<?, ?it/s]

In [12]:
from causallearn.search.ScoreBased.GES import ges

score_functions = ["local_score_BIC","local_score_BDeu", "local_score_CV_general", "local_score_marginal_general", "local_score_CV_multi", "local_score_marginal_multi"]

for score_func in score_functions:
    Record = ges(X, score_func = score_func)
    pyd = GraphUtils.to_pydot(Record['G'])
    pyd.write_png(f"graphs\score_wine_quality_ges_{score_func}.png")
    # pyd = GraphUtils.to_pydot(Record['G'])
    # pyd.write_png('simple_test.png')


InvalidIndexError: (slice(None, None, None), 0)

# lingam

In [13]:
from causallearn.search.FCMBased import lingam

random_state = 42
max_iter = 1000
model = lingam.ICALiNGAM(random_state, max_iter)
model.fit(data_np)
print(model.causal_order_)
print(model.adjacency_matrix_)
causal_graph = model.causal_graph
pydot_graph = GraphUtils.to_pydot(causal_graph, labels=labels)

model = lingam.DirectLiNGAM(random_state = random_state, prior_knowledge= -1)
model.fit(X)
print(model.causal_order_)
print(model.adjacency_matrix_)
causal_graph = model.causal_graph
pydot_graph = GraphUtils.to_pydot(causal_graph, labels=labels)

[7, 4, 8, 1, 2, 9, 0, 10, 11, 3, 5, 6]
[[ 0.00000000e+00  2.16213875e+00  2.67578368e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  1.33524155e+02
  -2.14186623e+00  1.66562416e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   1.45444506e+00  0.00000000e+00  0.00000000e+00  8.57560528e+00
   2.51676423e-01  0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00 -3.68234255e-01  0.00000000e+00  0.00000000e+00
   6.19437716e-01  0.00000000e+00  0.00000000e+00  7.65014864e+00
  -2.07197461e-01  0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [-2.48508702e+00 -4.94222646e+00  1.47890069e+00  0.00000000e+00
  -1.84511453e+01  0.00000000e+00  0.00000000e+00  2.15490203e+03
  -1.23742374e+01 -5.28904538e+00  1.99064728e+00  2.32572122e-01]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  4.23643969e+00
   0.00000000e+00  0.00000000e+00

AttributeError: 'ICALiNGAM' object has no attribute 'causal_graph'

In [None]:
from causalinference import CausalModel
# Causal Inference using CausalModel
D = lalonde['treat'].values
X = lalonde[['age', 'educ', 'black', 'hispan', 'married', 'nodegree', 're74', 're75']].values

causal = CausalModel(Y, D, X)
causal.est_via_ols()
causal.estimates