In [2]:
import lingam
import numpy as np
import pandas as pd

# import sys
from lingam.utils import make_dot
from scipy.stats import ks_2samp
from sklearn.linear_model import LinearRegression


# np.set_printoptions(precision=3, suppress=True, threshold=sys.maxsize)
# Seed NumPy's legacy global RNG (this does not seed any other library).
np.random.seed(100)

# Load the two scrap-rate datasets and stack them row-wise into one sample set.
high_scrap = pd.read_csv("../data/high_scrap.csv")
low_scrap = pd.read_csv("../data/low_scrap.csv")

X = pd.concat([high_scrap, low_scrap])
# Scale every column to unit standard deviation (the mean is left untouched).
X = X / X.std()

# Prior knowledge: an (n, n) integer matrix over the columns of X, initialised
# to -1 everywhere; entries are overwritten with 0 further down.
n = X.shape[1]
prior_knowledge = np.full((n, n), -1)

# Station number of every column, parsed from the column name.
# The full run of digits after the "Station" prefix is used, so multi-digit
# station numbers (e.g. "Station12_...") are not truncated to their first digit.
# NOTE(review): assumes every column is named "Station<k>..." (the naming shown
# in this notebook's table outputs) — confirm against the CSV headers.
STATION_PATTERN = r"^Station(\d+)"
station_labels = X.columns.str.extract(STATION_PATTERN, expand=False)
if station_labels.isna().any():
    unparsed_columns = X.columns[station_labels.isna()].tolist()
    raise ValueError(
        f"Cannot parse a station number from columns: {unparsed_columns}"
    )
# Kept as a float array, matching the dtype the notebook used before.
station = station_labels.astype(float).to_numpy()

# Set entry [i, j] to 0 whenever column i belongs to an earlier (lower-numbered)
# station than column j; same-station and later-station pairs stay at -1.
# NOTE(review): confirm the [i, j] direction against lingam's prior_knowledge
# convention before passing this matrix to a model.
earlier_station = station[:, None] < station[None, :]
prior_knowledge[earlier_station] = 0


In [6]:
import torch
from castle.algorithms import DAG_GNN


# structure learning
# Request the GPU only when CUDA is actually present, so the cell also runs on
# CPU-only machines instead of depending on a hard-coded "gpu".
device_type = "gpu" if torch.cuda.is_available() else "cpu"

# k_max_iter and h_tolerance are given to the constructor: gCastle documents
# both as DAG_GNN hyper-parameters, whereas passed to learn() they would only
# land in its **kwargs.
# NOTE(review): confirm against the installed castle version.
dag_gnn = DAG_GNN(device_type=device_type, k_max_iter=10, h_tolerance=1e-6)
dag_gnn.learn(X)
dag_gnn.causal_matrix

2024-10-26 14:29:35,182 - /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/castle/algorithms/gradient/dag_gnn/torch/dag_gnn.py[line:165] - INFO: GPU is available.


2024-10-26 14:36:08,988 - /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/castle/algorithms/gradient/dag_gnn/torch/dag_gnn.py[line:253] - INFO: Iter: 0, epoch: 299, h_new: 1.0
2024-10-26 14:41:09,366 - /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/castle/algorithms/gradient/dag_gnn/torch/dag_gnn.py[line:253] - INFO: Iter: 1, epoch: 299, h_new: 0.00022715344547918903
2024-10-26 14:51:28,804 - /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/castle/algorithms/gradient/dag_gnn/torch/dag_gnn.py[line:253] - INFO: Iter: 2, epoch: 299, h_new: 2.5134366893553306e-05
2024-10-26 15:01:36,752 - /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/castle/algorithms/gradient/dag_gnn/torch/dag_gnn.py[line:253] - INFO: Iter: 3, epoch: 299, h_new: 2.6500003258433935e-05
2024-10-26 15:06:24,932 - /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/castle/algorithms/gradient/dag_gnn/torch/dag_gnn.py[line:253] - INFO: 

Tensor([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

In [3]:
from castle.algorithms import CORL
import torch

# Request the GPU only when CUDA is actually present: the recorded run of this
# cell logged "GPU is unavailable", so a hard-coded "gpu" is not portable.
device_type = "gpu" if torch.cuda.is_available() else "cpu"

# CORL structure learner with batch size 128 and 500 training iterations.
corl = CORL(device_type=device_type, batch_size=128, iteration=500)

# Fit on the normalised data, then display the learned matrix as the cell output.
corl.learn(X)
corl.causal_matrix

2024-10-26 21:03:30,355 - /Users/marksobolev/projects/NeuralWave/.venv/lib/python3.11/site-packages/castle/algorithms/gradient/corl/torch/corl.py[line:175] - INFO: GPU is unavailable.
2024-10-26 21:03:30,360 - /Users/marksobolev/projects/NeuralWave/.venv/lib/python3.11/site-packages/castle/algorithms/gradient/corl/torch/corl.py[line:233] - INFO: Python version is 3.11.9
2024-10-26 21:03:30,613 - /Users/marksobolev/projects/NeuralWave/.venv/lib/python3.11/site-packages/castle/algorithms/gradient/corl/torch/corl.py[line:277] - INFO: Shape of input batch: 128, 98, 100
2024-10-26 21:03:30,613 - /Users/marksobolev/projects/NeuralWave/.venv/lib/python3.11/site-packages/castle/algorithms/gradient/corl/torch/corl.py[line:279] - INFO: Shape of input batch: 128, 98, 256
2024-10-26 21:03:30,614 - /Users/marksobolev/projects/NeuralWave/.venv/lib/python3.11/site-packages/castle/algorithms/gradient/corl/torch/corl.py[line:281] - INFO: Starting training.
  td_target=torch.tensor(td_target),
2024-10-2

Tensor([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [8]:
# Count the non-zero entries of CORL's learned matrix
# (presumably one per predicted directed edge — confirm castle's convention).
np.count_nonzero(corl.causal_matrix)

76

In [10]:
import causalAssembly
import pandas as pd
from causalAssembly.models_dag import ProductionLineGraph

# Fetch the dataset bundled with causalAssembly's ProductionLineGraph.
# The recorded output shows a table with Station<k>_mp_<i> columns.
# NOTE(review): get_data() may need network access — confirm in the causalAssembly docs.
assembly_line_data = ProductionLineGraph.get_data()
# Bare last expression: rendered as the cell output.
assembly_line_data

Unnamed: 0,Station1_mp_0,Station1_mp_1,Station1_mp_2,Station1_mp_3,Station1_mp_4,Station1_mp_5,Station2_mp_6,Station2_mp_7,Station2_mp_8,Station2_mp_9,...,Station5_mp_88,Station5_mp_89,Station5_mp_90,Station5_mp_91,Station5_mp_92,Station5_mp_93,Station5_mp_94,Station5_mp_95,Station5_mp_96,Station5_mp_97
0,0.000177,9.0,3.0,0.000208,14.0,3.0,39581.93,0.003666,57107.03,39943.39,...,4417200.0,0.004736,11261.05,5466.781,0.005314,376.8426,0.000797,5808.374,4394433.0,0.004768
1,0.000185,8.0,2.0,0.000212,14.0,4.0,37468.92,0.003563,57293.53,40331.77,...,4278373.0,0.004930,11155.95,5883.358,0.005334,405.0416,0.000676,5288.613,4215270.0,0.004828
2,0.000178,9.0,3.0,0.000168,13.0,3.0,38030.34,0.003723,54422.99,37161.29,...,5111332.0,0.004867,10984.19,6541.551,0.005104,357.6159,0.000701,4433.027,4686754.0,0.004654
3,0.000217,8.0,3.0,0.000245,17.0,5.0,37374.71,0.003711,58731.69,41450.76,...,4107996.0,0.004949,11296.94,5489.853,0.005473,377.4834,0.000525,5789.147,4946328.0,0.004945
4,0.000236,9.0,3.0,0.000199,13.0,3.0,39270.46,0.003662,52442.64,35021.37,...,4234797.0,0.004887,11195.68,5809.656,0.005359,385.8150,0.000430,5389.874,4426723.0,0.004898
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15576,0.000185,8.0,2.0,0.000233,16.0,5.0,40699.00,0.003697,56766.72,39306.99,...,4284681.0,0.004768,10945.10,6334.544,0.005453,369.1519,0.000451,4649.006,4335166.0,0.004945
15577,0.000199,7.0,2.0,0.000208,14.0,4.0,35100.19,0.003674,53121.34,35751.98,...,4751879.0,0.004900,11073.92,6044.221,0.005373,394.1465,0.000600,5008.545,4089065.0,0.004898
15578,0.000139,10.0,3.0,0.000262,13.0,5.0,39616.54,0.003646,57208.93,40201.03,...,4327666.0,0.004727,11237.34,5677.633,0.005424,412.7323,0.000525,5545.610,4839991.0,0.004914
15579,0.000193,6.0,3.0,0.000232,15.0,5.0,39881.87,0.003641,56411.03,38993.59,...,4098588.0,0.004896,10999.57,6271.737,0.005346,396.0692,0.000525,4717.582,4290995.0,0.004900


In [16]:
# Load the reference (ground-truth) production-line object from causalAssembly.
assembly_line = ProductionLineGraph.get_ground_truth()
# Display its `between_adjacency` table; the recorded output is indexed by the
# Station<k>_mp_<i> names on both axes. This table is only displayed here — the
# SHD cells further down compare against `assembly_line.graph` instead.
assembly_line.between_adjacency

Unnamed: 0,Station1_mp_0,Station1_mp_1,Station1_mp_2,Station1_mp_3,Station1_mp_4,Station1_mp_5,Station2_mp_6,Station2_mp_7,Station2_mp_8,Station2_mp_9,...,Station5_mp_88,Station5_mp_89,Station5_mp_90,Station5_mp_91,Station5_mp_92,Station5_mp_93,Station5_mp_94,Station5_mp_95,Station5_mp_96,Station5_mp_97
Station1_mp_0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Station1_mp_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Station1_mp_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Station1_mp_3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Station1_mp_4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Station5_mp_93,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Station5_mp_94,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Station5_mp_95,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Station5_mp_96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
# CORL's learned matrix as a plain NumPy array; used as the prediction in the SHD cell below.
causal = np.asarray(corl.causal_matrix)
# NOTE: despite the `_adj` suffix this holds the ground-truth graph object itself —
# the recorded output shows a networkx DiGraph, not an adjacency matrix.
assembly_line_adj = assembly_line.graph
# Bare last expression: rendered as the cell output.
assembly_line_adj

<networkx.classes.digraph.DiGraph at 0x2b85feb10>

In [28]:
import cdt.metrics

# Structural Hamming Distance between the ground-truth graph and CORL's estimate.
# NOTE(review): the target is a DiGraph while the prediction is a bare matrix, so the
# score is only meaningful if the graph's node order matches the column order of the
# data CORL was trained on — confirm.
# NOTE(review): `corl` was fitted on X (the high_scrap + low_scrap CSVs), not on
# `assembly_line_data` — confirm both describe the same variables.
cdt.metrics.SHD(assembly_line_adj, causal)

np.float64(493.0)

In [29]:
# Same SHD comparison for the DAG-GNN estimate.
# NOTE: the recorded NameError means `dag_gnn` did not exist in the kernel session
# that produced this output; the DAG_GNN training cell must be run in the same
# session before this one.
cdt.metrics.SHD(assembly_line_adj, dag_gnn.causal_matrix)

NameError: name 'dag_gnn' is not defined