In [3]:
import numpy as np
import pandas as pd
from lingam import VARLiNGAM
from sklearn.preprocessing import MinMaxScaler

# Read the merged dataset and discard the 'Faulty' column before modelling
data = pd.read_csv('./merged_threshold_240_scrap.csv').drop(columns=['Faulty'])

# Min-max scale every remaining column, then re-attach the column labels
scaler = MinMaxScaler()
data_scaled = scaler.fit(data).transform(data)
df_scaled = pd.DataFrame(data=data_scaled, columns=data.columns)

# Fit VARLiNGAM on the scaled frame. No `lags` argument is passed, so the
# library's default lag order applies.
model = VARLiNGAM(random_state=42, prune=True)
model.fit(df_scaled)



<lingam.var_lingam.VARLiNGAM at 0x7efbf98a4970>

In [13]:
from lingam.utils import make_dot

# Take the estimated adjacency matrices straight from the fitted model so this
# cell does not depend on a variable left over from an earlier kernel session.
adjacency_matrices = model.adjacency_matrices_

# Plain Python list of column names, used as node labels in the graph
labels = list(data.columns)

# Plot the contemporaneous graph (the matrix at index 0) and write it as a PDF
make_dot(adjacency_matrices[0], labels=labels).render("var_lingam_graph", format='pdf')

'var_lingam_graph.pdf'

In [11]:
# Inspect the matrix at index 0 of adjacency_matrices (the one plotted as the contemporaneous graph)
adjacency_matrices[0]

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.19689691, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [17]:
def prior_knowledge_matrix(columns):
    """
    Build a station-ordering prior knowledge matrix for LiNGAM.

    Matrix values:
    0: no directed path possible (temporal constraint violation)
    1: directed path (never assigned by this function)
    -1: no prior knowledge (we'll allow the algorithm to determine)

    Entry ``[i, j]`` is 0 when column ``i`` belongs to a strictly
    higher-numbered station than column ``j``. Every diagonal entry is 0
    (no self loop). All other entries, including pairs within the same
    station, stay at -1.

    Parameters
    ----------
    columns : sequence of str
        Column names whose first ``_``-separated token is ``Station<k>``,
        where ``<k>`` is an integer station number (e.g. ``Station5_mp_85``).

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(columns), len(columns))``.

    Raises
    ------
    ValueError
        If a station number cannot be parsed from a column name.
    """
    def get_station_number(col_name):
        # The station id is whatever remains of the first token after
        # removing the literal 'Station'.
        token = col_name.split('_')[0].replace('Station', '')
        try:
            return int(token)
        except ValueError as exc:
            raise ValueError(
                f"Cannot parse a station number from column name {col_name!r}; "
                "expected a name starting with 'Station<k>_'"
            ) from exc

    # Parse each column once instead of re-parsing inside an n x n loop.
    stations = np.array([get_station_number(name) for name in columns], dtype=int)
    n_features = len(stations)

    prior_knowledge = np.full((n_features, n_features), -1)

    # constraint: row station strictly later than column station -> 0
    later_station = stations[:, None] > stations[None, :]
    prior_knowledge[later_station] = 0

    # No self loop allowed
    np.fill_diagonal(prior_knowledge, 0)

    return prior_knowledge


# Derive the station-ordering prior knowledge from the dataset's column names
prior_knowledge = prior_knowledge_matrix(columns=data.columns)
prior_knowledge

array([[ 0, -1, -1, ..., -1, -1, -1],
       [-1,  0, -1, ..., -1, -1, -1],
       [-1, -1,  0, ..., -1, -1, -1],
       ...,
       [ 0,  0,  0, ...,  0, -1, -1],
       [ 0,  0,  0, ..., -1,  0, -1],
       [ 0,  0,  0, ..., -1, -1,  0]])

In [18]:
# Zero every entry of the index-0 adjacency matrix whose position is marked 0
# in the transposed prior-knowledge matrix; all other entries are kept as-is.
adjusted_adjacency_matrix = np.where(
    np.transpose(prior_knowledge) == 0,
    0,
    adjacency_matrices[0],
)

print("Adjusted Adjacency Matrix:\n", adjusted_adjacency_matrix)

Adjusted Adjacency Matrix:
 [[0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.19689691 ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]


In [19]:
# Render the constraint-adjusted graph to PDF. The output name matches the one
# used for the unadjusted graph, so that file is overwritten.
make_dot(
    adjusted_adjacency_matrix,
    labels=labels,
).render("var_lingam_graph", format='pdf')

'var_lingam_graph.pdf'

In [21]:
# Display the adjusted adjacency matrix (NumPy abbreviates large arrays in the repr)
adjusted_adjacency_matrix

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.19689691, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [22]:
# Label both axes of the adjusted matrix with the dataset's column names
adjusted_adjacency_matrix_df = pd.DataFrame(
    data=adjusted_adjacency_matrix,
    index=data.columns,
    columns=data.columns,
)

In [27]:
# Row labelled Station5_mp_85, ordered from smallest to largest value
adjusted_adjacency_matrix_df.loc["Station5_mp_85"].sort_values()

Station4_mp_61    0.000000
Station4_mp_73    0.000000
Station4_mp_72    0.000000
Station4_mp_71    0.000000
Station4_mp_70    0.000000
                    ...   
Station2_mp_29    0.000000
Station2_mp_28    0.000000
Station2_mp_27    0.000000
Station2_mp_8     0.000000
Station2_mp_13    0.358685
Name: Station5_mp_85, Length: 98, dtype: float64

In [35]:
# Column Station5_mp_85 of the transposed frame, smallest to largest value.
# This selects the same values as the Station5_mp_85 row of the untransposed frame.
adjusted_adjacency_matrix_df.transpose()["Station5_mp_85"].sort_values()

Station4_mp_61    0.000000
Station4_mp_73    0.000000
Station4_mp_72    0.000000
Station4_mp_71    0.000000
Station4_mp_70    0.000000
                    ...   
Station2_mp_29    0.000000
Station2_mp_28    0.000000
Station2_mp_27    0.000000
Station2_mp_8     0.000000
Station2_mp_13    0.358685
Name: Station5_mp_85, Length: 98, dtype: float64

In [36]:
# Persist the transposed adjusted matrix (with row/column labels) as CSV
adjusted_adjacency_matrix_df.transpose().to_csv('adj_matrix_var_lag1.csv')

Adding lag 3: refit VARLiNGAM with lags=3

In [2]:
import numpy as np
import pandas as pd
from lingam import VARLiNGAM
from sklearn.preprocessing import MinMaxScaler

# Read the merged dataset and discard the 'Faulty' column before modelling
data = pd.read_csv('./merged_threshold_240_scrap.csv').drop(columns=['Faulty'])

# Min-max scale every remaining column, then re-attach the column labels
scaler = MinMaxScaler()
data_scaled = scaler.fit(data).transform(data)
df_scaled = pd.DataFrame(data=data_scaled, columns=data.columns)

In [3]:
# Fit VARLiNGAM with three lags on the scaled data
model = VARLiNGAM(
    lags=3,
    prune=True,
    random_state=42,
)
model.fit(df_scaled)


<lingam.var_lingam.VARLiNGAM at 0x7f6eb8bd10c0>

In [7]:
# Keep a handle on the fitted model's adjacency matrices; index 0 is used by the cells below
adjacency_matrices = model.adjacency_matrices_

In [8]:
# NOTE(review): `prior_knowledge` is not defined in any cell between this
# section's imports and this line; it is only assigned in other cells. On a
# fresh kernel, confirm that one of those has run first.
# Zero every entry of the index-0 adjacency matrix whose position is marked 0
# in the transposed prior-knowledge matrix; all other entries are kept as-is.
adjusted_adjacency_matrix = np.where(
    np.transpose(prior_knowledge) == 0,
    0,
    adjacency_matrices[0],
)

print("Adjusted Adjacency Matrix:\n", adjusted_adjacency_matrix)

Adjusted Adjacency Matrix:
 [[0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.19663143 ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]


In [9]:
from lingam.utils import make_dot

# Plain Python list of column names, used as node labels in the graph
labels = data.columns.tolist()

def prior_knowledge_matrix(columns):
    """
    Build a station-ordering prior knowledge matrix for LiNGAM.

    Matrix values:
    0: no directed path possible (temporal constraint violation)
    1: directed path (never assigned by this function)
    -1: no prior knowledge (we'll allow the algorithm to determine)

    Entry ``[i, j]`` is 0 when column ``i`` belongs to a strictly
    higher-numbered station than column ``j``. Every diagonal entry is 0
    (no self loop). All other entries, including pairs within the same
    station, stay at -1.

    Parameters
    ----------
    columns : sequence of str
        Column names whose first ``_``-separated token is ``Station<k>``,
        where ``<k>`` is an integer station number (e.g. ``Station5_mp_85``).

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(columns), len(columns))``.

    Raises
    ------
    ValueError
        If a station number cannot be parsed from a column name.
    """
    def get_station_number(col_name):
        # The station id is whatever remains of the first token after
        # removing the literal 'Station'.
        token = col_name.split('_')[0].replace('Station', '')
        try:
            return int(token)
        except ValueError as exc:
            raise ValueError(
                f"Cannot parse a station number from column name {col_name!r}; "
                "expected a name starting with 'Station<k>_'"
            ) from exc

    # Parse each column once instead of re-parsing inside an n x n loop.
    stations = np.array([get_station_number(name) for name in columns], dtype=int)
    n_features = len(stations)

    prior_knowledge = np.full((n_features, n_features), -1)

    # constraint: row station strictly later than column station -> 0
    later_station = stations[:, None] > stations[None, :]
    prior_knowledge[later_station] = 0

    # No self loop allowed
    np.fill_diagonal(prior_knowledge, 0)

    return prior_knowledge


# Derive the station-ordering prior knowledge from the dataset's column names
prior_knowledge = prior_knowledge_matrix(columns=data.columns)
prior_knowledge  # not displayed here: only a cell's final expression is echoed

# Zero every entry of the index-0 adjacency matrix whose position is marked 0
# in the transposed prior-knowledge matrix; all other entries are kept as-is.
adjusted_adjacency_matrix = np.where(
    np.transpose(prior_knowledge) == 0,
    0,
    adjacency_matrices[0],
)

print("Adjusted Adjacency Matrix:\n", adjusted_adjacency_matrix)

# Render the adjusted graph for the lag-3 fit to PDF
make_dot(
    adjusted_adjacency_matrix,
    labels=labels,
).render("var_lingam_graph_lag3", format='pdf')

Adjusted Adjacency Matrix:
 [[0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.19663143 ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]


'var_lingam_graph_lag3.pdf'

In [12]:
# Label both axes of the adjusted matrix with the dataset's column names
adjusted_adjacency_matrix_df = pd.DataFrame(
    data=adjusted_adjacency_matrix,
    index=data.columns,
    columns=data.columns,
)

In [14]:
# Persist the transposed adjusted matrix (with row/column labels) as CSV
adjusted_adjacency_matrix_df.transpose().to_csv('adj_matrix_var_lag3.csv')

Adding lag 5: refit VARLiNGAM with lags=5

In [1]:
import numpy as np
import pandas as pd
from lingam import VARLiNGAM
from sklearn.preprocessing import MinMaxScaler

# Read the merged dataset and discard the 'Faulty' column before modelling
data = pd.read_csv('./merged_threshold_240_scrap.csv').drop(columns=['Faulty'])

# Min-max scale every remaining column, then re-attach the column labels
scaler = MinMaxScaler()
data_scaled = scaler.fit(data).transform(data)
df_scaled = pd.DataFrame(data=data_scaled, columns=data.columns)

# Fit VARLiNGAM with five lags on the scaled data
model = VARLiNGAM(
    lags=5,
    prune=True,
    random_state=42,
)
model.fit(df_scaled)


<lingam.var_lingam.VARLiNGAM at 0x7f6df5b87100>