In [16]:
import numpy as np 
import pandas as pd 
import os
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [None]:
def g_score(true_positives, false_positives, false_negatives):
    """Return the g-score: ``max(0, TP - FP) / (TP + FN)``.

    The numerator is clipped at zero, so the score is never negative when
    there are more false positives than true positives.  A small epsilon is
    added to the denominator so that ``TP + FN == 0`` does not raise
    ``ZeroDivisionError``.
    """
    epsilon = 1e-7  # guards against division by zero
    clipped_gain = max(0, true_positives - false_positives)
    denominator = true_positives + false_negatives + epsilon
    return clipped_gain / denominator

def rank_score(g_scores):
    """Accumulate ``1 ** g`` over every g-score and return the total.

    NOTE(review): ``1 ** g`` evaluates to 1 for every ``g``, so this currently
    returns the *number* of scores regardless of their values.  The base ``1``
    looks like a typo or placeholder -- confirm the intended ranking formula
    before relying on this value.
    """
    total = 0
    for g in g_scores:
        total += 1 ** g
    return total


In [17]:
class GCNLayer:
    """Single graph-convolution layer computing ``relu(A @ X @ W)``.

    Attributes:
        weights: Trainable matrix returned by
            ``initialize_weights(input_dim, output_dim)``.
        grads: Gradient of the loss with respect to ``weights`` from the most
            recent ``backward`` call (zeros until then).  It always has the
            same shape as ``weights`` so it can be used directly in a
            ``weights -= learning_rate * grads`` update.
    """

    def __init__(self, input_dim, output_dim):
        # NOTE(review): `initialize_weights` is not defined in the visible part
        # of this notebook -- confirm it exists before instantiating the layer.
        # Presumably it returns an (input_dim, output_dim) array.
        self.weights = initialize_weights(input_dim, output_dim)
        self.grads = np.zeros_like(self.weights)
        # Forward-pass cache consumed by `backward`.
        self._A = None
        self._AX = None
        self._Z = None

    def relu(self, x):
        """Element-wise ``max(0, x)``."""
        return np.maximum(0, x)

    def sigmoid(self, x):
        """Element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

    def forward(self, A, X):
        """Return ``relu(A @ X @ weights)`` and cache what ``backward`` needs.

        Args:
            A: Matrix multiplied on the left of ``X`` (e.g. an adjacency matrix).
            X: Input feature matrix.
        """
        self._A = A
        self._AX = A @ X                      # aggregated inputs
        self._Z = self._AX @ self.weights     # pre-activation
        return self.relu(self._Z)

    def backward(self, d_loss):
        """Back-propagate ``d_loss`` through the layer.

        Args:
            d_loss: Gradient of the loss with respect to this layer's output
                (same shape as the value returned by ``forward``).

        Returns:
            Gradient of the loss with respect to the input ``X`` of the last
            ``forward`` call, suitable for passing to the preceding layer.

        Raises:
            RuntimeError: If ``forward`` has not been called yet.
        """
        if getattr(self, "_Z", None) is None:
            raise RuntimeError("forward() must be called before backward()")

        # ReLU derivative: the gradient only flows where the pre-activation was positive.
        d_pre = d_loss * (self._Z > 0)
        # Z = (A X) W  =>  dL/dW = (A X)^T dZ, which has the same shape as `weights`.
        self.grads = self._AX.T @ d_pre
        # dL/dX = A^T dZ W^T
        return self._A.T @ (d_pre @ self.weights.T)

class GCN:
    """Two stacked ``GCNLayer`` instances: ``input -> hidden -> output``.

    Attributes:
        gcn1: First layer, built with ``(input_dim, hidden_dim)``.
        gcn2: Second layer, built with ``(hidden_dim, output_dim)``.
        H1, H2: Outputs of the first and second layer from the most recent
            ``forward`` call (set by ``forward``).
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        self.gcn1 = GCNLayer(input_dim, hidden_dim)
        self.gcn2 = GCNLayer(hidden_dim, output_dim)

    def forward(self, A, X):
        """Run both layers with the same ``A`` and return the second layer's output."""
        self.H1 = self.gcn1.forward(A, X)
        self.H2 = self.gcn2.forward(A, self.H1)
        return self.H2

    def backward(self, d_loss):
        """Run the backward pass through the second layer, then the first.

        Whatever the second layer's ``backward`` returns is handed to the first
        layer's ``backward``; the first layer's return value is returned to the
        caller (previously it was computed and discarded).
        """
        d_hidden = self.gcn2.backward(d_loss)
        return self.gcn1.backward(d_hidden)

    def update_weights(self, learning_rate):
        """Apply one in-place gradient-descent step to both layers."""
        for layer in (self.gcn1, self.gcn2):
            layer.weights -= learning_rate * layer.grads


In [18]:
def create_features(dataset_list):
    """Build a feature frame for every ``(alarm, causal)`` pair.

    For each pair a *new* DataFrame is produced (the caller's ``alarm`` frame is
    left untouched) containing the original columns plus:

    * ``duration``: ``end_timestamp - start_timestamp``.
    * ``count_last_5min`` / ``count_last_3min``: per-device rolling count of
      ``alarm_id`` over the previous 300 / 180 rows.
    * ``last_alarm``: previous ``alarm_id`` on the same device (-1 if none).
    * ``time_since_last_alarm``: difference to the previous ``start_timestamp``
      on the same device (-1 if none).
    * one-hot columns ``alarm_<id>`` and ``device_<id>``.

    Args:
        dataset_list: Iterable of ``(alarm, causal)`` where ``alarm`` is a
            DataFrame with columns ``alarm_id``, ``device_id``,
            ``start_timestamp`` and ``end_timestamp`` and ``causal`` is an
            array exposing ``flatten()``.

    Returns:
        List of ``(feature_frame, flattened_causal)`` tuples, in input order.
    """

    def _rolling_row_count(grouped, window):
        # NOTE(review): an integer `window` counts ROWS, not seconds, so the
        # "5min"/"3min" column names only hold if each device has exactly one
        # row per second -- confirm, or switch to a time-based window.
        return grouped.transform(
            lambda s: s.rolling(window=window, min_periods=0).count()
        ).astype(int)

    new_dataset_list = []
    for alarm, causal in dataset_list:
        alarm_one_hot = pd.get_dummies(alarm['alarm_id'], prefix='alarm')
        device_one_hot = pd.get_dummies(alarm['device_id'], prefix='device')

        by_device = alarm.groupby('device_id')
        alarm_ids = by_device['alarm_id']

        # `assign` returns a new frame, so the caller's DataFrame is not mutated.
        # NOTE(review): shift()/diff() follow row order within each device;
        # presumably rows are already sorted by start_timestamp -- verify.
        enriched = alarm.assign(
            duration=alarm['end_timestamp'] - alarm['start_timestamp'],
            count_last_5min=_rolling_row_count(alarm_ids, 300),
            count_last_3min=_rolling_row_count(alarm_ids, 180),
            last_alarm=alarm_ids.shift().fillna(-1).astype(int),
            time_since_last_alarm=by_device['start_timestamp'].diff().fillna(-1).astype(int),
        )

        features = pd.concat([enriched, alarm_one_hot, device_one_hot], axis=1)
        new_dataset_list.append((features, causal.flatten()))

    return new_dataset_list

In [19]:
# Root directory of the competition data; each dataset lives in `dataset_<n>/` below it.
DATASETS_ROOT = '/kaggle/input/causal-structure-learning-from-event-sequences/datasets'


def _dataset_path(index, filename):
    """Return the path of `filename` inside the `dataset_<index>` directory."""
    return f'{DATASETS_ROOT}/dataset_{index}/{filename}'


def _load_npy(index, filename):
    """Load a `.npy` file from the `dataset_<index>` directory."""
    # SECURITY: allow_pickle=True can execute arbitrary code when loading an
    # untrusted file.  Kept to preserve behaviour; switch to False if these
    # arrays turn out not to be object arrays.
    return np.load(_dataset_path(index, filename), allow_pickle=True)


# Alarm logs and causal priors for all four datasets.
alarm1 = pd.read_csv(_dataset_path(1, 'alarm.csv'))
causal1 = _load_npy(1, 'causal_prior.npy')

alarm2 = pd.read_csv(_dataset_path(2, 'alarm.csv'))
causal2 = _load_npy(2, 'causal_prior.npy')

alarm3 = pd.read_csv(_dataset_path(3, 'alarm.csv'))
causal3 = _load_npy(3, 'causal_prior.npy')

alarm4 = pd.read_csv(_dataset_path(4, 'alarm.csv'))
causal4 = _load_npy(4, 'causal_prior.npy')

# RCA priors and topologies.
# NOTE(review): only datasets 1-3 are loaded here; confirm whether dataset_4
# has no rca_prior/topology files or was simply left out.
rca1 = pd.read_csv(_dataset_path(1, 'rca_prior.csv'))
topology1 = _load_npy(1, 'topology.npy')
rca2 = pd.read_csv(_dataset_path(2, 'rca_prior.csv'))
topology2 = _load_npy(2, 'topology.npy')
rca3 = pd.read_csv(_dataset_path(3, 'rca_prior.csv'))
topology3 = _load_npy(3, 'topology.npy')

In [35]:
# Sanity check: flatten the dataset_1 causal prior into a 1-D array and print
# its shape, to see the target length create_features will produce for it.
test = causal1.flatten()
print(test.shape)

(1521,)


In [37]:
# Pair each alarm log with its causal prior, then replace the raw pairs with
# their engineered-feature counterparts.
dataset_list = list(zip(
    (alarm1, alarm2, alarm3, alarm4),
    (causal1, causal2, causal3, causal4),
))

dataset_list = create_features(dataset_list)

# Report the feature-frame shape and flattened-target shape of every dataset.
for al, ca in dataset_list:
    print(al.shape)
    print(ca.shape)

(141853, 83)
(1521,)
(132688, 100)
(2401,)
(198962, 79)
(961,)
(126803, 139)
(900,)


In [21]:
# Build every ordered (train, test) pairing of two *different* datasets.
# With 4 datasets this yields 12 entries keyed 'dataset_0' .. 'dataset_11'.
dataset_dict = {}
counter = 0

for i in range(4):
    for j in range(4):
        if i == j:
            # A dataset is never paired with itself.
            continue

        X_train, y_train = dataset_list[i]
        X_test, y_test = dataset_list[j]

        dataset_dict[f'dataset_{counter}'] = dict(
            X_train=X_train,
            y_train=y_train,
            X_test=X_test,
            y_test=y_test,
        )
        counter += 1



In [22]:
def dont_run():
    """Sketch of a Markov-model train/predict round trip (not called anywhere visible).

    Trains on the ``X_train``/``y_train`` of ``dataset_dict['dataset_0']`` and
    predicts on the ``X_test`` of ``dataset_dict['dataset_1']``.  The
    predictions are neither returned nor stored.

    NOTE(review): ``MarkovModel`` and ``num_nodes`` are not defined in the
    visible part of the notebook; calling this raises ``NameError`` unless they
    are defined elsewhere.
    """
    # Create an instance of the MarkovModel.
    model = MarkovModel(num_nodes)

    # Fit on the training split of the first pairing.
    first_pairing = dataset_dict['dataset_0']
    model.train(first_pairing['X_train'], first_pairing['y_train'])

    # Predict on the test split of the second pairing.
    held_out = dataset_dict['dataset_1']['X_test']
    predicted = model.predict(held_out)

In [23]:
# Re-check the shapes of every (features, flattened causal prior) pair,
# one shape per output line.
for al, ca in dataset_list:
    print(al.shape, ca.shape, sep='\n')

(141853, 83)
(1521,)
(132688, 100)
(2401,)
(198962, 79)
(961,)
(126803, 139)
(900,)
