In [None]:
import numpy as np 
import pandas as pd 
import os
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [None]:
# Root folder of the competition data; each dataset_<k> directory lives beneath it.
DATA_ROOT = '/kaggle/input/causal-structure-learning-from-event-sequences/datasets'


def _dataset_file(dataset_id, filename):
    """Return the path of `filename` inside the `dataset_<dataset_id>` folder."""
    return f'{DATA_ROOT}/dataset_{dataset_id}/{filename}'


# NOTE(security): allow_pickle=True lets np.load unpickle arbitrary objects, which can
# execute code. Keep it only for trusted files; drop it if the arrays are plain numeric.

# Alarm logs and causal priors (datasets 1-4).
alarm1 = pd.read_csv(_dataset_file(1, 'alarm.csv'))
causal1 = np.load(_dataset_file(1, 'causal_prior.npy'), allow_pickle=True)

alarm2 = pd.read_csv(_dataset_file(2, 'alarm.csv'))
causal2 = np.load(_dataset_file(2, 'causal_prior.npy'), allow_pickle=True)

alarm3 = pd.read_csv(_dataset_file(3, 'alarm.csv'))
causal3 = np.load(_dataset_file(3, 'causal_prior.npy'), allow_pickle=True)

alarm4 = pd.read_csv(_dataset_file(4, 'alarm.csv'))
causal4 = np.load(_dataset_file(4, 'causal_prior.npy'), allow_pickle=True)

# RCA priors and topologies are loaded for datasets 1-3 only.
rca1 = pd.read_csv(_dataset_file(1, 'rca_prior.csv'))
topology1 = np.load(_dataset_file(1, 'topology.npy'), allow_pickle=True)
rca2 = pd.read_csv(_dataset_file(2, 'rca_prior.csv'))
topology2 = np.load(_dataset_file(2, 'topology.npy'), allow_pickle=True)
rca3 = pd.read_csv(_dataset_file(3, 'rca_prior.csv'))
topology3 = np.load(_dataset_file(3, 'topology.npy'), allow_pickle=True)

In [None]:
class MarkovModel:
    """Transition model over alarm types, restricted by a prior matrix `y`.

    `train` counts, for every alarm occurrence, which alarm types start within
    `time_window` time units afterwards, keeps only the pairs whose prior entry
    is not -1, and row-normalises the counts. `predict_matrix` then marks every
    pair with a positive transition weight as a predicted edge (1) and leaves
    everything else at -1.

    NOTE(review): alarm ids are mapped to matrix rows by their rank among the
    ids present in the training data, not by the id value itself. This only
    lines up with `y` if every alarm type occurs in the data (or ids are
    otherwise dense) - confirm against the dataset.
    """

    def __init__(self, y, time_window=300):
        """Store the prior matrix and allocate an all-zero transition matrix.

        Args:
            y: 2-D array-like prior; entries equal to -1 block the transition.
            time_window: Look-ahead span, in the units of `start_timestamp`.
        """
        self.y = np.asarray(y)
        self.transition_matrix = np.zeros_like(self.y, dtype=float)
        self.time_window = time_window
        # Filled by train(); empty means "not trained yet".
        self.alarm_to_node = {}

    def train(self, X):
        """Estimate the row-normalised transition matrix from an alarm log.

        Args:
            X: DataFrame with `alarm_id` and `start_timestamp` columns.

        Raises:
            ValueError: if the log contains more distinct alarm ids than the
                prior matrix has rows or columns.
        """
        alarm_ids = X['alarm_id'].to_numpy()
        timestamps = X['start_timestamp'].to_numpy()

        # np.unique returns the ids sorted; `nodes` is each row's rank in that order.
        unique_alarms, nodes = np.unique(alarm_ids, return_inverse=True)
        # Kept on the instance so predict_matrix can translate test ids later.
        self.alarm_to_node = {alarm: index for index, alarm in enumerate(unique_alarms)}

        n_rows, n_cols = self.y.shape
        if len(unique_alarms) > min(n_rows, n_cols):
            raise ValueError(
                f"{len(unique_alarms)} distinct alarm ids do not fit a prior of shape {self.y.shape}"
            )

        # allowed[i, j] is True when the prior does not rule out the transition i -> j.
        allowed = self.y != -1

        # Sort once so each look-ahead window (t, t + time_window] is a contiguous slice.
        order = np.argsort(timestamps, kind='stable')
        sorted_times = timestamps[order]
        sorted_nodes = nodes[order]
        window_start = np.searchsorted(sorted_times, sorted_times, side='right')
        window_stop = np.searchsorted(sorted_times, sorted_times + self.time_window, side='right')

        # Start from zero on every call so retraining never mixes new counts
        # into previously normalised probabilities.
        counts = np.zeros_like(self.transition_matrix)
        for current_node, start, stop in zip(sorted_nodes, window_start, window_stop):
            if stop <= start:
                continue
            followers = sorted_nodes[start:stop]
            followers = followers[allowed[current_node, followers]]
            counts[current_node] += np.bincount(followers, minlength=n_cols)

        # Normalise each row; rows without observations stay all-zero.
        row_sums = counts.sum(axis=1, keepdims=True)
        row_sums[row_sums == 0] = 1  # Avoid division by zero
        self.transition_matrix = counts / row_sums

    def predict_matrix(self, X_test):
        """Build an edge matrix over the alarm ids that occur in `X_test`.

        Args:
            X_test: DataFrame with an `alarm_id` column.

        Returns:
            Square integer array indexed by the sorted unique test alarm ids:
            1 where training produced a positive transition weight for the
            pair, -1 everywhere else (including ids never seen in training).
        """
        test_alarms = sorted(np.unique(X_test['alarm_id']))
        n_test = len(test_alarms)
        predicted_matrix = np.full((n_test, n_test), -1, dtype=int)

        # (row in the test matrix, row in the trained matrix) for ids known to both.
        known = [
            (test_index, self.alarm_to_node[alarm])
            for test_index, alarm in enumerate(test_alarms)
            if alarm in self.alarm_to_node
        ]
        if not known:
            return predicted_matrix

        test_idx, train_idx = (np.array(column, dtype=int) for column in zip(*known))
        observed = self.transition_matrix[np.ix_(train_idx, train_idx)] > 0
        predicted_matrix[np.ix_(test_idx, test_idx)] = np.where(observed, 1, -1)
        return predicted_matrix

    def evaluate(self, y_test):
        """Placeholder: evaluation is not implemented; returns None."""

    def fine_tune(self):
        """Placeholder: fine-tuning is not implemented; returns None."""

    def deploy(self):
        """Placeholder: deployment is not implemented; returns None."""

In [None]:
def create_features(dataset_list):
    """Add engineered columns to every alarm frame in `dataset_list`.

    The input frames are left untouched; each returned frame is a new
    DataFrame containing the original columns plus `duration`, per-device
    rolling counts, the previous alarm on the same device, the time since
    that alarm, and one-hot columns for `alarm_id` and `device_id`.

    Args:
        dataset_list: Iterable of `(alarm, causal)` pairs, where `alarm` is a
            DataFrame with `alarm_id`, `device_id`, `start_timestamp` and
            `end_timestamp` columns. `causal` is passed through unchanged.

    Returns:
        List of `(featured_alarm, causal)` pairs in the input order.
    """
    new_dataset_list = []
    for alarm, causal in dataset_list:
        # Copy first so the caller's DataFrame is not modified as a side effect.
        alarm = alarm.copy()
        by_device = alarm.groupby('device_id')

        def rolling_count(window):
            # Number of alarms among the last `window` rows of the same device.
            return by_device['alarm_id'].transform(
                lambda ids: ids.rolling(window=window, min_periods=0).count()
            ).astype(int)

        alarm_one_hot = pd.get_dummies(alarm['alarm_id'], prefix='alarm')
        device_one_hot = pd.get_dummies(alarm['device_id'], prefix='device')

        featured = alarm.assign(
            duration=alarm['end_timestamp'] - alarm['start_timestamp'],
            # Time-windowed aggregations - Likely unnecessary as handled in training.
            # NOTE(review): an integer rolling window counts rows, not seconds, so these
            # are "last 300 / 180 alarms per device" rather than minutes - confirm intent.
            count_last_5min=rolling_count(300),
            count_last_3min=rolling_count(180),
            # Previous alarm on the same device; -1 when there is none.
            last_alarm=by_device['alarm_id'].shift().fillna(-1).astype(int),
            # Gap to the previous alarm on the same device; -1 when there is none.
            time_since_last_alarm=by_device['start_timestamp'].diff().fillna(-1).astype(int),
        )
        featured = pd.concat([featured, alarm_one_hot, device_one_hot], axis=1)
        new_dataset_list.append((featured, causal))

    return new_dataset_list

In [None]:
def train_model(X_train, y_train):
    """Placeholder for building and fitting a model (not implemented yet).

    Args:
        X_train: Training features.
        y_train: Training targets.

    Raises:
        NotImplementedError: always, until the steps below are written.
    """
    # Model architecture
    # Model training
    raise NotImplementedError("train_model is a placeholder and has not been implemented yet")


In [None]:

def evaluate_model(trained_model, X_test, y_test):
    """Placeholder for computing accuracy, precision and recall (not implemented yet).

    Args:
        trained_model: Model to evaluate.
        X_test: Test features.
        y_test: Test targets.

    Raises:
        NotImplementedError: always, until the metric computation is written.
    """
    # Calculate metrics
    raise NotImplementedError("evaluate_model is a placeholder and has not been implemented yet")


In [None]:
def fine_tune_model(trained_model):
    """Placeholder for hyperparameter tuning (not implemented yet).

    Args:
        trained_model: Model to tune.

    Raises:
        NotImplementedError: always, until the tuning step is written.
    """
    # Hyperparameter tuning
    raise NotImplementedError("fine_tune_model is a placeholder and has not been implemented yet")


In [None]:
def deploy_model(fine_tuned_model, new_data):
    """Placeholder for predicting root causes on new data (not implemented yet).

    Args:
        fine_tuned_model: Model used for prediction.
        new_data: Data to predict on.

    Raises:
        NotImplementedError: always, until the prediction step is written.
    """
    # Make predictions
    raise NotImplementedError("deploy_model is a placeholder and has not been implemented yet")


In [None]:
# Pair every alarm log with its causal prior, then add the engineered feature columns.
dataset_list = create_features(
    list(zip(
        (alarm1, alarm2, alarm3, alarm4),
        (causal1, causal2, causal3, causal4),
    ))
)

In [None]:
# Build one train/test split for every ordered pair of distinct datasets
# (4 datasets -> 12 splits, keyed 'dataset_0' ... 'dataset_11').
dataset_dict = {}
counter = 0

for i in range(4):
    for j in range(4):
        if i == j:
            # A dataset is never evaluated against itself.
            continue

        X_train, y_train = dataset_list[i]
        X_test, y_test = dataset_list[j]

        dataset_dict[f'dataset_{counter}'] = dict(
            X_train=X_train,
            y_train=y_train,
            X_test=X_test,
            y_test=y_test,
        )
        counter += 1



In [None]:
def dont_run():
    """Train a MarkovModel on split 'dataset_0' and predict on the test log of 'dataset_1'.

    Wrapped in a function so the notebook does not run the training pass
    when executed top to bottom; call it explicitly to run the experiment.

    Returns:
        The matrix produced by `MarkovModel.predict_matrix` for the test log.
    """
    # Train the model using training data; the prior matrix is the model's constructor argument.
    X_train, y_train = dataset_dict['dataset_0']['X_train'], dataset_dict['dataset_0']['y_train']
    markov_model = MarkovModel(y_train)
    markov_model.train(X_train)

    # Make predictions using test data
    X_test = dataset_dict['dataset_1']['X_test']
    predictions = markov_model.predict_matrix(X_test)
    return predictions

In [None]:
# Display the dataset_1 alarm DataFrame for a quick visual check.
alarm1


In [None]:
# Print each entry of the dataset_1 causal prior next to its index.
for num, row in enumerate(causal1):
    print(f"{num} {row}")


In [None]:
# Print each entry of the dataset_1 topology next to its index.
for num, row in enumerate(topology1):
    print(f"{num} {row}")