In [2]:
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

ModuleNotFoundError: No module named 'pgmpy'

In [None]:
class DataPreprocessor:
    def __init__(self, filepath):
        self.filepath = filepath
        self.label_encoder = LabelEncoder()

    def load_data(self):
        self.data = pd.read_excel(self.filepath, header=0)
        self.features = self.data.iloc[:, :-1]
        self.labels = self.data.iloc[:, -1]

    def clean_data(self):
        self.features = self.features.apply(pd.to_numeric, errors='coerce')
        self.features = self.features.fillna(self.features.mean())

    def encode_labels(self):
        self.labels = pd.to_numeric(self.labels, errors='coerce')
        valid_indices = self.labels.dropna().index
        self.labels = self.labels.loc[valid_indices].astype(int)
        self.features = self.features.loc[valid_indices]
        self.labels = self.label_encoder.fit_transform(self.labels)

    def scale_features(self):
        scaler = StandardScaler()
        self.features = scaler.fit_transform(self.features)

    def split_data(self):
        return train_test_split(self.features, self.labels, test_size=0.2, random_state=42)

    def preprocess(self):
        self.load_data()
        self.clean_data()
        self.encode_labels()
        self.scale_features()
        return self.split_data()

In [3]:
class MLPFaultModel:
    def __init__(self, num_inputs):
        self.model = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=1500, alpha=0.001, random_state=1, learning_rate_init=0.001)

    def train(self, X_train, y_train):
        self.model.fit(X_train, y_train)

    def evaluate(self, X_test, y_test):
        predictions = self.model.predict(X_test)
        accuracy = accuracy_score(y_test, predictions)
        report = classification_report(y_test, predictions)
        return accuracy, report

    def predict_proba(self, X):
        return self.model.predict_proba(X)

In [4]:
class MLPFaultModelManager:
    def __init__(self, input_dim, num_conditions):
        self.models = {condition: MLPFaultModel(input_dim) for condition in range(num_conditions)}

    def train_models(self, X_train, y_train):
        for condition, model in self.models.items():
            binary_labels = (y_train == condition).astype(int)
            model.train(X_train, binary_labels)

    def evaluate_models(self, X_test, y_test):
        evaluations = {}
        for condition, model in self.models.items():
            binary_labels = (y_test == condition).astype(int)
            accuracy, report = model.evaluate(X_test, binary_labels)
            evaluations[condition] = {'accuracy': accuracy, 'report': report}
        return evaluations


In [5]:
class ResidualCalculator:
    def __init__(self, models):
        self.models = models

    def calculate_residuals(self, X, y):
        residuals = {}
        for condition, model in self.models.items():
            probabilities = model.predict_proba(X)[:, 1]
            actual = (y == condition).astype(int)
            residuals[condition] = np.abs(probabilities - actual)
        return residuals

In [6]:
class DynamicBayesianNetwork:
    def __init__(self, conditions):
        self.conditions = conditions
        self.model = BayesianModel()
        self.setup_model()
        self.inference = VariableElimination(self.model)

    def setup_model(self):
        condition_nodes = ['Condition_' + str(cond) for cond in self.conditions]
        self.model.add_nodes_from(['State'] + condition_nodes)
        self.model.add_edges_from([('State', node) for node in condition_nodes])
        cpd_state = TabularCPD(variable='State', variable_card=2, values=[[0.5], [0.5]])
        self.model.add_cpds(cpd_state)
        for node in condition_nodes:
            cpd = TabularCPD(variable=node, variable_card=2, values=[[0.95, 0.05], [0.05, 0.95]], evidence=['State'], evidence_card=[2])
            self.model.add_cpds(cpd)
        assert self.model.check_model(), "Model configuration errors"

    def integrate_with_dbn(self, evidence):
        return self.inference.query(variables=['State'], evidence=evidence)

In [7]:
import pandas as pd

class ModelEvaluator:
    def __init__(self, models):
        self.models = models

    def evaluate_models(self, X_test, y_test):
        evaluations = {}
        for condition, model in self.models.items():
            accuracy, report = model.evaluate(X_test, (y_test == condition).astype(int))
            evaluations[condition] = {'accuracy': accuracy, 'report': report}
        return evaluations

    def predict_random_samples(self, X_test, y_test, n_samples=500):
        indices = np.random.choice(len(X_test), size=n_samples, replace=False)
        predictions = [np.argmax([model.predict_proba(X_test[idx:idx+1])[:, 1] for model in self.models.values()], axis=0) for idx in indices]
        actuals = y_test[indices]
        accuracy = accuracy_score(actuals, predictions)
        # Create DataFrame for better visualization
        results_df = pd.DataFrame({
            'Sample Index': indices,
            'Predicted Class': predictions,
            'Actual Class': actuals
        })
        return results_df, accuracy

In [8]:
def main():
    filepath = r'C:\Users\SIDDHARTH SINGH\Downloads\Untitled Folder\Merged_dataset.xlsx'
    data_preprocessor = DataPreprocessor(filepath)
    X_train, X_test, y_train, y_test = data_preprocessor.preprocess()

    num_conditions = len(np.unique(y_train))
    model_manager = MLPFaultModelManager(X_train.shape[1], num_conditions)
    model_manager.train_models(X_train, y_train)

    # Evaluate models before feedback
    evaluator = ModelEvaluator(model_manager.models)
    initial_evaluations = evaluator.evaluate_models(X_test, y_test)
    print("Initial Model Evaluations:")
    for condition, eval in initial_evaluations.items():
        print(f"Condition {condition}: Accuracy = {eval['accuracy']:.2%}")
        print(eval['report'])

    residual_calculator = ResidualCalculator(model_manager.models)
    residuals = residual_calculator.calculate_residuals(X_test, y_test)

    # Dynamic Bayesian Network feedback
    dbn = DynamicBayesianNetwork(range(num_conditions))
    feedback_evidence = {f'Condition_{i}': residuals[i].mean() > 0.5 for i in range(num_conditions)}
    feedback_results = dbn.integrate_with_dbn(feedback_evidence)
    state_prob = feedback_results.values
    print("Feedback from DBN:", feedback_results)

    for condition in range(num_conditions):
        if state_prob[1] > 0.5:  # Assuming the state '1' indicates a fault or need for re-training
            print(f"Re-training model for condition {condition} due to high probability of system fault.")
            y_condition = (y_train == condition).astype(int)
            model_manager.models[condition].train(X_train, y_condition)

    final_evaluations = evaluator.evaluate_models(X_test, y_test)
    print("Final Model Evaluations after DBN Feedback:")
    for condition, eval in final_evaluations.items():
        print(f"Condition {condition}: Accuracy = {eval['accuracy']:.2%}")
        print(eval['report'])

    # Evaluate on random samples
    random_sample_results, accuracy = evaluator.predict_random_samples(X_test, y_test, 500)
    print("\nRandom Sample Evaluations (500 samples):")
    print(random_sample_results.to_string(index=False))
    print(f"\nAccuracy of 500 samples: {accuracy:.2%}")

if __name__ == "__main__":
    main()



Initial Model Evaluations:
Condition 0: Accuracy = 100.00%
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1493
           1       1.00      1.00      1.00       108

    accuracy                           1.00      1601
   macro avg       1.00      1.00      1.00      1601
weighted avg       1.00      1.00      1.00      1601

Condition 1: Accuracy = 100.00%
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1506
           1       1.00      1.00      1.00        95

    accuracy                           1.00      1601
   macro avg       1.00      1.00      1.00      1601
weighted avg       1.00      1.00      1.00      1601

Condition 2: Accuracy = 100.00%
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1487
           1       1.00      1.00      1.00       114

    accuracy                           1.00      1601
   ma


Random Sample Evaluations (500 samples):
 Sample Index Predicted Class  Actual Class
          165             [9]             3
          446             [7]             7
          572             [1]             1
          776            [13]            13
         1330             [0]             0
          196             [8]             8
          239             [2]             2
          787            [10]            10
         1032             [4]             4
          747            [13]            13
          899             [4]             4
          961             [9]             9
         1007             [5]             5
          616             [2]             2
          148             [5]             5
         1405             [8]             8
          312             [4]             4
          422            [13]            13
          217            [10]            10
          769            [11]            11
         1345             [8]     