<a href="https://colab.research.google.com/github/HamzaAhmed78629/MSc-Thesis-Proposed-Product/blob/main/Deep_Learning_Framework_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Installing the required libraries
!pip install pandas scikit-learn tensorflow python-docx ipywidgets
!pip install python-docx

Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets)
  Using cached jedi-0.19.1-py2.py3-none-any.whl.metadata (22 kB)
Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hUsing cached jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
Installing collected packages: python-docx, jedi
Successfully installed jedi-0.19.1 python-docx-1.1.2


In [None]:
# Mounting Google Drive at /content/drive
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Conv1D, Flatten, MaxPooling1D, Dropout
from docx import Document
pd.set_option("display.max_columns", None)
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
pd.set_option('display.max_rows', None)
from datetime import datetime
import ipywidgets as widgets
from IPython.display import display, clear_output
import random
import warnings

warnings.filterwarnings("ignore")

# Buttons for the interactive workflow
load_button = widgets.Button(button_style='success', description="Load Data")
check_compliance_button = widgets.Button(button_style='primary', description="Check Compliance")
adjust_button = widgets.Button(button_style='danger', description="Adjust Policies")
enforce_button = widgets.Button(button_style='warning', description="Enforce Policies")

# Output area in which button callbacks show their results
output = widgets.Output()

# Render the buttons stacked vertically with the output area underneath
display(
    widgets.VBox(
        children=[load_button, check_compliance_button, adjust_button, enforce_button, output]
    )
)

# Loading and preprocessing the data
def load_data(*, csv_path="/content/drive/MyDrive/HomeC.csv"):
    """Load the CSV, preprocess the features and create a train/test split.

    Besides returning the split, this function assigns the module-level
    globals ``data``, ``X_preprocessed``, ``Y``, ``X_train``, ``X_test``,
    ``y_train`` and ``y_test``, which other parts of the notebook read.

    Parameters
    ----------
    csv_path : str, keyword-only
        Location of the CSV file. It must contain the columns ``time``,
        ``use [kW]``, ``cloudCover``, ``icon`` and ``summary``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` from a 70/30 split with
        ``random_state=42``.
    """
    print("Loading data.....")

    global data, X_preprocessed, Y, X_train, X_test, y_train, y_test
    # Loading the dataset
    data = pd.read_csv(csv_path)

    # Handling missing values
    data = data.dropna()  # Ensure no missing values remain
    data = data.iloc[:-1]  # Remove any last row edge case

    # Replace the raw 'time' column by a synthetic one-minute timeline and use
    # it as index. (The raw values are not parsed first: the result of such a
    # conversion would be overwritten by this assignment anyway.)
    data['time'] = pd.DatetimeIndex(pd.date_range('2016-01-01 05:00', periods=len(data), freq='min'))
    data = data.set_index('time')

    # Separating features and target
    X = data.drop(columns=['use [kW]'])  # Features
    Y = data['use [kW]']  # Target

    # Converting 'cloudCover' to numeric; values that cannot be parsed become
    # NaN and are replaced by the column mean. The result is assigned back
    # explicitly because an in-place fillna on the selected column is chained
    # assignment, which does not update X under pandas Copy-on-Write.
    cloud_cover = pd.to_numeric(X['cloudCover'], errors='coerce')
    X['cloudCover'] = cloud_cover.fillna(cloud_cover.mean())

    # Handling any remaining NaN values in the data
    X = X.fillna(0)

    # Identifying numeric and categorical columns
    numeric_features = X.select_dtypes(include=['float64', 'int64']).columns
    categorical_features = ['icon', 'summary']

    # Preprocessor: Scaling numeric data and encoding categorical data.
    # sparse_threshold=0 forces a dense result: the MinMaxScaler below does not
    # accept sparse input, and the one-hot encoder produces sparse columns.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numeric_features),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
        ],
        remainder='passthrough',
        sparse_threshold=0,
    )

    # Applying preprocessing to the dataset
    # NOTE(review): both scalers are fitted on the full dataset before the
    # split below, so statistics of the test rows influence the training
    # features - confirm whether that is acceptable for the evaluation.
    X_preprocessed = preprocessor.fit_transform(X)

    # Additional Scaling
    scaler = MinMaxScaler()
    X_preprocessed = scaler.fit_transform(X_preprocessed)

    print("Data is loaded and preprocessed successfully.")

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, Y, test_size=0.3, random_state=42)
    print("Train-test split done successfully")

    return X_train, X_test, y_train, y_test

# Attaching the loader to the load button.
# on_click needs a callable that accepts the clicked button; handing it the
# return value of load_data() (a tuple) would register a non-callable handler.
def _on_load_button_clicked(b):
    """Reload and preprocess the data when the "Load Data" button is pressed."""
    with output:
        output.clear_output()
        load_data()

load_button.on_click(_on_load_button_clicked)

# Load once up front: the summary below reads the globals that load_data()
# assigns (data, X_train, X_test, y_train, y_test).
load_data()

# Chronological split of the raw frame, using the same 70/30 train/test
# proportion as the randomised split performed inside load_data()
size = int(len(data) * 0.7)
train = data[:size]
test = data[size:]
print('Number of points in this IoT applications dataset:', len(data))
print('Number of points in train:', len(train))
print('Number of points in test:', len(test))
data.info()
# A bare expression in the middle of a cell is not rendered, so display it explicitly
display(data.head())

# Printing statement of testing and training data is split
print("\033[1m" + "IoT data after Splitting the Testing and Training data")

# Outputing the shapes of the training and testing sets
# (X_train, X_test, y_train and y_test are the globals set by load_data())
print("Training set shape:")
print(f"X_train: {X_train.shape}")
print(f"y_train: {y_train.shape}")

print("\nTesting set shape:")
print(f"X_test: {X_test.shape}")
print(f"y_test: {y_test.shape}")

# Switch bold output off again
print("\033[0m")

# Reshaping data for CNN and LSTM
def reshape_data(X_train, X_test):
    """Append a trailing axis of length 1 to both feature matrices.

    Each input is reshaped from ``(rows, columns)`` to ``(rows, columns, 1)``.

    Returns
    -------
    tuple
        ``(X_train_reshaped, X_test_reshaped)``.
    """
    def _with_trailing_axis(matrix):
        n_rows, n_cols = matrix.shape[0], matrix.shape[1]
        return matrix.reshape((n_rows, n_cols, 1))

    return _with_trailing_axis(X_train), _with_trailing_axis(X_test)

# Defining and Training the Convolutional Neural Network Model
def train_cnn_model(X_train, y_train, X_test, y_test, epochs=5, batch_size=32, learning_rate=0.0001):
    """Build, compile and fit a 1-D convolutional regression model.

    Parameters
    ----------
    X_train, X_test : array
        Inputs whose per-sample shape is ``(X_train.shape[1], 1)``.
    y_train, y_test : array-like
        Regression targets for the two sets.
    epochs : int, default 5
        Maximum number of training epochs.
    batch_size : int, default 32
        Mini-batch size passed to ``fit``.
    learning_rate : float, default 0.0001
        Learning rate of the Adam optimizer. The optimizer object is now
        actually handed to ``compile``; previously it was created but the
        string ``'adam'`` (Keras default learning rate) was used instead.

    Returns
    -------
    The fitted ``Sequential`` model.
    """
    print("Training CNN Model...")
    cnn_model = Sequential([
        Conv1D(32, 3, activation='relu', input_shape=(X_train.shape[1], 1)),
        MaxPooling1D(2),
        Conv1D(64, 3, activation='relu'),
        MaxPooling1D(2),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(1, activation='linear')  # Linear output for regression
    ])
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    cnn_model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mse'])  # Using MSE for regression
    # With patience=5 the callback can only stop training when epochs > 5.
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    # (X_test, y_test) serve as validation data and therefore drive early stopping
    cnn_model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_test, y_test),
        callbacks=[early_stopping],
        verbose=1,
    )
    return cnn_model

# Defining and Training the Long-Short Term Memory Model
def train_lstm_model(X_train, y_train, X_test, y_test, epochs=2, batch_size=32, learning_rate=0.0001):
    """Build, compile and fit a two-layer LSTM regression model.

    Parameters
    ----------
    X_train, X_test : array
        Inputs whose per-sample shape is ``(X_train.shape[1], 1)``.
    y_train, y_test : array-like
        Regression targets for the two sets.
    epochs : int, default 2
        Maximum number of training epochs.
    batch_size : int, default 32
        Mini-batch size passed to ``fit``.
    learning_rate : float, default 0.0001
        Learning rate of the Adam optimizer. The optimizer object is now
        actually handed to ``compile``; previously it was created but the
        string ``'adam'`` (Keras default learning rate) was used instead.

    Returns
    -------
    The fitted ``Sequential`` model.
    """
    print("Training LSTM Model...")
    lstm_model = Sequential([
        LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
        Dropout(0.2),
        LSTM(50, return_sequences=False),
        Dropout(0.2),
        Dense(1, activation='linear')  # Linear output for regression
    ])
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    lstm_model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mse'])  # Using MSE for regression
    # With patience=5 the callback can only stop training when epochs > 5.
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    # (X_test, y_test) serve as validation data and therefore drive early stopping
    lstm_model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_test, y_test),
        callbacks=[early_stopping],
        verbose=1,
    )
    return lstm_model

# Detecting threats using CNN and LSTM models
def detect_threats(cnn_model, lstm_model, X_test, threshold=0.5):
    """Average the predictions of both models and flag values above ``threshold``.

    Parameters
    ----------
    cnn_model, lstm_model
        Objects exposing ``predict(X_test)``.
    X_test
        Input handed unchanged to both ``predict`` calls.
    threshold : float, default 0.5
        An averaged prediction strictly greater than this counts as a threat.

    Returns
    -------
    tuple
        ``(threats, final_predictions)``: the element-wise comparison result
        and the averaged predictions.
    """
    print("Detecting threats...")
    # CNN first, LSTM second - same call order as the positional arguments
    per_model = [model.predict(X_test) for model in (cnn_model, lstm_model)]
    averaged = (per_model[0] + per_model[1]) / 2
    flagged = averaged > threshold
    print("Final predictions generated and threat detection completed")
    return flagged, averaged

# Compliance checking against regulatory compliance (GDPR, CCPA, NIST)
def check_compliance(threats, threshold=0.5):
    """Return the element-wise result of ``threats < threshold``.

    Values strictly below ``threshold`` are reported as compliant.
    """
    return threats < threshold

def evaluate_against_gdpr(threats, threshold=0.2):
    """Return ``threats < threshold`` (element-wise for arrays).

    Parameters
    ----------
    threats : scalar or array
        Threat values to evaluate.
    threshold : float, default 0.2
        Cut-off used for the GDPR check; values strictly below it pass.
    """
    return threats < threshold

def evaluate_against_ccpa(threats, threshold=0.3):
    """Return ``threats < threshold`` (element-wise for arrays).

    Parameters
    ----------
    threats : scalar or array
        Threat values to evaluate.
    threshold : float, default 0.3
        Cut-off used for the CCPA check; values strictly below it pass.
    """
    return threats < threshold

def evaluate_against_nist(threats, threshold=0.4):
    """Return ``threats < threshold`` (element-wise for arrays).

    Parameters
    ----------
    threats : scalar or array
        Threat values to evaluate.
    threshold : float, default 0.4
        Cut-off used for the NIST check; values strictly below it pass.
    """
    return threats < threshold

# Compliance checking against regulatory compliance (GDPR, CCPA, NIST)
def regulatory_compliance_check(threats, predictions):
    """Compare ``predictions`` against one fixed limit per standard.

    ``threats`` is accepted but not used. A prediction passes a standard when
    it is strictly below that standard's limit; "Overall Compliance" is the
    logical AND of the four individual results.

    Returns
    -------
    dict
        Keys, in order: "General Compliance", "GDPR Compliance",
        "CCPA Compliance", "NIST Compliance", "Overall Compliance".
    """
    limits = (
        ("General Compliance", 0.7),
        ("GDPR Compliance", 0.6),
        ("CCPA Compliance", 0.5),
        ("NIST Compliance", 0.4),
    )
    report = {}
    combined = None
    for label, limit in limits:
        passed = predictions < limit
        report[label] = passed
        # Fold each standard into the running conjunction
        combined = passed if combined is None else combined & passed
    report["Overall Compliance"] = combined
    return report

def process_compliance_results(compliance_results):
    """Print the percentage of compliant entries for every standard.

    Parameters
    ----------
    compliance_results : dict
        Maps a standard's name to an array-like of boolean flags.

    The percentage is computed over all elements of each result, so arrays
    with more than one column are counted correctly. A result without any
    elements is reported as having no samples instead of printing ``nan``.
    """
    for standard, result in compliance_results.items():
        flags = np.asarray(result)
        total_count = flags.size
        if total_count == 0:
            print(f"{standard}: no samples to evaluate")
            continue
        compliant_count = np.sum(flags)
        compliance_percentage = (compliant_count / total_count) * 100
        print(f"{standard}: {compliance_percentage:.2f}% compliant")

def on_check_compliance_button_clicked(b):
    """Button callback: run threat detection and print the compliance summary.

    Reads the module-level names ``cnn_model``, ``lstm_model`` and
    ``X_test_reshaped`` and stores the outcome in the global
    ``compliance_results``. When any of the three names has not been defined
    yet, a message is shown in the output area instead of failing with a
    NameError.

    Parameters
    ----------
    b
        The clicked button (unused).
    """
    global compliance_results
    with output:
        output.clear_output()
        required = ("cnn_model", "lstm_model", "X_test_reshaped")
        missing = [name for name in required if name not in globals()]
        if missing:
            print("Cannot check compliance yet; not available: " + ", ".join(missing))
            return
        # Compliance check
        threats, predictions = detect_threats(cnn_model, lstm_model, X_test_reshaped)
        compliance_results = regulatory_compliance_check(threats, predictions)
        process_compliance_results(compliance_results)

check_compliance_button.on_click(on_check_compliance_button_clicked)

# Load, Adjust, and Save Policies
def load_policies(doc_path):
    """Read policies from the paragraphs of a .docx document.

    A paragraph of the form ``key: value`` is split at its first colon; a
    paragraph without a colon becomes a key with an empty value. Keys and
    values are stripped of surrounding whitespace. Paragraphs that are empty
    or contain only whitespace are skipped. When a key occurs more than once,
    the last occurrence wins.

    Parameters
    ----------
    doc_path
        Path of the document to open.

    Returns
    -------
    dict
        Mapping of policy name to policy text.
    """
    policies = {}
    for para in Document(doc_path).paragraphs:
        text = para.text.strip()
        if not text:
            continue
        # partition() yields an empty value when the paragraph has no colon
        key, _separator, value = text.partition(":")
        policies[key.strip()] = value.strip()
    return policies

def save_policies(policies, doc_path):
    """Write ``policies`` to a new .docx document saved at ``doc_path``.

    The document starts with the level-1 heading "Adjusted Policies",
    followed by one ``key: value`` paragraph per entry.
    """
    document = Document()
    document.add_heading("Adjusted Policies", level=1)
    for name in policies:
        document.add_paragraph(f"{name}: {policies[name]}")
    document.save(doc_path)

def adjust_policies(policies, compliance_results, output_path="/content/drive/MyDrive/AdjustedPolicies.docx"):
    """Annotate every policy according to its compliance flag and save the result.

    Parameters
    ----------
    policies : dict
        Maps policy name to policy text.
    compliance_results : iterable
        One truthy/falsy flag per policy, in the iteration order of ``policies``.
    output_path : str
        Where the adjusted policies are saved through ``save_policies``.

    Returns
    -------
    dict
        Policy name mapped to the annotated text.

    Raises
    ------
    ValueError
        If the number of flags differs from the number of policies. Pairing
        them anyway would silently leave policies out of the saved document.
    """
    flags = list(compliance_results)
    if len(flags) != len(policies):
        raise ValueError(
            f"Expected {len(policies)} compliance results, got {len(flags)}"
        )

    adjusted_policies = {}
    for (key, value), compliant in zip(policies.items(), flags):
        if not compliant:
            adjusted_policies[key] = f"Adjust Policy: {value} - Action Required"
        else:
            adjusted_policies[key] = f"{value} - No Policy Adjustment Needed."
    save_policies(adjusted_policies, output_path)
    return adjusted_policies

# Each policy on a new line
def print_policies(policies, title="Policies"):
    """Print ``title`` followed by one indented ``key: value`` line per policy.

    A blank line is printed at the end to separate consecutive listings.
    """
    lines = [f"{title}:"]
    lines.extend(f"  {name}: {text}" for name, text in policies.items())
    print("\n".join(lines))
    print()  # trailing blank line

# Enforcing policies
def enforce_policies(policies):
    """Print one status line per policy.

    An entry whose text contains "Adjust Policy" is reported as being
    enforced; every other entry is reported as needing no adjustment.
    """
    for name, text in policies.items():
        needs_adjustment = "Adjust Policy" in text
        message = (
            f"Enforcing Policy: {text}..."
            if needs_adjustment
            else f"{name}: No adjustment needed. Data is processed in compliance."
        )
        print(message)

# Alert mechanism
def send_alert(message):
    """Print ``message`` prefixed with "ALERT: "."""
    alert_line = f"ALERT: {message}"
    print(alert_line)

# Main function to run everything
def main():
    """Run the whole workflow: load data, train both models, check compliance,
    adjust the policies, enforce them and raise alerts.

    The trained models, the reshaped test inputs and the model-derived
    compliance results are stored as module-level globals because
    ``on_check_compliance_button_clicked`` reads them by those names.
    """
    global cnn_model, lstm_model, X_test_reshaped, compliance_results

    X_train, X_test, y_train, y_test = load_data()
    # Printing statement of testing and training data is split
    print("\033[1m" + "IoT data points after Splitting the Testing and Training data")
    print("Training set shape:")
    print(f"X_train: {X_train.shape}")
    print(f"y_train: {y_train.shape}")
    print("\nTesting set shape:")
    print(f"X_test: {X_test.shape}")
    print(f"y_test: {y_test.shape}")
    # Switch bold output off again so the following output is not rendered bold
    print("\033[0m")
    # Reshaping data for CNN and LSTM
    X_train_reshaped, X_test_reshaped = reshape_data(X_train, X_test)
    # Training the models
    cnn_model = train_cnn_model(X_train_reshaped, y_train, X_test_reshaped, y_test)
    lstm_model = train_lstm_model(X_train_reshaped, y_train, X_test_reshaped, y_test)
    # Detecting threats
    threats, predictions = detect_threats(cnn_model, lstm_model, X_test_reshaped)
    # Compliance check
    compliance_results = regulatory_compliance_check(threats, predictions)
    process_compliance_results(compliance_results)
    policies_file_path = "/content/drive/MyDrive/Policies.docx"
    initial_policies = load_policies(policies_file_path)
    print(f"Loaded Policies: {initial_policies}\n")
    # NOTE(review): the per-policy flags are drawn at random (unseeded) and are
    # not derived from the model-based compliance_results above - confirm how
    # policies should map to the computed compliance before relying on the
    # adjusted policies. They are kept under their own name so the model-based
    # results are not overwritten.
    policy_compliance_flags = [random.choice([True, False]) for _ in range(len(initial_policies))]
    print(f"Compliance Results: {policy_compliance_flags}")
    adjusted_policies = adjust_policies(initial_policies, policy_compliance_flags)
    print(f"Adjusted Policies: {adjusted_policies}\n")
    print_policies(initial_policies, "Initial Policies")
    print_policies(adjusted_policies, "Adjusted Policies")
    enforce_policies(adjusted_policies)
    # Sending alerts for non-compliance and based on policy adjustments
    for policy, action in adjusted_policies.items():
        if "Adjust Policy" in action:
            send_alert(f"Threats detected. {action}")
        else:
            print(f"{policy} is compliant and can continue processing data.")

# Executes the main function
if __name__ == "__main__":
    main()

VBox(children=(Button(button_style='success', description='Load Data', style=ButtonStyle()), Button(button_sty…

Loading data.....
Data is loaded and preprocessed successfully.
Train-test split done successfully
Number of points in this IoT applications dataset: 503909
Number of points in train: 151172
Number of points in test: 352737
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 503909 entries, 2016-01-01 05:00:00 to 2016-12-16 03:28:00
Data columns (total 31 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   use [kW]             503909 non-null  float64
 1   gen [kW]             503909 non-null  float64
 2   House overall [kW]   503909 non-null  float64
 3   Dishwasher [kW]      503909 non-null  float64
 4   Furnace 1 [kW]       503909 non-null  float64
 5   Furnace 2 [kW]       503909 non-null  float64
 6   Home office [kW]     503909 non-null  float64
 7   Fridge [kW]          503909 non-null  float64
 8   Wine cellar [kW]     503909 non-null  float64
 9   Garage door [kW]     503909 non-null  float64
 10  Kitchen 12 [kW