In [None]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from prefixspan import PrefixSpan
import pennylane as qml
import shap
import matplotlib.pyplot as plt
import os
import seaborn as sns

# --- Configuration ---------------------------------------------------------
# Placeholder locations: set these to the input workbook and to the directory
# that should receive the generated result files.
dataset_path = "your_path"
result_path = "your_path"

# Column to predict and the columns used as model inputs. The names are used
# verbatim to index the loaded frame, so they must match its headers exactly.
target = 'AQI'
features = [
    'CO', 'O3', 'NO2', 'SO2', 'PM10', 'PM2.5',
    'Highest tempreture: 12pm', 'Wind:km/h',
]

# --- Load and order the data -----------------------------------------------
data = pd.read_excel(dataset_path)
data['date'] = pd.to_datetime(data['date'])
data = data.sort_values(by=['node', 'date'])


# --- Partition into federated nodes ----------------------------------------
def _node_number(label):
    """Return the integer that follows the last space in a node label."""
    return int(label.split(' ')[-1])


# Node labels in numeric (not lexicographic) order
nodes = sorted(data['node'].unique(), key=_node_number)

# One sub-frame per node, keyed by node label
federated_data = {label: data.loc[data['node'] == label] for label in nodes}

# Quantum Optimization for Hyperparameters
def quantum_hyperparameter_tuning(seed=None):
    """
    Minimise the expectation value of a two-qubit circuit and return the angles.

    The circuit applies RX(params[0]) on wire 0, RY(params[1]) on wire 1 and a
    CNOT from wire 0 to wire 1, then measures PauliZ on wire 0. SciPy's BFGS
    minimises that expectation value starting from two angles drawn uniformly
    from [0, pi).

    Args:
        seed: Optional seed for the random starting angles. ``None`` (default)
            draws from NumPy's global random state, as the function always
            did; any other value gives a reproducible starting point.

    Returns:
        numpy.ndarray holding the two optimised angles (``res.x``).

    NOTE(review): only wire 0 is measured and it is the CNOT control, so the
    cost looks independent of ``params[1]``; that angle would then simply keep
    its random starting value. Confirm whether this is intended.
    """
    from scipy.optimize import minimize

    dev = qml.device("default.qubit", wires=2)

    @qml.qnode(dev)
    def quantum_circuit(params):
        qml.RX(params[0], wires=0)
        qml.RY(params[1], wires=1)
        qml.CNOT(wires=[0, 1])
        return qml.expval(qml.PauliZ(0))

    def cost(params):
        # Hand SciPy a plain Python scalar rather than a 0-d tensor/array.
        return float(quantum_circuit(params))

    if seed is None:
        # Unchanged default: use (and advance) the global NumPy random state.
        init_params = np.random.uniform(0, np.pi, 2)
    else:
        # Dedicated generator so a seeded call leaves the global state alone.
        init_params = np.random.RandomState(seed).uniform(0, np.pi, 2)

    res = minimize(cost, init_params, method="BFGS")
    return res.x  # Optimal parameters

# Adaptive Federated Averaging
def adaptive_federated_averaging(global_model, local_model, X_global, y_global, local_weight):
    """
    Blend the two models' predictions and refit the global model on the blend.

    Both models predict on ``X_global``. The predictions are mixed as
    ``(1 - local_weight) * global + local_weight * local`` and the global
    model is then fitted, in place, with that mixture as its target.

    Args:
        global_model: Object exposing ``predict`` and ``fit``; it is refitted.
        local_model: Object exposing ``predict``.
        X_global: Inputs on which both models are evaluated.
        y_global: Accepted for interface compatibility; not used in the body.
        local_weight: Share given to the local model's predictions.

    Returns:
        The same ``global_model`` object, after refitting.
    """
    # Evaluate both models on the shared inputs (global first, then local)
    preds_from_global = global_model.predict(X_global)
    preds_from_local = local_model.predict(X_global)

    # Convex-style mix of the two prediction vectors
    global_share = 1 - local_weight
    blended_targets = global_share * preds_from_global + local_weight * preds_from_local

    # The blended predictions become the new training target
    global_model.fit(X_global, blended_targets)
    return global_model

# Shared state filled in by the per-node training loop
evaluation_results = []  # one [node, mse, mae, r2, accuracy] row per evaluated node
global_model = None      # stays None until the first node has produced a model

# Accuracy Calculation
def calculate_accuracy(y_true, y_pred):
    """
    Return the fraction of predictions lying within a tolerance of the truth.

    The tolerance is 10% of the absolute mean of ``y_true``. Inputs are
    compared position by position: both are flattened to 1-D float arrays
    first, so pandas index labels and ``(n, 1)`` column vectors cannot cause
    misalignment or accidental broadcasting.

    Args:
        y_true: Array-like of true values (list, ndarray, Series, ...).
        y_pred: Array-like of predicted values with the same number of items.

    Returns:
        Share of predictions within the tolerance, between 0 and 1, or NaN
        when the inputs are empty.

    Raises:
        ValueError: If the two inputs do not hold the same number of values.
    """
    true_vals = np.asarray(y_true, dtype=float).ravel()
    pred_vals = np.asarray(y_pred, dtype=float).ravel()

    if true_vals.shape != pred_vals.shape:
        raise ValueError(
            f"y_true and y_pred must have the same number of values "
            f"(got {true_vals.size} and {pred_vals.size})"
        )
    if true_vals.size == 0:
        # Nothing to score; avoid the empty-mean and 0/0 warnings.
        return float("nan")

    # abs() keeps the tolerance non-negative when the mean is negative;
    # a negative tolerance could never be satisfied.
    threshold = 0.1 * abs(np.mean(true_vals))  # 10% of the mean value as a threshold
    within_threshold = np.abs(true_vals - pred_vals) <= threshold
    return np.sum(within_threshold) / true_vals.size

# Split data into a global validation set for federated averaging
# NOTE: this is the complete dataset (every node, train and test rows alike);
# it serves as the common inputs on which model predictions are blended.
X_global = data[features]
y_global = data[target]

for node, node_data in federated_data.items():
    print(f"Training on node: {node}")

    # Inputs and target for this node only
    X = node_data[features]
    y = node_data[target]

    # Check and handle missing values in the dataset
    if X.isnull().values.any() or y.isnull().values.any():
        print(f"Missing values detected in node {node}. Filling missing values with the mean.")
        X = X.fillna(X.mean())
        y = y.fillna(y.mean())

    # Train-Test Split (fixed seed so each node's split is repeatable)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Hyperparameter Tuning via Quantum Optimization
    quantum_params = quantum_hyperparameter_tuning()
    # The optimiser is unconstrained, so the raw angles can be negative or
    # tiny. Map them onto valid settings: a learning rate in [0.001, 1] and a
    # tree depth of at least 1 (a depth of 0 would lift the depth limit
    # entirely and a negative one is invalid).
    learning_rate = float(np.clip(abs(quantum_params[0]) * 0.1, 1e-3, 1.0))
    max_depth = max(1, int(abs(quantum_params[1]) * 10))

    # Train Local XGBoost Model
    model = XGBRegressor(learning_rate=learning_rate, max_depth=max_depth, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate
    y_pred = model.predict(X_test)

    # Check if y_pred contains NaN values
    if np.isnan(y_pred).any():
        print(f"NaN values detected in predictions for node {node}. Skipping evaluation for this node.")
        continue  # Skip this node if predictions are invalid

    # Calculate metrics
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    accuracy = calculate_accuracy(y_test, y_pred)
    evaluation_results.append([node, mse, mae, r2, accuracy])

    # Adaptive Federated Averaging
    local_weight = 1 / (1 + mse)  # Nodes with lower MSE get higher weights
    if global_model is None:
        # The first successfully evaluated node seeds the global model
        global_model = model
    else:
        global_model = adaptive_federated_averaging(global_model, model, X_global, y_global, local_weight)

# Anomaly Detection using PrefixSpan
def detect_anomalies(data):
    """
    Mine the top closed sequential patterns from the rows of ``data``.

    Each row contributes one sequence: its values for the module-level
    ``features`` columns, in that column order. PrefixSpan is restricted to
    patterns of length 2 to 5 and asked for its top 10 closed patterns.

    Args:
        data: Frame containing every column listed in ``features``.

    Returns:
        Whatever ``PrefixSpan.topk(10, closed=True)`` returns for those
        sequences.
    """
    # One sequence per row, built from the feature columns
    rows_as_sequences = []
    for _, record in data.iterrows():
        rows_as_sequences.append(list(record[features]))

    miner = PrefixSpan(rows_as_sequences)
    miner.minlen, miner.maxlen = 2, 5  # pattern length bounds
    return miner.topk(10, closed=True)

# Make sure the output directory exists before anything is written into it;
# the writers below do not create missing directories themselves.
if result_path:
    os.makedirs(result_path, exist_ok=True)

# Collect anomaly results
anomaly_results = []
for node, node_data in federated_data.items():
    patterns = detect_anomalies(node_data)
    for pattern in patterns:
        # Each entry is indexed as (frequency, pattern)
        anomaly_results.append({'Node': node, 'Pattern': pattern[1], 'Frequency': pattern[0]})

# Save Anomaly Detection Results
anomaly_df = pd.DataFrame(anomaly_results)
anomaly_df.to_excel(os.path.join(result_path, "anomaly_results.xlsx"), index=False)

# Save Evaluation Results
result_df = pd.DataFrame(evaluation_results, columns=['Node', 'MSE', 'MAE', 'R2', 'Accuracy'])
result_df.to_excel(os.path.join(result_path, "evaluation_results.xlsx"), index=False)

# SHAP Explainability
# global_model is still None when no node produced a usable model (for example
# if every node was skipped); fail with a clear message instead of an obscure
# error from inside SHAP.
if global_model is None:
    raise RuntimeError(
        "No global model was trained (no node produced a usable model); "
        "cannot run the SHAP analysis."
    )
explainer = shap.Explainer(global_model, X_global)
shap_values = explainer(X_global)

# Save SHAP Summary Plot
plt.figure(figsize=(10, 6))
shap.summary_plot(shap_values, X_global, show=False)  # show=False so the figure can be saved
plt.savefig(os.path.join(result_path, "SHAP_Summary_Plot.png"))
plt.close()

# Visualizations
# Top 5 Nodes with Highest AQI (ranked by each node's mean AQI)
top_nodes = data.groupby('node')['AQI'].mean().sort_values(ascending=False).head(5).index
plt.figure(figsize=(8, 4))
for node in top_nodes:
    node_data = federated_data[node]
    plt.plot(node_data['date'], node_data['AQI'], label=f"Node {node}")
plt.title("Top 5 Nodes with Highest AQI")
plt.xlabel("Date")
plt.ylabel("AQI")
plt.legend()
plt.savefig(os.path.join(result_path, "Top_5_Nodes_AQI.png"))
plt.close()

print("All results, SHAP analysis, and visualizations have been saved successfully.")


Training on node: District 1
Training on node: District 2
Training on node: District 3
Training on node: District 4
Training on node: District 5
Training on node: District 6
Training on node: District 7
Training on node: District 8
Training on node: District 9
Training on node: District 10
Training on node: District 11
Training on node: District 12
Training on node: District 13
Training on node: District 14
Training on node: District 15
Training on node: District 16
Training on node: District 17
Training on node: District 18
Training on node: District 19
Training on node: District 20
Training on node: District 21
Training on node: District 22




All results, SHAP analysis, and visualizations have been saved successfully.
