In [4]:
!pip install openpyxl

Collecting openpyxl
  Using cached openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)
Collecting et-xmlfile (from openpyxl)
  Using cached et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.2


In [15]:
import os
import pandas as pd
import numpy as np
import networkx as nx
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import openpyxl

from sklearn.metrics import r2_score


In [6]:
cluster_info = pd.read_excel('ClusterInformation.xlsx')

In [7]:
# Define your graph based on latitude and longitude
def create_graph_from_coordinates(cluster_info):
    G = nx.Graph()
    for idx, row in cluster_info.iterrows():
        node_name = row['Cluster']
        latitude = row['lat']
        longitude = row['lng']
        G.add_node(node_name, latitude=latitude, longitude=longitude)
    
    # Define edges based on geographical proximity or any other criteria
    # For example, you can connect nodes if they are within a certain distance.
    # Here, we connect nodes if they are within 0.1 degrees of latitude or longitude.
    for u, v, data in G.edges(data=True):
        u_lat, u_lng = G.nodes[u]['latitude'], G.nodes[u]['longitude']
        v_lat, v_lng = G.nodes[v]['latitude'], G.nodes[v]['longitude']
        if abs(u_lat - v_lat) <= 0.005 or abs(u_lng - v_lng) <= 0.005:
        #if abs(u_lat - v_lat) <= 0.5 or abs(u_lng - v_lng) <= 0.5:
            G[u][v]['weight'] = 1.0  # You can assign a weight to the edges

    return G

In [8]:
# Load predictor and target data
def load_data(predictor_folder, target_folder):
    predictor_data = {}
    target_data = {}
    for node_name in os.listdir(predictor_folder):
        predictor_file = os.path.join(predictor_folder, node_name)
        if os.path.isfile(predictor_file) and predictor_file.endswith('.xlsx'):
            node_df = pd.read_excel(predictor_file)
            predictor_data[node_name] = node_df
            
    for node_name in os.listdir(target_folder):
        target_file = os.path.join(target_folder, node_name)
        if os.path.isfile(target_file) and target_file.endswith('.xlsx'):
            node_df = pd.read_excel(target_file)
            target_data[node_name] = node_df
    
    return predictor_data, target_data


In [9]:
# Split data into training and test sets based on date ranges
def split_data(predictor_data, target_data, start_date, end_date):
    train_predictors = {}
    test_predictors = {}
    train_targets = {}
    test_targets = {}
    
    for node_name, predictor_df in predictor_data.items():
        target_df = target_data.get(node_name)
        if target_df is not None:
            mask_train = (predictor_df['Date'] >= start_date) & (predictor_df['Date'] < end_date)
            mask_test = (predictor_df['Date'] >= end_date)
            
            train_predictors[node_name] = predictor_df[mask_train]
            test_predictors[node_name] = predictor_df[mask_test]
            
            train_targets[node_name] = target_df[mask_train]
            test_targets[node_name] = target_df[mask_test]
    
    return train_predictors, test_predictors, train_targets, test_targets


In [10]:

# Build the adjacency matrix (graph) based on coordinates
G = create_graph_from_coordinates(cluster_info)

In [11]:
# Load data and split into training and test sets
predictor_folder = 'Predictors'
target_folder = 'Response'
start_date = '2010-01-01'
end_date = '2018-01-02'  # Adjust as needed

predictor_data, target_data = load_data(predictor_folder, target_folder)
train_predictors, test_predictors, train_targets, test_targets = split_data(
    predictor_data, target_data, start_date, end_date
)

# Preprocess your predictor and target data as needed


In [12]:

# Define and build your GNN model using TensorFlow and Keras
def build_gnn_model(input_shape, num_output_nodes):
    model = keras.Sequential([
        # Define your GNN layers here
        # Example: layers.GCNConv(32, activation='relu'),
        # Add more layers as needed

        
        ###keras.layers.GATConv(32, activation='relu')

        keras.layers.Dense(32, activation='relu'),  # Adjust parameters as needed
        #keras.layers.Dense(64, activation='relu'),  # Add more layers as needed
        keras.layers.Dense(32, activation='relu'), 
        #keras.layers.Dense(32, activation='relu'), 
        # Output layer
        keras.layers.Dense(num_output_nodes)  # Adjust the number of output nodes

        # Output layer
        #keras.layers.GraphConv(32, activation='relu')

        #keras.layers.Dense(num_output_nodes)  # Adjust the number of output nodes
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')  # Adjust the loss function as needed
    return model


In [13]:

# Define GNN model hyperparameters
predictor_dim = 5  # Replace with the actual number of predictor features
input_shape = (None, predictor_dim)  # Adjust the input shape based on your data
num_output_nodes = 1  # Adjust for your regression task
input_shape = (None, predictor_dim)  # Adjust the input shape based on your data
num_output_nodes = 1  # Adjust for your regression task

In [14]:

# Build the GNN model
gnn_model = build_gnn_model(input_shape, num_output_nodes)


In [16]:

# Train the GNN model
for node_name, train_predictor_df in train_predictors.items():
    train_target_df = train_targets[node_name]
    train_predictors_array = train_predictor_df.drop('Date', axis=1).values
    train_targets_array = train_target_df['Flooding'].values
    
    # Train the model for each node
    gnn_model.fit(train_predictors_array, train_targets_array, epochs=50)  # Adjust epochs and batch size


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/5

In [18]:

# Evaluate the GNN model on the test set
for node_name, test_predictor_df in test_predictors.items():
    test_target_df = test_targets[node_name]
    test_predictors_array = test_predictor_df.drop('Date', axis=1).values
    test_targets_array = test_target_df['Flooding'].values
    
    # Evaluate the model for each node
    mse = mean_squared_error(test_targets_array, gnn_model.predict(test_predictors_array))
    print(f"Node {node_name} - Mean Squared Error: {mse}")
    
for node_name, test_predictor_df in test_predictors.items():
    test_target_df = test_targets[node_name]
    test_predictors_array = test_predictor_df.drop('Date', axis=1).values
    test_targets_array = test_target_df['Flooding'].values
    
    # Evaluate the model for each node
    mse = mean_squared_error(test_targets_array, gnn_model.predict(test_predictors_array))
    r2 = r2_score(test_targets_array, gnn_model.predict(test_predictors_array))  # Calculate R2 score
    print(f"Node {node_name} - Mean Squared Error: {mse}, R2 Score: {r2}")
# You can also use the trained model to make predictions for new data

# Visualize results on a map (optional)
# Use latitude and longitude information from cluster_info and predicted values
# You can use libraries like matplotlib or folium for visualization


Node 3.xlsx - Mean Squared Error: 12.56759667438056
Node 1.xlsx - Mean Squared Error: 29.498054092771962
Node 4.xlsx - Mean Squared Error: 7.433908629491989
Node 2.xlsx - Mean Squared Error: 0.7094134106116973
Node 5.xlsx - Mean Squared Error: 2.5657786295372715
Node 0.xlsx - Mean Squared Error: 1.0449257417495608
Node 3.xlsx - Mean Squared Error: 12.56759667438056, R2 Score: 0.5045920693748721
Node 1.xlsx - Mean Squared Error: 29.498054092771962, R2 Score: 0.27574972760048955
Node 4.xlsx - Mean Squared Error: 7.433908629491989, R2 Score: 0.4317809539529043
Node 2.xlsx - Mean Squared Error: 0.7094134106116973, R2 Score: 0.3252544584679067
Node 5.xlsx - Mean Squared Error: 2.5657786295372715, R2 Score: 0.3787049560807155
Node 0.xlsx - Mean Squared Error: 1.0449257417495608, R2 Score: 0.34196111710314037
