# In [None]:  (notebook cell marker left over from the export)
import os
import pickle
import numpy as np 
import random
from geonamescache import GeonamesCache
import networkx as nx
from scipy.sparse import csr_matrix
import torch
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import StandardScaler
import pandas as pd
import scipy.stats as stats

#####################

# Define the directory paths for loading the data
# (left empty here, so the two file names below are opened exactly as given)
load_directory = r""

# Load the network_matrices
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only point these paths at files from a trusted source.
network_matrices_load_path = os.path.join(load_directory, "network_matrices.pkl")
with open(network_matrices_load_path, 'rb') as f:
    network_matrices = pickle.load(f)

# Load the network_metrics
# The filtering step that follows indexes network_matrices by the position of
# each network_metrics entry, so the two pickles are expected to be aligned.
network_metrics_load_path = os.path.join(load_directory, "network_metrics.pkl")
with open(network_metrics_load_path, 'rb') as f:
    network_metrics = pickle.load(f)

##################

# Drop every network whose metrics contain a NaN, keeping the metrics list and
# the matrices list aligned position by position.
filtered_network_metrics = []
filtered_network_matrices = []

for idx, record in enumerate(network_metrics):
    # Only plain int/float values are inspected; other value types are ignored.
    numeric_values = [v for v in record.values() if isinstance(v, (int, float))]
    if any(np.isnan(v) for v in numeric_values):
        continue
    filtered_network_metrics.append(record)
    filtered_network_matrices.append(network_matrices[idx])

# Rebind the working lists to the filtered versions
network_metrics = filtered_network_metrics
network_matrices = filtered_network_matrices

# Report how many networks survived the filter
print("Number of elements in network_metrics:", len(network_metrics))
print("Number of elements in network_matrices:", len(network_matrices))

###################
def prepare_data(network_matrices, network_metrics):
    """Build feature/target arrays and split them into train and test sets.

    Each entry of ``network_metrics`` contributes one feature row made of
    eight metrics, read by key in a fixed order. Each entry of
    ``network_matrices`` is converted with ``.toarray()`` and zero-padded at
    the bottom/right so that all targets share the largest row and column
    counts found in the input.

    Returns ``(X_train, X_test, y_train, y_test)`` from a fixed-seed
    80/20 ``train_test_split``.
    """
    feature_keys = (
        'Num Nodes',
        'Avg Degree',
        'Avg Betweenness Centrality',
        'Density',
        'Assortativity',
        'Avg Shortest Path Length',
        'Diameter',
        'Avg Closeness',
    )

    # Common target shape: the largest row count and largest column count
    target_rows = max(matrix.shape[0] for matrix in network_matrices)
    target_cols = max(matrix.shape[1] for matrix in network_matrices)

    # Inputs: one row of metrics per network
    X = np.array([[record[key] for key in feature_keys] for record in network_metrics])

    # Outputs: dense matrices padded with zeros up to the common shape
    # (padding by zero rows/columns leaves an already full-size matrix as is)
    padded_matrices = []
    for matrix in network_matrices:
        extra_rows = target_rows - matrix.shape[0]
        extra_cols = target_cols - matrix.shape[1]
        padded_matrices.append(
            np.pad(matrix.toarray(), [(0, extra_rows), (0, extra_cols)],
                   mode='constant', constant_values=0)
        )
    y = np.stack(padded_matrices, axis=0)

    # Fixed random_state keeps the split reproducible between runs
    return tuple(train_test_split(X, y, test_size=0.2, random_state=42))

# Build the train/test split from the filtered matrices and metrics
X_train, X_test, y_train, y_test = prepare_data(network_matrices, network_metrics)

# Standardise the features: the scaler is fitted on the training split only
# and the same fitted transform is then applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Check the shapes of the data arrays
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)
print("Data preparation completed successfully!")

# Stack the train and test features back into a single table.
# NOTE(review): despite its name, X_scaled is built from the *unscaled*
# X_train / X_test, not from X_train_scaled / X_test_scaled. The code is left
# as it was; confirm which of the two is intended.
X_scaled = np.concatenate((X_train, X_test), axis=0)
X_scaled = pd.DataFrame(X_scaled)

# Write the table first and report success only afterwards, so the message
# is never printed when the write fails.
X_scaled.to_csv(r'') 
print("CSV file saved successfully!")

#########################

# Work on the raw ndarray behind the DataFrame
X_scaled = X_scaled.values

# Per-feature summary statistics, one list entry per column of X_scaled.
# The 'Feature' labels are listed in the same order as the columns.
statistics = {
    'Feature': ['num_nodes', 'avg_degree', 'avg_betweenness_centrality', 'density', 'assortativity', 'average_shortest_path_length', 'diameter', 'avg_closeness'],
    'Mean': [],
    'Median': [],
    'Mode': [],
    'Standard Deviation': [],
    'Mean + Std': [],
    'Mean - Std': [],
    'Variance': []
}

for i in range(X_scaled.shape[1]):
    feature = X_scaled[:, i]
    feature_mean = np.mean(feature)
    feature_median = np.median(feature)
    # Depending on the SciPy release, stats.mode returns either a scalar or a
    # length-1 array for 1-D input; the old `[0][0]` indexing fails on the
    # scalar form. np.atleast_1d makes both forms indexable.
    feature_mode = np.atleast_1d(stats.mode(feature).mode)[0]
    feature_std = np.std(feature)
    feature_std_plus_mean = feature_mean + feature_std
    feature_std_minus_mean = feature_mean - feature_std
    feature_var = np.var(feature)

    statistics['Mean'].append(feature_mean)
    statistics['Median'].append(feature_median)
    statistics['Mode'].append(feature_mode)
    statistics['Standard Deviation'].append(feature_std)
    statistics['Mean + Std'].append(feature_std_plus_mean)
    statistics['Mean - Std'].append(feature_std_minus_mean)
    statistics['Variance'].append(feature_var)

# Create a DataFrame from the statistics dictionary
df = pd.DataFrame(statistics)

# Print the DataFrame
print(df)

# Save the DataFrame as a CSV file
save_path = r''
df.to_csv(save_path, index=False)
print(f"Statistics saved as CSV: {save_path}")

######################

import torch.nn.init as init

# Convert all four split arrays to float32 PyTorch tensors in one pass.
# The tuple of source arrays is read before any of the names is rebound.
X_train, X_test, y_train, y_test = (
    torch.tensor(split_array, dtype=torch.float32)
    for split_array in (X_train, X_test, y_train, y_test)
)
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics import mean_squared_error

# Define the graph neural network model
class GraphNeuralNetwork(nn.Module):
    """Two fully connected layers with a ReLU in between.

    The network maps a batch of feature vectors of width ``input_size`` to
    outputs of width ``output_size`` through one hidden layer of width
    ``hidden_size``. Both weight matrices are re-initialised with Xavier
    uniform initialisation; the biases keep the ``nn.Linear`` defaults.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
        ##### construct the structure of your model

        # Xavier initialisation, applied in layer order
        for linear_layer in (self.fc1, self.fc2):
            init.xavier_uniform_(linear_layer.weight)

    def forward(self, x):
        """Return ``fc2(relu(fc1(x)))``."""
        hidden = self.relu(self.fc1(x))
        ##### construct the structure of your model
        return self.fc2(hidden)
    

# Set the hyperparameters
# NOTE: hidden_size, learning_rate, num_epochs and batch_size were left blank
# in the template, which is a syntax error and prevents the file from running.
# The values below are generic starting points only and should be tuned.
input_size = X_train.shape[1]       # number of metrics per network
hidden_size = 128
# The network emits one value per adjacency-matrix cell (flattened rows x cols)
output_size = y_train.shape[1] * y_train.shape[2]
learning_rate = 1e-3
num_epochs = 100
batch_size = 32

# Create an instance of the graph neural network
net = GraphNeuralNetwork(input_size, hidden_size, output_size)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

####################

# Mini-batch training
# Reporting interval in epochs. The original condition used an undefined
# placeholder name for this value; 10 is a generic default, adjust as needed.
print_every = 10

# Make sure the model is in training mode (it is switched to eval mode later)
net.train()

for epoch in range(num_epochs):
    # Draw a fresh random ordering of the training samples for this epoch
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train_shuffled = X_train[indices]
    y_train_shuffled = y_train[indices]

    for i in range(0, X_train.shape[0], batch_size):
        # Extract mini-batch (the last one may be shorter than batch_size)
        X_batch = X_train_shuffled[i:i+batch_size]
        y_batch = y_train_shuffled[i:i+batch_size]

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass; the flat output is reshaped to match the target matrices
        outputs = net(X_batch)
        outputs = outputs.view(-1, y_batch.shape[1], y_batch.shape[2])

        # Compute loss
        loss = criterion(outputs, y_batch)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

    # Report the loss of the last mini-batch every `print_every` epochs
    if (epoch + 1) % print_every == 0:
        print(f"Epoch: {epoch+1}/{num_epochs}, Loss: {loss.item()}")

# Save the trained model (weights only, via state_dict)
model_save_path = r""
torch.save(net.state_dict(), model_save_path)

#################
# Evaluation: score the trained network on the held-out test split
net.eval()
with torch.no_grad():
    # Flat predictions, one row per test sample
    flat_predictions = net(X_test)
    # Restore the (samples, rows, cols) layout of the targets
    outputs = flat_predictions.view(flat_predictions.shape[0], *y_test.shape[1:])
    # Same criterion as used during training
    test_loss = criterion(outputs, y_test)

print(f"Test Loss: {test_loss.item()}")

# Show the first test sample: prediction, then ground truth
print("Predictions:")
print(outputs[0])
print("Ground Truth:")
print(y_test[0])

##############

# Create an empty list to store the characteristics
characteristics_list = []

# Specify the directory to save the matrices
# (an empty string means output paths built with os.path.join are plain file
# names, i.e. relative to the current working directory)
matrices_dir = r''

# Create the directory if it doesn't exist.
# os.makedirs('') raises FileNotFoundError, so the call is skipped when no
# directory has been configured.
if matrices_dir:
    os.makedirs(matrices_dir, exist_ok=True)

# Run the model on a single list of city metrics
def predict_adjacency_matrix(model, city_metrics):
    """Return the raw model output for one set of city metrics.

    ``city_metrics`` is converted to a model input by ``prepare_input_data``;
    the model is then put in evaluation mode and called with gradient
    tracking disabled. The output is returned exactly as the model produced
    it (no reshaping or thresholding happens here).
    """
    model_input = prepare_input_data(city_metrics)

    # Evaluation mode + no_grad: inference only
    model.eval()
    with torch.no_grad():
        return model(model_input)

# Turn a list of city metrics into a model input
def prepare_input_data(city_metrics):
    """Return the first eight entries of ``city_metrics`` as a (1, 8) float32 tensor.

    The entries are read by position, in the order named in ``feature_order``.
    Entries beyond the eighth are ignored; fewer than eight raise IndexError.
    """
    feature_order = (
        'Num Nodes',
        'Avg Degree',
        'Avg Betweenness Centrality',
        'Density',
        'Assortativity',
        'Avg Shortest Path Length',
        'Diameter',
        'Avg Closeness',
    )

    # One value per named feature, taken from the matching list position
    feature_row = [city_metrics[position] for position, _ in enumerate(feature_order)]

    # Wrap in an outer list so the tensor carries a batch dimension of one
    return torch.tensor([feature_row], dtype=torch.float32)

# Generate, binarise and characterise one predicted network per selected input.
# NOTE(review): `num_rows_to_select` and `X_selected` are not defined in this
# part of the file; they must exist before this loop runs.

# Threshold applied to the min-max normalised prediction: cells above it
# become edges. The original line assigned an undefined placeholder name;
# 0.5 is a generic default, adjust this value as needed.
cutoff = 0.5

for i in range(num_rows_to_select):
    # Get the city metrics for the current input value and force the node
    # count to the matrix size the model was trained to emit
    city_metrics = X_selected[i].tolist()
    new_city_num_nodes = y_train.shape[1]
    city_metrics[0] = new_city_num_nodes

    print(city_metrics)

    # Predict the adjacency matrix for the new city and restore its square shape
    predicted_adjacency_matrix = predict_adjacency_matrix(net, city_metrics)
    predicted_adjacency_matrix = predicted_adjacency_matrix.reshape(new_city_num_nodes, new_city_num_nodes)

    # Normalize the predicted adjacency matrix between 0 and 1 (min-max).
    # A constant prediction yields NaN here; NaN never exceeds the cutoff, so
    # that case ends up as an edgeless matrix and is skipped below.
    min_value = torch.min(predicted_adjacency_matrix)
    max_value = torch.max(predicted_adjacency_matrix)
    normalized_adjacency_matrix = (predicted_adjacency_matrix - min_value) / (max_value - min_value)

    # Threshold into a 0/1 matrix and move to numpy for further processing
    binary_adjacency_matrix = torch.where(normalized_adjacency_matrix > cutoff, 1, 0).numpy()

    # Remove isolated nodes: those whose row AND column are both all zero.
    # (The earlier extra check that used the sample index `i` as a node index
    # deleted an unrelated node and has been dropped; the inner loop that
    # re-used `i` is replaced by a vectorised test.)
    has_row_entry = binary_adjacency_matrix.any(axis=1)
    has_col_entry = binary_adjacency_matrix.any(axis=0)
    isolated_nodes = np.flatnonzero(~(has_row_entry | has_col_entry))
    binary_adjacency_matrix = np.delete(binary_adjacency_matrix, isolated_nodes, axis=0)
    binary_adjacency_matrix = np.delete(binary_adjacency_matrix, isolated_nodes, axis=1)

    # Update the value of new_city_num_nodes
    new_city_num_nodes = binary_adjacency_matrix.shape[0]

    # Nothing left to analyse: the averages below would divide by zero
    if new_city_num_nodes == 0:
        print(f"Sample {i}: no entries above cutoff {cutoff}; skipping.")
        continue

    # Convert the modified binary adjacency matrix to a NetworkX graph
    graph = nx.from_numpy_array(binary_adjacency_matrix, create_using=nx.Graph)

    # Calculate the network characteristics
    num_nodes = graph.number_of_nodes()
    avg_degree = sum(dict(graph.degree()).values()) / num_nodes
    betweenness_centrality = nx.betweenness_centrality(graph)
    avg_betweenness_centrality = sum(betweenness_centrality.values()) / num_nodes
    density = nx.density(graph)
    assortativity = nx.degree_assortativity_coefficient(graph)

    # Path-based metrics need a connected graph: use the whole graph when it
    # is connected, otherwise its largest connected component
    if nx.is_connected(graph):
        metric_graph = graph
    else:
        largest_component = max(nx.connected_components(graph), key=len)
        metric_graph = graph.subgraph(largest_component)

    average_shortest_path_length = nx.average_shortest_path_length(metric_graph)
    diameter = nx.diameter(metric_graph)
    avg_closeness = sum(nx.closeness_centrality(metric_graph).values()) / metric_graph.number_of_nodes()

    # Label results by sample index. The previous label `variation` was never
    # assigned inside the loop, so every iteration produced the same file
    # names and the same 'Variation' value.
    characteristics_list.append({'Variation': i, 'Number of nodes': num_nodes,
                                 'Average degree': avg_degree,
                                 'Average betweenness centrality': avg_betweenness_centrality,
                                 'Density': density, 'Assortativity': assortativity,
                                 'Average shortest path length': average_shortest_path_length,
                                 'Diameter': diameter, 'Average closeness': avg_closeness})

    # Save the predicted adjacency matrix as a CSV file
    predicted_matrix_file_path = os.path.join(matrices_dir, f'predicted_{i}.csv')
    np.savetxt(predicted_matrix_file_path, predicted_adjacency_matrix.numpy(), delimiter=',')
    print(f"Predicted adjacency matrix saved: {predicted_matrix_file_path}")

    # Save the binary adjacency matrix as a CSV file
    binary_matrix_file_path = os.path.join(matrices_dir, f'binary_{i}.csv')
    np.savetxt(binary_matrix_file_path, binary_adjacency_matrix, delimiter=',')
    print(f"Binary adjacency matrix saved: {binary_matrix_file_path}")

# Collect all per-network characteristics into one table and write it next
# to the saved matrices
characteristics_file_path = os.path.join(matrices_dir, 'characteristics.csv')
characteristics_df = pd.DataFrame(characteristics_list)

# index=False: the row index is not written to the file
characteristics_df.to_csv(characteristics_file_path, index=False)
print(f"Characteristics saved as CSV: {characteristics_file_path}")

