# In [None]:
import cv2
import os
import glob
import time
import pandas as pd
import numpy as np

from skimage.data import astronaut
from skimage.segmentation import slic
from scipy.spatial import Delaunay
from skimage.segmentation import mark_boundaries
from matplotlib.lines import Line2D
import matplotlib.pyplot as plt

from skimage.color import label2rgb
from skimage.transform import resize
from scipy.spatial.distance import cdist

from PIL import Image

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras import layers, models

from datetime import datetime

# Globals shared by every function in this cell (accumulated across all images)
edges           = [] # all edge pairs [source-node-id, target-node-id]
attrs           = [] # list of attributes of all nodes
graph_id        = 1 # id of latest graph
node_id         = 0 # id of latest node
graph_indicator = [] # graph-id for each node
node_labels     = [] # labels for all nodes
graph_labels    = [] # labels for all graphs
edge_labels = []
edge_attrs = []
node_edge_attrs = []

# Selecting dataset and k
dataset_name = 'BUSI-with-GT'  # 'BUSI-with-GT' or  'BUSBRA' 
k = 6  # number of nearest neighbours linked to each node: 2, 4, 6 or 8

# Input masks and output folder for the per-node patch images
mask_dir = 'Dataset/'+ dataset_name + '/original/Masks/'
patch_dir  = 'Dataset/'+ dataset_name + '/data-edge/k' + str(k) + '/train/patch-image/'

# Working directories (one folder of input images per class)
benign_dir  = 'Dataset/'+ dataset_name + '/original/train/benign'
malignant_dir  = 'Dataset/'+ dataset_name + '/original/train/malignant'
normal_dir  = 'Dataset/'+ dataset_name + '/original/train/normal'

# Output directory for the generated graph text files.
# exist_ok=True makes re-runs safe and removes the check-then-create race.
sourcepath='Dataset/'+ dataset_name + '/data-edge/k' + str(k) + '/train/raw'
os.makedirs(sourcepath, exist_ok=True)

# Per-node bookkeeping: patch id and the image each node came from
node_file_name = []
image_file_name = []

# activity-label vs activity-name mapping (3-class)
activity_map = {
    1: 'benign',
    2: 'malignant',
    3: 'normal',
}


#
def normalize_rows(arr):
    """Min-max scale every row of a 2-D array to the range [0, 1].

    Each row has its minimum subtracted and is divided by its own
    (max - min). A row whose values are all equal has a zero range; such a
    row is returned as all zeros instead of NaN.

    Args:
        arr: 2-D numeric array.

    Returns:
        Float array of the same shape as ``arr``.
    """
    arr = np.asarray(arr, dtype=float)
    row_min = arr.min(axis=1, keepdims=True)
    row_range = arr.max(axis=1, keepdims=True) - row_min
    # Constant rows: divide by 1 so (arr - min) == 0 stays 0 instead of 0/0.
    safe_range = np.where(row_range == 0, 1.0, row_range)
    return (arr - row_min) / safe_range

def find_k_min_indices_desc(array, k):
    """Return the indices of the ``k`` smallest entries of ``array``.

    The indices come back ordered from the smallest value upwards.

    Raises:
        ValueError: if ``k`` exceeds the length of ``array``.
    """
    if len(array) < k:
        raise ValueError("K is larger than the size of the array.")
    # argsort orders ascending, so the first k positions are the k minima.
    return np.argsort(array)[:k]


def resize_superpixel(superpixel, target_size):
    """Resize a superpixel crop to a ``target_size`` x ``target_size`` square.

    Delegates to ``skimage.transform.resize`` with reflect padding and
    anti-aliasing enabled, and returns that call's result unchanged.
    """
    output_shape = (target_size, target_size)
    return resize(superpixel, output_shape, mode='reflect', anti_aliasing=True)

def extract_features(model, img_array):
    """Run ``model.predict`` on one sample and return the output flattened.

    A leading axis of length 1 is added to ``img_array`` before it is passed
    to ``model.predict`` (called with ``verbose=0``).
    """
    batch = np.expand_dims(img_array, axis=0)
    return model.predict(batch, verbose=0).flatten()

#z-score normalization
def normalize(arr):
    """Z-score normalize ``arr`` using its global mean and standard deviation.

    The result is rounded to 3 decimals. When every value is identical the
    standard deviation is 0; zeros are returned in that case instead of
    dividing by zero.

    Args:
        arr: array-like of numbers.

    Returns:
        numpy array with the same shape as ``arr``.
    """
    arr = np.array(arr)
    mean = np.mean(arr)
    std = np.std(arr)
    if std == 0:
        # No spread: every value sits exactly on the mean.
        return np.zeros(arr.shape)
    return np.round((arr - mean) / std, 3)

def is_coordinate_in_white_region(image, x, y):
    """Return True if the pixel of ``image`` at (x, y) is white (value 255).

    ``x`` indexes the first axis and ``y`` the second; both are truncated
    with ``int``. Coordinates that fall outside the image return False
    rather than wrapping around (negative index) or raising IndexError.

    Args:
        image: 2-D array (a single-channel mask).
        x: position along the first axis.
        y: position along the second axis.

    Returns:
        bool
    """
    row, col = int(x), int(y)
    height, width = image.shape[:2]
    # Check that the coordinate is within the image bounds
    if not (0 <= row < height and 0 <= col < width):
        return False
    # White is represented as 255
    return bool(image[row, col] == 255)

def replace_prefix(filename, new_prefix):
    """Swap the text before the first '_' in ``filename`` for ``new_prefix``.

    A filename that contains no underscore is returned unchanged.
    """
    _, separator, remainder = filename.partition('_')
    if not separator:
        # No underscore means there is no prefix to replace.
        return filename
    return new_prefix + '_' + remainder

def get_filename(directory):
    """Return the last component (the file name) of a path string.

    Both '/' and '\\' are treated as separators, because ``glob`` joins the
    directory and file name with a backslash on Windows even when the
    pattern itself was written with forward slashes.

    Args:
        directory: path string.

    Returns:
        The text after the last path separator, or the whole string when it
        contains no separator.
    """
    normalized = directory.replace('\\', '/')
    return normalized.rsplit('/', 1)[-1]

#generate graph for a given edge-image file
def generate_graphs(filename, node_label, activity_map):
    """Turn one image into superpixel nodes and k-nearest-neighbour edges.

    The image is segmented with SLIC and every superpixel becomes a node.
    Each node is linked to its ``k`` closest superpixels, where closeness is
    the mean of the row-normalized centre distance and the row-normalized
    average-intensity distance. The bounding-box crop of each superpixel is
    resized to 32x32 and saved under ``patch_dir`` in a sub-folder named
    after the node's class.

    Results are appended to the module-level lists ``edges``,
    ``node_labels``, ``node_file_name``, ``image_file_name`` and
    ``graph_indicator``; ``node_id`` is advanced by the number of nodes made.

    Args:
        filename: path of the image to read.
        node_label: key into ``activity_map``; given to nodes whose centre
            lies on a white (255) mask pixel. All other nodes get label 3
            ('normal').
        activity_map: mapping from numeric label to class name.

    Raises:
        FileNotFoundError: if the image or its mask cannot be read.
    """
    print(" ... Reading image: " + filename+" ...")
    global node_id, edges, attrs, graph_id, node_labels, graph_indicator, edge_labels, edge_attrs, node_edge_attrs, mask_dir, node_file_name, image_file_name, k

    gray = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError("Could not read image: " + filename)
    image = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
    dim1, dim2, _ = image.shape

    # The mask shares the image's file name with the prefix replaced by 'mask'.
    saved_filename = get_filename(filename)
    image_mask_name = mask_dir + replace_prefix(saved_filename, 'mask')
    image_mask = cv2.imread(image_mask_name, cv2.IMREAD_GRAYSCALE)
    if image_mask is None:
        raise FileNotFoundError("Could not read mask: " + image_mask_name)

    # Step 1: apply SLIC to get the superpixels
    segments_slic = slic(image, n_segments=150, compactness=50)
    unique_segments = np.unique(segments_slic)
    n_nodes = len(unique_segments)

    print("Image type: " + activity_map[node_label] + "\nPixel matrix is of: " + str(dim1) + "x" + str(dim2))

    # Step 2: centre (row, col) and average intensity of every superpixel
    centers = []
    average_intensities = []
    for seg_id in unique_segments:
        mask = segments_slic == seg_id
        centers.append(np.mean(np.nonzero(mask), axis=1))
        average_intensities.append(np.mean(image[mask], axis=0).astype(int))
    centers = np.array(centers)
    average_intensities = np.array(average_intensities)

    # Step 3: combined pairwise distance (position + intensity, equal weight)
    distances_intensity = normalize_rows(cdist(average_intensities, average_intensities, metric='euclidean'))
    distances_pos = normalize_rows(cdist(centers, centers, metric='euclidean'))
    distances = (distances_pos + distances_intensity)/2

    # Step 4: link every node to its nearest neighbours. Rows of `distances`
    # are addressed by position in `unique_segments`, not by label value, so
    # the result does not depend on which label SLIC starts counting from.
    first_node = node_id + 1
    # k+1 because the node itself is expected among its own nearest entries;
    # clamp so images with very few superpixels do not raise.
    n_neighbours = min(k + 1, n_nodes)
    edge = 0
    for pos in range(n_nodes):
        source = first_node + pos
        for neighbour in find_k_min_indices_desc(distances[pos], n_neighbours):
            target = first_node + int(neighbour)
            if target != source:
                edges.append([source, target])
                edge += 1

    # Image.save does not create missing folders, so make sure both possible
    # class folders exist before writing patches.
    lesion_class = activity_map[node_label]
    for class_name in (lesion_class, 'normal'):
        os.makedirs(patch_dir + class_name, exist_ok=True)

    # Step 5: one patch image and one label per superpixel
    cnt = 0
    for pos, segment_val in enumerate(unique_segments):
        # Bounding box of the current superpixel
        rows, cols = np.nonzero(segments_slic == segment_val)
        top, bottom = rows.min(), rows.max()
        left, right = cols.min(), cols.max()

        superpixel_image = image[top:bottom+1, left:right+1]
        resized_superpixel = resize_superpixel(superpixel_image, 32)
        PIL_img = Image.fromarray((resized_superpixel * 255).astype(np.uint8))

        # A node inherits the image label only when its centre is on the mask
        x, y = centers[pos]
        if is_coordinate_in_white_region(image_mask, x, y):
            label_entry = [node_label, lesion_class]
            print(lesion_class)
        else:
            label_entry = [3, 'normal']

        PIL_img.save(patch_dir + label_entry[1] + '/' + str(node_id+1) + '.png')
        node_labels.append(label_entry)
        node_file_name.append([node_id+1])
        image_file_name.append([saved_filename])

        # Assign node id
        node_id += 1
        cnt     += 1
        graph_indicator.append(graph_id)

    print("For given image nodes formed: " + str(cnt)+" edges formed: " + str(edge))
    # NOTE(review): graph_id is never advanced here (the increment below is
    # disabled), so every node is tagged with the same graph id - confirm
    # this is intended.
    # if(cnt != 0):
    #     graph_id += 1

#generate graphs for all edge-image under given dir along with proper label
def generate_graph_with_labels(dirname, label, activity_map):
    """Run ``generate_graphs`` on every ``*.png`` file directly in ``dirname``.

    One ``[label, activity_map[label]]`` entry is appended to the global
    ``graph_labels`` per processed image.

    Args:
        dirname: directory holding the images of one class.
        label: numeric class label passed on to ``generate_graphs``.
        activity_map: mapping from numeric label to class name.
    """
    print("\n... Reading Directory: " + dirname + " ...\n")
    global graph_labels
    # glob returns files in arbitrary, filesystem-dependent order; sorting
    # keeps node ids and output rows reproducible between runs and machines.
    for filename in sorted(glob.glob(dirname + '/*.png')):
        generate_graphs(filename, label, activity_map)
        graph_labels.append([label, activity_map[label]])

#generate graphs for all directories
def process_graphs(
                   benign_dir,
                   malignant_dir,
                   normal_dir,
                   activity_map):
    """Build graphs for the benign, malignant and normal image folders.

    The folders are processed in that order with labels 1, 2 and 3, then a
    short summary of the accumulated node and graph counts is printed.

    Args:
        benign_dir: directory of images labelled 1.
        malignant_dir: directory of images labelled 2.
        normal_dir: directory of images labelled 3.
        activity_map: mapping from numeric label to class name.
    """
    global node_labels, graph_labels
    labelled_dirs = ((benign_dir, 1), (malignant_dir, 2), (normal_dir, 3))
    for dirname, label in labelled_dirs:
        generate_graph_with_labels(dirname, label, activity_map)

    print("Processing done")
    # Separator added so the two counts no longer run together in the output.
    print("Total nodes formed: " + str(len(node_labels)) + " Total graphs formed: " + str(len(graph_labels)))




start = time.time()

# Build nodes and edges for every class directory; this fills the
# module-level lists used below.
process_graphs(
               benign_dir,
               malignant_dir,
               normal_dir,
               activity_map)

#check all the lengths of globals
#comment if not necessary
print(len(node_labels))
print(len(graph_labels))
print(len(edges))
print(len(attrs))
print("Calculating complete, Start Saving process =================")


#create adjacency dataframe (one row per edge)
df_A = pd.DataFrame(columns = ["node-1", "node-2"], data = np.array(edges))
print("Shape of edge dataframe: " + str(df_A.shape))
print("\n--summary of dataframe--\n", df_A.head(50))

#create node label dataframe
df_node_label = pd.DataFrame(data = np.array(node_labels), columns=["label", "activity-name"])
print("shape of node-label dataframe: " + str(df_node_label.shape))
print("\n--summary of dataframe--\n", df_node_label)

#create graph label dataframe
df_graph_label = pd.DataFrame(data = np.array(graph_labels), columns = ["label","activity-name"])
print("shape of graph-label dataframe: " + str(df_graph_label.shape))
print("\n--summary of dataframe--\n", df_graph_label.head(50))

#create node-attribute dataframe (currently disabled: no attributes are computed)
# df_node_attr = pd.DataFrame(data = np.array(attrs))
# print("shape of node-attribute dataframe: " + str(df_node_attr.shape))
# print("\n--summary of dataframe--\n", df_node_attr.head(50))

#create graph-indicator dataframe
df_graph_indicator = pd.DataFrame(data = np.array(graph_indicator), columns=["graph-id"])
print("shape of graph-indicator dataframe: " + str(df_graph_indicator.shape))
print("\n--summary of dataframe--\n", df_graph_indicator.head(50))

#create node file name dataframe (patch id of every node)
df_node_file_name = pd.DataFrame(data = np.array(node_file_name))
print("shape of node-file-name dataframe: " + str(df_node_file_name.shape))
# Print a dataframe summary rather than dumping the full raw list.
print("\n--summary of dataframe--\n", df_node_file_name.head(50))

#create image file name dataframe (source image of every node)
df_image_file_name = pd.DataFrame(data = np.array(image_file_name))
print("shape of image-file-name dataframe: " + str(df_image_file_name.shape))
print("\n--summary of dataframe--\n", df_image_file_name.head(50))


#omit activity name for graph-label and node-label
#since GIN model will only accept the label
df_node_label = df_node_label.drop(["activity-name"], axis=1)
print(df_node_label.head(50))

df_graph_label = df_graph_label.drop(["activity-name"], axis=1)
print(df_graph_label.head(50))



def save_dataframe_to_txt(df, filepath):
    """Write ``df`` to ``filepath`` as comma-separated text.

    Neither the header row nor the index column is written, and an existing
    file is overwritten.
    """
    df.to_csv(filepath, sep=',', header=False, index=False, mode='w')




# Save every dataframe as a header-less .txt file under `sourcepath`,
# named train_<type>.txt, where <type> is one of:
#   A               --> edge list (pairs of node ids)
#   graph_indicator --> graph-id of every node
#   graph_labels    --> label of every graph
#   node_labels     --> label of every node
#   node_file_name  --> patch id of every node
#   image_file_name --> source image of every node
# (node_attributes is not written: no node attributes are computed.)
print("The new directory is created!")
train_outputs = (
    (df_A, '/train_A.txt'),
    (df_graph_indicator, '/train_graph_indicator.txt'),
    (df_graph_label, '/train_graph_labels.txt'),
    (df_node_label, '/train_node_labels.txt'),
    (df_node_file_name, '/train_node_file_name.txt'),
    (df_image_file_name, '/train_image_file_name.txt'),
)
for frame, suffix in train_outputs:
    save_dataframe_to_txt(frame, sourcepath + suffix)


# Report how long graph construction and saving took, in minutes
end = time.time()
time_to_construct = (end - start)/60
print("Total time (min) for constructing Graph: ", time_to_construct)
print("=======End constructing Graph process here======")


# Timestamp of when this run finished
now = datetime.now()

print("now =", now)

# dd/mm/YY H:M:S
dt_string = now.strftime("%d/%m/%Y %H:%M:%S")
print("date and time =", dt_string)

## For Test Folder

# In [None]:
import cv2
import os
import glob
import time
import pandas as pd
import numpy as np

from skimage.data import astronaut
from skimage.segmentation import slic
from scipy.spatial import Delaunay
from skimage.segmentation import mark_boundaries
from matplotlib.lines import Line2D
import matplotlib.pyplot as plt

from skimage.color import label2rgb
from skimage.transform import resize
from scipy.spatial.distance import cdist

from PIL import Image

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras import layers, models

from datetime import datetime

# Globals shared by every function in this cell (accumulated across all images)
edges           = [] # all edge pairs [source-node-id, target-node-id]
attrs           = [] # list of attributes of all nodes
graph_id        = 1 # id of latest graph
node_id         = 0 # id of latest node
graph_indicator = [] # graph-id for each node
node_labels     = [] # labels for all nodes
graph_labels    = [] # labels for all graphs
edge_labels = []
edge_attrs = []
node_edge_attrs = []

# Selecting dataset and k
dataset_name = 'BUSI-with-GT'  # 'BUSI-with-GT' or  'BUSBRA' 
k = 6  # number of nearest neighbours linked to each node: 2, 4, 6 or 8

# Input masks and output folder for the per-node patch images
mask_dir = 'Dataset/'+ dataset_name + '/original/Masks/'

patch_dir  = 'Dataset/'+ dataset_name + '/data-edge/k' + str(k) + '/test/patch-image/'

# Working directories (one folder of input images per class)
benign_dir  = 'Dataset/'+ dataset_name + '/original/test/benign'
malignant_dir  = 'Dataset/'+ dataset_name + '/original/test/malignant'
normal_dir  = 'Dataset/'+ dataset_name + '/original/test/normal'

# Output directory for the generated graph text files.
# exist_ok=True makes re-runs safe and removes the check-then-create race.
sourcepath='Dataset/'+ dataset_name + '/data-edge/k' + str(k) + '/test/raw'
os.makedirs(sourcepath, exist_ok=True)

# Per-node bookkeeping: patch id and the image each node came from
node_file_name = []
image_file_name = []

# activity-label vs activity-name mapping (3-class)
activity_map = {
    1: 'benign',
    2: 'malignant',
    3: 'normal',
}


#
def normalize_rows(arr):
    """Min-max scale every row of a 2-D array to the range [0, 1].

    Each row has its minimum subtracted and is divided by its own
    (max - min). A row whose values are all equal has a zero range; such a
    row is returned as all zeros instead of NaN.

    Args:
        arr: 2-D numeric array.

    Returns:
        Float array of the same shape as ``arr``.
    """
    arr = np.asarray(arr, dtype=float)
    row_min = arr.min(axis=1, keepdims=True)
    row_range = arr.max(axis=1, keepdims=True) - row_min
    # Constant rows: divide by 1 so (arr - min) == 0 stays 0 instead of 0/0.
    safe_range = np.where(row_range == 0, 1.0, row_range)
    return (arr - row_min) / safe_range

def find_k_min_indices_desc(array, k):
    """Return the indices of the ``k`` smallest entries of ``array``.

    The indices come back ordered from the smallest value upwards.

    Raises:
        ValueError: if ``k`` exceeds the length of ``array``.
    """
    if len(array) < k:
        raise ValueError("K is larger than the size of the array.")
    # argsort orders ascending, so the first k positions are the k minima.
    return np.argsort(array)[:k]


def resize_superpixel(superpixel, target_size):
    """Resize a superpixel crop to a ``target_size`` x ``target_size`` square.

    Delegates to ``skimage.transform.resize`` with reflect padding and
    anti-aliasing enabled, and returns that call's result unchanged.
    """
    output_shape = (target_size, target_size)
    return resize(superpixel, output_shape, mode='reflect', anti_aliasing=True)

def extract_features(model, img_array):
    """Run ``model.predict`` on one sample and return the output flattened.

    A leading axis of length 1 is added to ``img_array`` before it is passed
    to ``model.predict`` (called with ``verbose=0``).
    """
    batch = np.expand_dims(img_array, axis=0)
    return model.predict(batch, verbose=0).flatten()

#z-score normalization
def normalize(arr):
    """Z-score normalize ``arr`` using its global mean and standard deviation.

    The result is rounded to 3 decimals. When every value is identical the
    standard deviation is 0; zeros are returned in that case instead of
    dividing by zero.

    Args:
        arr: array-like of numbers.

    Returns:
        numpy array with the same shape as ``arr``.
    """
    arr = np.array(arr)
    mean = np.mean(arr)
    std = np.std(arr)
    if std == 0:
        # No spread: every value sits exactly on the mean.
        return np.zeros(arr.shape)
    return np.round((arr - mean) / std, 3)

def is_coordinate_in_white_region(image, x, y):
    """Return True if the pixel of ``image`` at (x, y) is white (value 255).

    ``x`` indexes the first axis and ``y`` the second; both are truncated
    with ``int``. Coordinates that fall outside the image return False
    rather than wrapping around (negative index) or raising IndexError.

    Args:
        image: 2-D array (a single-channel mask).
        x: position along the first axis.
        y: position along the second axis.

    Returns:
        bool
    """
    row, col = int(x), int(y)
    height, width = image.shape[:2]
    # Check that the coordinate is within the image bounds
    if not (0 <= row < height and 0 <= col < width):
        return False
    # White is represented as 255
    return bool(image[row, col] == 255)

def replace_prefix(filename, new_prefix):
    """Swap the text before the first '_' in ``filename`` for ``new_prefix``.

    A filename that contains no underscore is returned unchanged.
    """
    _, separator, remainder = filename.partition('_')
    if not separator:
        # No underscore means there is no prefix to replace.
        return filename
    return new_prefix + '_' + remainder

def get_filename(directory):
    """Return the last component (the file name) of a path string.

    Both '/' and '\\' are treated as separators, because ``glob`` joins the
    directory and file name with a backslash on Windows even when the
    pattern itself was written with forward slashes.

    Args:
        directory: path string.

    Returns:
        The text after the last path separator, or the whole string when it
        contains no separator.
    """
    normalized = directory.replace('\\', '/')
    return normalized.rsplit('/', 1)[-1]

#generate graph for a given edge-image file
def generate_graphs(filename, node_label, activity_map):
    """Turn one image into superpixel nodes and k-nearest-neighbour edges.

    The image is segmented with SLIC and every superpixel becomes a node.
    Each node is linked to its ``k`` closest superpixels, where closeness is
    the mean of the row-normalized centre distance and the row-normalized
    average-intensity distance. The bounding-box crop of each superpixel is
    resized to 32x32 and saved under ``patch_dir`` in a sub-folder named
    after the node's class.

    Results are appended to the module-level lists ``edges``,
    ``node_labels``, ``node_file_name``, ``image_file_name`` and
    ``graph_indicator``; ``node_id`` is advanced by the number of nodes made.

    Args:
        filename: path of the image to read.
        node_label: key into ``activity_map``; given to nodes whose centre
            lies on a white (255) mask pixel. All other nodes get label 3
            ('normal').
        activity_map: mapping from numeric label to class name.

    Raises:
        FileNotFoundError: if the image or its mask cannot be read.
    """
    print(" ... Reading image: " + filename+" ...")
    global node_id, edges, attrs, graph_id, node_labels, graph_indicator, edge_labels, edge_attrs, node_edge_attrs, mask_dir, node_file_name, image_file_name, k

    gray = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError("Could not read image: " + filename)
    image = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
    dim1, dim2, _ = image.shape

    # The mask shares the image's file name with the prefix replaced by 'mask'.
    saved_filename = get_filename(filename)
    image_mask_name = mask_dir + replace_prefix(saved_filename, 'mask')
    image_mask = cv2.imread(image_mask_name, cv2.IMREAD_GRAYSCALE)
    if image_mask is None:
        raise FileNotFoundError("Could not read mask: " + image_mask_name)

    # Step 1: apply SLIC to get the superpixels
    segments_slic = slic(image, n_segments=150, compactness=50)
    unique_segments = np.unique(segments_slic)
    n_nodes = len(unique_segments)

    print("Image type: " + activity_map[node_label] + "\nPixel matrix is of: " + str(dim1) + "x" + str(dim2))

    # Step 2: centre (row, col) and average intensity of every superpixel
    centers = []
    average_intensities = []
    for seg_id in unique_segments:
        mask = segments_slic == seg_id
        centers.append(np.mean(np.nonzero(mask), axis=1))
        average_intensities.append(np.mean(image[mask], axis=0).astype(int))
    centers = np.array(centers)
    average_intensities = np.array(average_intensities)

    # Step 3: combined pairwise distance (position + intensity, equal weight)
    distances_intensity = normalize_rows(cdist(average_intensities, average_intensities, metric='euclidean'))
    distances_pos = normalize_rows(cdist(centers, centers, metric='euclidean'))
    distances = (distances_pos + distances_intensity)/2

    # Step 4: link every node to its nearest neighbours. Rows of `distances`
    # are addressed by position in `unique_segments`, not by label value, so
    # the result does not depend on which label SLIC starts counting from.
    first_node = node_id + 1
    # k+1 because the node itself is expected among its own nearest entries;
    # clamp so images with very few superpixels do not raise.
    n_neighbours = min(k + 1, n_nodes)
    edge = 0
    for pos in range(n_nodes):
        source = first_node + pos
        for neighbour in find_k_min_indices_desc(distances[pos], n_neighbours):
            target = first_node + int(neighbour)
            if target != source:
                edges.append([source, target])
                edge += 1

    # Image.save does not create missing folders, so make sure both possible
    # class folders exist before writing patches.
    lesion_class = activity_map[node_label]
    for class_name in (lesion_class, 'normal'):
        os.makedirs(patch_dir + class_name, exist_ok=True)

    # Step 5: one patch image and one label per superpixel
    cnt = 0
    for pos, segment_val in enumerate(unique_segments):
        # Bounding box of the current superpixel
        rows, cols = np.nonzero(segments_slic == segment_val)
        top, bottom = rows.min(), rows.max()
        left, right = cols.min(), cols.max()

        superpixel_image = image[top:bottom+1, left:right+1]
        resized_superpixel = resize_superpixel(superpixel_image, 32)
        PIL_img = Image.fromarray((resized_superpixel * 255).astype(np.uint8))

        # A node inherits the image label only when its centre is on the mask
        x, y = centers[pos]
        if is_coordinate_in_white_region(image_mask, x, y):
            label_entry = [node_label, lesion_class]
            print(lesion_class)
        else:
            label_entry = [3, 'normal']

        PIL_img.save(patch_dir + label_entry[1] + '/' + str(node_id+1) + '.png')
        node_labels.append(label_entry)
        node_file_name.append([node_id+1])
        image_file_name.append([saved_filename])

        # Assign node id
        node_id += 1
        cnt     += 1
        graph_indicator.append(graph_id)

    print("For given image nodes formed: " + str(cnt)+" edges formed: " + str(edge))
    # NOTE(review): graph_id is never advanced here (the increment below is
    # disabled), so every node is tagged with the same graph id - confirm
    # this is intended.
    # if(cnt != 0):
    #     graph_id += 1

#generate graphs for all edge-image under given dir along with proper label
def generate_graph_with_labels(dirname, label, activity_map):
    """Run ``generate_graphs`` on every ``*.png`` file directly in ``dirname``.

    One ``[label, activity_map[label]]`` entry is appended to the global
    ``graph_labels`` per processed image.

    Args:
        dirname: directory holding the images of one class.
        label: numeric class label passed on to ``generate_graphs``.
        activity_map: mapping from numeric label to class name.
    """
    print("\n... Reading Directory: " + dirname + " ...\n")
    global graph_labels
    # glob returns files in arbitrary, filesystem-dependent order; sorting
    # keeps node ids and output rows reproducible between runs and machines.
    for filename in sorted(glob.glob(dirname + '/*.png')):
        generate_graphs(filename, label, activity_map)
        graph_labels.append([label, activity_map[label]])

#generate graphs for all directories
def process_graphs(
                   benign_dir,
                   malignant_dir,
                   normal_dir,
                   activity_map):
    """Build graphs for the benign, malignant and normal image folders.

    The folders are processed in that order with labels 1, 2 and 3, then a
    short summary of the accumulated node and graph counts is printed.

    Args:
        benign_dir: directory of images labelled 1.
        malignant_dir: directory of images labelled 2.
        normal_dir: directory of images labelled 3.
        activity_map: mapping from numeric label to class name.
    """
    global node_labels, graph_labels
    labelled_dirs = ((benign_dir, 1), (malignant_dir, 2), (normal_dir, 3))
    for dirname, label in labelled_dirs:
        generate_graph_with_labels(dirname, label, activity_map)

    print("Processing done")
    # Separator added so the two counts no longer run together in the output.
    print("Total nodes formed: " + str(len(node_labels)) + " Total graphs formed: " + str(len(graph_labels)))



start = time.time()

# Build nodes and edges for every class directory; this fills the
# module-level lists used below.
process_graphs(
               benign_dir,
               malignant_dir,
               normal_dir,
               activity_map)

#check all the lengths of globals
#comment if not necessary
print(len(node_labels))
print(len(graph_labels))
print(len(edges))
print(len(attrs))
print("Calculating complete, Start Saving process =================")


#create adjacency dataframe (one row per edge)
df_A = pd.DataFrame(columns = ["node-1", "node-2"], data = np.array(edges))
print("Shape of edge dataframe: " + str(df_A.shape))
print("\n--summary of dataframe--\n", df_A.head(50))

#create node label dataframe
df_node_label = pd.DataFrame(data = np.array(node_labels), columns=["label", "activity-name"])
print("shape of node-label dataframe: " + str(df_node_label.shape))
print("\n--summary of dataframe--\n", df_node_label)

#create graph label dataframe
df_graph_label = pd.DataFrame(data = np.array(graph_labels), columns = ["label","activity-name"])
print("shape of graph-label dataframe: " + str(df_graph_label.shape))
print("\n--summary of dataframe--\n", df_graph_label.head(50))

#create node-attribute dataframe (currently disabled: no attributes are computed)
# df_node_attr = pd.DataFrame(data = np.array(attrs))
# print("shape of node-attribute dataframe: " + str(df_node_attr.shape))
# print("\n--summary of dataframe--\n", df_node_attr.head(50))

#create graph-indicator dataframe
df_graph_indicator = pd.DataFrame(data = np.array(graph_indicator), columns=["graph-id"])
print("shape of graph-indicator dataframe: " + str(df_graph_indicator.shape))
print("\n--summary of dataframe--\n", df_graph_indicator.head(50))

#create node file name dataframe (patch id of every node)
df_node_file_name = pd.DataFrame(data = np.array(node_file_name))
print("shape of node-file-name dataframe: " + str(df_node_file_name.shape))
# Print a dataframe summary rather than dumping the full raw list.
print("\n--summary of dataframe--\n", df_node_file_name.head(50))

#create image file name dataframe (source image of every node)
df_image_file_name = pd.DataFrame(data = np.array(image_file_name))
print("shape of image-file-name dataframe: " + str(df_image_file_name.shape))
print("\n--summary of dataframe--\n", df_image_file_name.head(50))


#omit activity name for graph-label and node-label
#since GIN model will only accept the label
df_node_label = df_node_label.drop(["activity-name"], axis=1)
print(df_node_label.head(50))

df_graph_label = df_graph_label.drop(["activity-name"], axis=1)
print(df_graph_label.head(50))



def save_dataframe_to_txt(df, filepath):
    """Write ``df`` to ``filepath`` as comma-separated text.

    Neither the header row nor the index column is written, and an existing
    file is overwritten.
    """
    df.to_csv(filepath, sep=',', header=False, index=False, mode='w')




# Save every dataframe as a header-less .txt file under `sourcepath`,
# named test_<type>.txt, where <type> is one of:
#   A               --> edge list (pairs of node ids)
#   graph_indicator --> graph-id of every node
#   graph_labels    --> label of every graph
#   node_labels     --> label of every node
#   node_file_name  --> patch id of every node
#   image_file_name --> source image of every node
# (node_attributes is not written: no node attributes are computed.)
print("The new directory is created!")
test_outputs = (
    (df_A, '/test_A.txt'),
    (df_graph_indicator, '/test_graph_indicator.txt'),
    (df_graph_label, '/test_graph_labels.txt'),
    (df_node_label, '/test_node_labels.txt'),
    (df_node_file_name, '/test_node_file_name.txt'),
    (df_image_file_name, '/test_image_file_name.txt'),
)
for frame, suffix in test_outputs:
    save_dataframe_to_txt(frame, sourcepath + suffix)


# Report how long graph construction and saving took, in minutes
end = time.time()
time_to_construct = (end - start)/60
print("Total time (min) for constructing Graph: ", time_to_construct)
print("=======End constructing Graph process here======")


# Timestamp of when this run finished
now = datetime.now()

print("now =", now)

# dd/mm/YY H:M:S
dt_string = now.strftime("%d/%m/%Y %H:%M:%S")
print("date and time =", dt_string)