In [13]:
import sys
import os
import os.path as osp
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
import ecole
import json
from torch_geometric.data import (InMemoryDataset, Data)
from torch_geometric.data import DataLoader

In [61]:
bias_threshold = 0.5

# Preprocess indices of bipartite graphs to make batching work.
class MyData(Data):
    def __inc__(self, key, value):
        if key in ['edge_index_var']:
            return torch.tensor([self.num_nodes_var, self.num_nodes_con]).view(2, 1)
        elif key in ['edge_index_con']:
            return torch.tensor([self.num_nodes_con, self.num_nodes_var]).view(2, 1)
        elif key in ['index']:
            return torch.tensor(self.num_nodes_con)
        elif key in ['index_var']:
            return torch.tensor(self.num_nodes_var)
        else:
            return 0


class MyTransform(object):
    def __call__(self, data):
        new_data = MyData()
        for key, item in data:
            new_data[key] = item
        return new_data
class GraphDataset(InMemoryDataset):
    def __init__(self, name, root, data_path, bias_threshold, transform=None, pre_transform=None,
                 pre_filter=None):
        super(GraphDataset, self).__init__(root, transform, pre_transform, pre_filter)
        self.name = name
        self.data, self.slices = torch.load(self.processed_paths[0])

        self.bias_threshold = bias_threshold
        global global_name
        global global_data_path
    @property
    def raw_file_names(self):
        return "SC"

    @property
    def processed_file_names(self):
        return "SC"

    def download(self):
        pass
    
    def process(self):
        print("Preprocessing.")

        data_list = []
        pd = "DataSetMIPGNN/"
        num_graphs = len(os.listdir(pd))

        print(pd)

        # Iterate over instance files and create data objects.
        for num, dirname in enumerate(os.listdir(pd)):
            filename = pd + dirname+ "/problem.lp"
            print(filename, num, num_graphs)
            problem = ecole.scip.Model.from_file(filename).as_pyscipopt()
            data = Data()

            #  Maps networkx ids to new variable node ids.
            node_to_varnode = {}
            #  Maps networkx ids to new constraint node ids.
            node_to_connode = {}

            # Number of variables.
            num_nodes_var = 0
            # Number of constraints.
            num_nodes_con = 0
            # Targets (classes).
            y = []
            y_real = []
            # Features for variable nodes.
            feat_var = []
            # Feature for constraints nodes.
            feat_con = []
            # Right-hand sides of equations.
            feat_rhs = []

            index = []
            index_var = []
            obj = []

            label_file = open(pd + dirname+"/label.json","rb")
            aux = json.load(label_file)
            bestSol = aux["Best_Solution"]
            data_label = np.array(bestSol)
            label_file.close()

            edges_file = open(pd + dirname+"/edges_features.json","rb")
            aux = json.load(edges_file)
            indiceVars,indiceCons,values = np.array(aux["indices"][1]),np.array(aux["indices"][0]),np.array(aux["values"])
            edges_file.close()

            # Iterate over nodes, and collect features.
            for i, var in enumerate(problem.getVars()):
                # Node is a variable node.
                node_to_varnode[i] = num_nodes_var
                num_nodes_var += 1

                y_real.append(bestSol[i])
                if (bestSol[i] < bias_threshold):
                    y.append(0)
                else:
                    y.append(1)

                feat_var.append([var.getObj(),len(np.where(indiceVars == i)[0])])
                obj.append([var.getObj()])

            # Node is constraint node.
            for i, cons in enumerate(problem.getConss()):
                node_to_connode[i] = num_nodes_con
                num_nodes_con += 1

                rhs = problem.getLhs(cons)
                feat_rhs.append([rhs])
                feat_con.append([rhs, len(np.where(indiceCons == i)[0])])
                # feat_con.append([rhs])
                index.append(0)

             # Edge list for var->con graphs.
            edge_list_var = []
            # Edge list for con->var graphs.
            edge_list_con = []

            # Create features matrices for variable nodes.
            edge_features_var = []
            # Create features matrices for constraint nodes.
            edge_features_con = []

            # Remark: graph is directed, i.e., each edge exists for each direction.
            # Flow of messages: source -> target.
            print(len(indiceCons))
            for i,(var,cons) in enumerate(zip(indiceVars,indiceCons)):
                # Source node is con, target node is var.

                # Source node is constraint. C->V.
                edge_list_con.append([node_to_connode[cons], node_to_varnode[var]])
                edge_features_con.append(var)

                # Source node is variable. V->C.
                edge_list_var.append([node_to_varnode[var], node_to_connode[cons]])
                edge_features_var.append(var[i])
            print(len(edge_list_con[0]))
            data.edge_features_con = torch.from_numpy(np.array(edge_features_con)).to(torch.float)
            data.edge_features_var = torch.from_numpy(np.array(edge_features_var)).to(torch.float)
            edge_index_var = torch.tensor(edge_list_var).t().contiguous()
            edge_index_con = torch.tensor(edge_list_con).t().contiguous()

            # Create data object.
            data.edge_index_var = edge_index_var
            data.edge_index_con = edge_index_con

            data.y = torch.from_numpy(np.array(y)).to(torch.long)
            data.y_real = torch.from_numpy(np.array(y_real)).to(torch.float)
            data.var_node_features = torch.from_numpy(np.array(feat_var)).to(torch.float)
            data.con_node_features = torch.from_numpy(np.array(feat_con)).to(torch.float)
            data.rhs = torch.from_numpy(np.array(feat_rhs)).to(torch.float)
            data.obj = torch.from_numpy(np.array(obj)).to(torch.float)
            data.num_nodes_var = num_nodes_var
            data.num_nodes_con = num_nodes_con
            data.index = torch.from_numpy(np.array(index)).to(torch.long)
            data.index_var = torch.from_numpy(np.array(index_var)).to(torch.long)

            data_list.append(data)

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])
train_dataset = GraphDataset("SC", "/", "DataSetMIPGNN/", bias_threshold,transform=MyTransform()).shuffle()

Processing...


Preprocessing.
DataSetMIPGNN/
DataSetMIPGNN/set_cover_{1000*1500_0.1_2}/problem.lp 0 5
150000
2


MemoryError: Unable to allocate 168. GiB for an array with shape (150000, 150000) and data type float64