In [1]:
import math
import torch
import random
import argparse
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from collections import defaultdict
import pyhocon


In [133]:
class Config():
    def __init__(self):
        self.dataSet = 'cora'
        self.agg_func = 'MEAN'
        self.epochs = 2
        self.b_sz = 20
        self.seed = 824
        self.cuda = 'use CUDA'
        self.gcn = 'store_true'
        self.learn_method = 'unsup'
        self.unsup_loss = 'normal'
        self.max_vali_f1 = 0
        self.name = 'debug'
        self.config = './../data/experiments.conf'

args = Config()

if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    else:
        device_id = torch.cuda.current_device()
        print('using device', device_id, torch.cuda.get_device_name(device_id))

device = torch.device("cuda" if args.cuda else "cpu")
print('DEVICE:', device)

using device 0 TITAN X (Pascal)
DEVICE: cuda


In [173]:
## 数据集介绍为： https://www.cnblogs.com/popodynasty/p/14975945.html
class DataCenter(object):
    def __init__(self,config):
        super(DataCenter,self).__init__()
        self.config = config
    def load_dataSet(self,dataSet='cora'):
        cora_content_file = self.config["file_path.cora_content"]
        cora_cite_file = self.config["file_path.cora_cite"]
        feat_data = []
        labels = []
        node_map = {}
        label_map = {}
        
        with open(cora_content_file) as fp:                              # 打开节点特性信息文件
            for i, line in enumerate(fp):
                info = line.strip().split()                              
                feat_data.append([float(x) for x in info[1:-1]])         # 节点特征信息
                node_map[info[0]] = i                                    # 节点的index  值->ID
                if not info[-1] in label_map:                            # 判断标签是否已存在
                    label_map[info[-1]] = len(label_map)                 # 标签的index  值->ID
                labels.append(label_map[info[-1]])                       # 节点对应的标签信息
        
        # 原始版  读取已有embedding
        feat_data = np.asarray(feat_data)                                # 转换成numpy格式

        
        labels = np.asarray(labels, dtype=np.int64)                      # 转换成numpy格式
        adj_lists = defaultdict(set)  
        
        with open(cora_cite_file) as fp:
            for i, line in enumerate(fp):
                info = line.strip().split()
                assert len(info) == 2
                paper1 = node_map[info[0]]
                paper2 = node_map[info[1]]
                # 得到的字典可以查找每一个点的邻居节点
                adj_lists[paper1].add(paper2)
                adj_lists[paper2].add(paper1)
        assert len(feat_data) == len(labels) == len(adj_lists)
        
        # 把数据分为 训练，验证，测试集
        test_indexs,val_indexs, train_indexs = self._split_data(feat_data.shape[0])


        setattr(self, dataSet+"_test", test_indexs)
        setattr(self,dataSet+"_val", val_indexs)
        setattr(self,dataSet+"_train", train_indexs)
        
        setattr(self,dataSet+"_feats", feat_data)
        setattr(self,dataSet+"_labels", labels)
        setattr(self, dataSet+"_adj_lists", adj_lists)
    
    def _split_data(self,num_nodes,test_split =3, val_split=6):
        # 随机打乱
        rand_indices = np.random.permutation(num_nodes)
        test_size = num_nodes // test_split
        val_size = num_nodes // val_split
        train_size = num_nodes -(test_size + val_size)
        test_indexs = rand_indices[:test_size]
        val_indexs = rand_indices[test_size:(test_size+val_size)]
        train_indexs = rand_indices[(test_size+val_size):]
        
        return test_indexs, val_indexs, train_indexs



In [112]:
class SageLayer(nn.Module):
    def __init__(self, input_size, out_size,gcn=False):
        super(SageLayer,self).__init__()
        self.input_size = input_size
        self.out_size = out_size
        self.gcn = gcn
        # 创建weight
        self.weight = nn.Parameter(torch.FloatTensor(out_size, self.input_size if self.gcn else 2*self.input_size))
        # 初始化参数
        self.init_params()
        
    def init_params(self):
        for param in self.parameters():
#             print("param=====",param)
            nn.init.xavier_uniform_(param)
    def forward(self, self_feats, aggregate_feats,neghs=None):
        if not self.gcn:
            # concat自己信息和邻居信息
            combined = torch.cat([self_feats,aggregate_feats],dim=1)
        else:
            combined = aggregate_feats
        # mm() 代表 matmul()  .t() 为 转置
        combined = F.relu(self.weight.mm(combined.t())).t()
        return combined 

class GraphSage(nn.Module):
    def __init__(self, num_layers,input_size,out_size, raw_features, adj_lists, device,gcn=False,agg_func="MEAN"):
        super(GraphSage,self).__init__()
        # 输入尺寸 1433
        self.input_size = input_size
        # 输出尺寸 128
        self.out_size = out_size
        # 聚合层数 2
        self.num_layers = num_layers
        # 是否使用GCN 
        self.gcn = gcn
        # 使用训练设备
        self.device = device
        # 聚合函数
        self.agg_func = agg_func
        # 节点特征
        self.raw_features = raw_features
        # 边
        self.adj_lists = adj_lists
        
        for index in range(1,num_layers+1):
            layer_size = out_size if index!=1 else input_size
            setattr(self, 'sage_layer'+str(index), SageLayer(layer_size,out_size,gcn=self.gcn))
    
    def forward(self,nodes_batch):
        # 把当前训练的节点转换成list
        lower_layer_nodes = list(nodes_batch)
        # 放入训练节点
        nodes_batch_layers = [(lower_layer_nodes,)]
        # 遍历每一次聚合，获得neighbors
        for i in range(self.num_layers):
            # batch涉及到的所有节点:Set(本身+邻居), dict(节点编号->当前字典中顺序index), [合并所有节点]
            lower_samp_neighs,lower_layer_nodes_dict, lower_layer_nodes = self._get_unique_neighs_list(lower_layer_nodes)
              
            #[([合并所有节点], [Set(本身+邻居)] , dict(节点编号->当前字典中顺序index)),([batch节点]),] 
            nodes_batch_layers.insert(0,(lower_layer_nodes,lower_samp_neighs,lower_layer_nodes_dict))
        
        assert len(nodes_batch_layers) == self.num_layers +1
        
        pre_hidden_embs = self.raw_features
        
        for index in range(1,self.num_layers+1):
            # 聚合自己和邻居的节点
            nb = nodes_batch_layers[index][0]
            #[([合并所有节点], [Set(本身+邻居)] , dict(节点编号->当前字典中顺序index)),([batch节点]),] 
            pre_neighs = nodes_batch_layers[index-1]
            # 聚合函数。聚合的节点， 节点特征，集合节点邻居信息
            aggregate_feats = self.aggregate(nb, pre_hidden_embs, pre_neighs)
            
            sage_layer = getattr(self,  "sage_layer"+str(index))
            
            if index>1:
                # 第一层的batch节点，没有进行转换
                nb = self._nodes_map(nb,pre_hidden_embs, pre_neighs)
            # 进入SageLayer。weight*concat(node,neighbors)
            cur_hidden_embs = sage_layer(self_feats=pre_hidden_embs[nb], aggregate_feats=aggregate_feats)
            
            pre_hidden_embs = cur_hidden_embs
        return pre_hidden_embs
            
                
            
        
    def _nodes_map(self, nodes,hidden_embs, neighs):
        layer_nodes, samp_neighs, layer_nodes_dict = neighs
        assert len(samp_neighs) == len(nodes)
        # 得到上一层dict中的节点index
        index = [layer_nodes_dict[x] for x in nodes]
        return index
        
    def aggregate(self,nodes,pre_hidden_embs,pre_neighs,num_sample=10):
        #[([合并所有节点], [Set(本身+邻居)] , dict(节点编号->当前字典中顺序index)),([batch节点]),] 
        unique_nodes_list, samp_neighs, unique_nodes = pre_neighs
        assert len(nodes) == len(samp_neighs)
        # 判断是否包含本身
        indicator = [(nodes[i] in samp_neighs[i]) for i in range(len(samp_neighs))]
        assert (False not in indicator)
        
        if not self.gcn:
            # 中心节点删除
            samp_neighs = [(samp_neighs[i]-set([nodes[i]])) for i in range(len(samp_neighs))]
        if len(pre_hidden_embs) == len(unique_nodes):
            embed_matrix = pre_hidden_embs
        else:
            embed_matrix = pre_hidden_embs[torch.LongTensor(unique_nodes_list)]
        # (本层节点数，总邻居节点数)
        mask = torch.zeros(len(samp_neighs),len(unique_nodes))  
        # 每个node 邻接节点对应的列 index
        column_indices = [unique_nodes[n] for samp_neigh in samp_neighs for n in samp_neigh ]
        # 每个node 对应的行 index 
        row_indices = [i for i in range(len(samp_neighs)) for _ in range(len(samp_neighs[i]))]
        # 构建邻接矩阵
        mask[row_indices,column_indices] = 1
        
        if(self.agg_func=="MEAN"):
            # 按行求和，保持和输入一个维度
            num_neigh = mask.sum(1,keepdim=True)
            # 归一化操作
            mask = mask.div(num_neigh).to(embed_matrix.device)
            # 矩阵相乘，相当于聚合周围邻接信息求和
            aggregate_feats = mask.mm(embed_matrix)
        elif self.agg_func == "MAX":
            # 不为0的行
            indexs = [x.nonzero() for x in mask==1]
            aggregate_feats = []
            for feat in [embed_matrix[x.squeeze()] for x in indexs]:
                if len(feat.size())==1:
                    # view 相当于tf的reshape
                    aggregate_feats.append(feat.view(1, -1))
                else:
                    aggregate_feats.append(torch.max(feat,0)[0].view(1,-1))
            aggregate_feats = torch.cat(aggregate_feats,0)
        
        return aggregate_feats
                
            
        
        
        
    
    
    
    def _get_unique_neighs_list(self,nodes,num_sample=10):
        _set = set
         # self.adj_lists边矩阵，获取节点的邻居
        to_neighs = [self.adj_lists[int(node)] for node in nodes]
        
        if not num_sample is None:
            _sample = random.sample
            # 对邻居节点进行采样，如果大于邻居数据，则进行采样
            samp_neighs = [_set(_sample(to_neigh, num_sample)) if len(to_neigh)>=num_sample else to_neigh 
                           for to_neigh in to_neighs]
        else:
            samp_neighs = to_neighs
        # 两个set  | 相当于取并集 与union()效果相同
        # 加入本身节点
        samp_neighs = [samp_neigh | set([nodes[i]]) for i, samp_neigh in enumerate(samp_neighs)]
        # 这个batch涉及到的所有节点
        _unique_nodes_list = list(set.union(*samp_neighs))
        # 建立编号
        i = list(range(len(_unique_nodes_list)))
        # 节点编号->当前字典中顺序index
        unique_nodes = dict(list(zip(_unique_nodes_list, i)))
        # 聚合自己和邻居节点，点的dict，batch涉及到的所有节点
        return samp_neighs, unique_nodes, _unique_nodes_list
            
        
    

In [145]:
class Classification(nn.Module): 
    # 把GraphSAGE的输出链接全连接层每个节点映射到7维
    def __init__(self,emb_size,num_classes):
        super(Classification,self).__init__()
        self.layer = nn.Sequential(nn.Linear(emb_size,num_classes))
        self.init_params()
    def init_params(self):
        for param in self.parameters():
            if len(param.size())==2:
                nn.init.xavier_uniform_(param)
    def forward(self, embeds):
        logists = torch.log_softmax(self.layer(embeds),1)
        return logists

    
class UnsupervisedLoss(object):
    # 无监督loss
    def __init__(self,adj_lists,train_nodes,device):
        super(UnsupervisedLoss,self).__init__()
        self.Q = 10
        self.N_WALKS = 6
        self.WALK_LEN = 1
        self.N_WALK_LEN = 5
        self.MARGIN = 3
        self.adj_lists = adj_lists
        # train_nodes 代表所有训练节点
        self.train_nodes = train_nodes
        self.device = device
        
        self.target_nodes = None
        self.positive_pairs = []
        self.negtive_pairs = []
        self.node_positive_pairs = {}
        self.node_negtive_pairs = {}
        self.unique_nodes_batch = []
        
    def get_loss_sage(self, embeddings, nodes):
        assert len(embeddings) == len(self.unique_nodes_batch)
        assert False not in [nodes[i]==self.unique_nodes_batch[i] for i in range(len(nodes))]
        node2index = {n:i for i,n in enumerate(self.unique_nodes_batch)}

        nodes_score = []
        assert len(self.node_positive_pairs) == len(self.node_negtive_pairs)
        for node in self.node_positive_pairs:
            pps = self.node_positive_pairs[node]
            nps = self.node_negtive_pairs[node]
            if len(pps)==0 or len(nps)==0:
                continue
            # Q * Exception(negative score)
            indexs = [list(x) for x in zip(*nps)]
            node_indexs = [node2index[x] for x in indexs[0]]
            neighb_indexs = [node2index[x] for x in indexs[1]]
            neg_score = F.cosine_similarity(embeddings[node_indexs],embeddings[neighb_indexs])
            neg_score = self.Q*torch.mean(torch.log(torch.sigmoid(-neg_score)),0)


#             # multiple positive score
            indexs = [list(x) for x in zip(*pps)]
            node_indexs = [node2index[x] for x in indexs[0]]
            neighb_indexs = [node2index[x] for x in indexs[1]]
            pos_score = F.cosine_similarity(embeddings[node_indexs], embeddings[neighb_indexs])
            pos_score = torch.log(torch.sigmoid(pos_score))
    
    
            nodes_score.append(torch.mean(-pos_score-neg_score).view(1,-1))
        loss = torch.mean(torch.cat(nodes_score,0))
        return loss


    def get_loss_margin(self,embeddings,nodes):
        assert len(embeddings) == len(self.unique_nodes_batch)
        assert False not in [nodes[i]==self.unique_nodes_batch[i] for i in range(len(nodes))]
        node2index = {n:i for i,n in enumerate(self.unique_nodes_batch)}

        nodes_score = []
        assert len(self.node_positive_pairs) == len(self.node_negtive_pairs)
        for node in self.node_positive_pairs:
            pps = self.node_positive_pairs[node]
            nps = self.node_negtive_pairs[node]
            if len(pps) == 0 or len(nps) == 0:
                continue

            indexs = [list(x) for x in zip(*pps)]
            node_indexs = [node2index[x] for x in indexs[0]]
            neighb_indexs = [node2index[x] for x in indexs[1]]
            pos_score = F.cosine_similarity(embeddings[node_indexs], embeddings[neighb_indexs])
#             pos_score = F.cosine_similarity(embeddings[node_indexs], embeddings[neighb_indexs])
            pos_score, _ = torch.min(torch.log(torch.sigmoid(pos_score)), 0)

            indexs = [list(x) for x in zip(*nps)]
            node_indexs = [node2index[x] for x in indexs[0]]
            neighb_indexs = [node2index[x] for x in indexs[1]]
            neg_score = F.cosine_similarity(embeddings[node_indexs], embeddings[neighb_indexs])
            heg_score,_ = torch.max(torch.log(torch.sigmoid(neg_score)),0)

            nodes_score.append(torch.max(torch.tensor(0.0).to(self.device), neg_score-pos_score+self.MARGIN).VIEW(1,-1))


        loss = torch.mean(torch.cat(nodes_score,0),0)

        return loss

    def extend_nodes(self, nodes,num_neg=6):
        self.positive_pairs = []
        self.node_positive_pairs = {}
        self.negtive_pairs = []
        self.node_negtive_pairs = {}

        self.target_nodes = nodes
        self.get_positive_nodes(nodes)
#         print("positive_pairs", self.positive_pairs)
        self.get_negtive_nodes(nodes, num_neg)
#         print("negtive_pairs", self.negtive_pairs)

        self.unique_nodes_batch = list(set([i for x in self.positive_pairs for i in x])| set([i for x in self.negtive_pairs for i in x]))
        assert set(self.target_nodes) < set(self.unique_nodes_batch)
        return self.unique_nodes_batch

    def get_positive_nodes(self,nodes):
        return self._run_random_walks(nodes)

    def get_negtive_nodes(self, nodes, num_neg):
        for node in nodes:
            neighbors = set([node])
            frontier = set([node])
            # 下方循环就是得到 当前node 0~N_WALK_LEN 度的所有节点set集合：neighbors  
            for i in range(self.N_WALK_LEN):
                current = set()
                for outer in frontier:
                    # 取并集 current 为outer所有相邻节点 set集合
                    current |= self.adj_lists[int(outer)]
                # 表示node i+1度的关联节点
                frontier = current - neighbors
                # neighbors 添加上 current： 表示node 从0~i+1 度 所有关联的节点
                neighbors |= current

            # train_nodes 代表所有训练节点
            # 除去neighbors 后进行采样
            far_nodes = set(self.train_nodes) - neighbors
            #  num_neg为采样数量 这里默认设置为 6 
            neg_samples = random.sample(far_nodes,num_neg) if num_neg<len(far_nodes) else far_nodes 

            self.negtive_pairs.extend([node, neg_node] for neg_node in neg_samples)
            self.node_negtive_pairs[node] = [(node, neg_node) for neg_node in neg_samples]
        return self.negtive_pairs

    def _run_random_walks(self, nodes):
        for node in nodes:
            if(len(self.adj_lists[int(node)])) ==0 :
                continue
            cur_pairs = []
            for i in range(self.N_WALKS):
                curr_node = node
                for j in range(self.WALK_LEN):
                    neighs = self.adj_lists[int(curr_node)]
                    next_node = random.choice(list(neighs))
                    # 共线是无用的
                    if next_node != node and next_node in self.train_nodes:
                        self.positive_pairs.append((node, next_node))
                        cur_pairs.append((node,next_node))
                    curr_node = next_node
            self.node_positive_pairs[node] = cur_pairs
        return self.positive_pairs
                
        

In [92]:
import sys
import os
import torch
import random
import math

from sklearn.utils import shuffle
from sklearn.metrics import f1_score

import torch.nn as nn
import numpy as np



In [131]:
def evaluate(dataCenter, ds, graphSage, classification, device, max_vali_f1, name, cur_epoch):
    test_nodes = getattr(dataCenter, ds+'_test')
    val_nodes = getattr(dataCenter, ds+'_val')
    labels = getattr(dataCenter, ds+'_labels')
    
    models = [graphSage, classification]
    
    params = []
    for model in models:
        for param in model.parameters():
            if param.requires_grad:
                param.requires_grad = False
                params.append(param)
    
    embs = graphSage(val_nodes)
    logists = classification(embs)
    _,predicts = torch.max(logists,1)
    labels_val = labels[val_nodes]
    
    assert len(labels_val) == len(predicts)
    print("!!!!!!!",type(predicts))
    print("@@@@@@@@@@@",type(predicts.data))
#     print("#####", predicts==predicts.data)
    comps = zip(labels_val, predicts.data)
    
    vali_f1 = f1_score(labels_val, predicts.cpu().data, average="micro")
    print("Validation F1:", vali_f1)
    
    if vali_f1 > max_vali_f1:
        max_vali_f1 = vali_f1
        embs = graphSage(test_nodes)
        logists = classification(embs)
        _,predicts = torch.max(logists, 1)
        labels_test = labels[test_nodes]
        assert len(labels_test) == len(predicts)
        comps = zip(labels_test, predicts.data)
        
        test_f1 = f1_score(labels_test, predicts.cpu().data, average="micro")
        print("Test F1:", test_f1)
        
        for param in params:
            param.requires_grad = True
        

        torch.save(models, '../models/model_best_{}_ep{}_{:.4f}.torch'.format(name, cur_epoch, test_f1))
        
    for param in params:
        param.requires_grad = True
        
    return max_vali_f1
        

In [168]:
def apply_model(dataCenter, ds, graphSage, classification, unsupervised_loss, b_sz, unsup_loss, device, learn_method):
    
    test_nodes = getattr(dataCenter, ds+'_test')
    val_nodes = getattr(dataCenter, ds+'_val')
    train_nodes = getattr(dataCenter, ds+'_train')
    labels = getattr(dataCenter, ds+'_labels')
    
    if unsup_loss == 'margin':
        num_neg = 6
    elif unsup_loss == 'normal':
        num_neg = 100
    else:
        print("unsup_loss can be only 'margin' or 'normal'.")
        sys.exit(1)
        
    
    train_nodes = shuffle(train_nodes)
    
    models = [graphSage, classification]
    
    params = []
    # 初始化模型参数
    for model in models:
        for param in model.parameters():
            if param.requires_grad:
                params.append(param)
    
    # 梯度优化算法
    optimizer = torch.optim.SGD(params, lr=0.7)  
    # 梯度清零
    optimizer.zero_grad()
    for model in models:
        model.zero_grad()
    
    # 有多少个batches
    batches = math.ceil(len(train_nodes)/b_sz)
    
    visited_nodes = set()
    
    for index in range(batches):
        nodes_batch = train_nodes[index*b_sz:(index+1)*b_sz]
        
        # 负采样
        nodes_batch = np.asarray(list(unsupervised_loss.extend_nodes(nodes_batch,num_neg=num_neg)))
        visited_nodes |= set(nodes_batch)
        
        # 拿标签
        labels_batch = labels[nodes_batch]
        
        # 整个batch数据输入 graphSage
        
        embs_batch = graphSage(nodes_batch)
        
        if learn_method == 'sup':
            # 有监督学习
            logists = classification(embs_batch)
            loss_sup = -torch.sum(logists[range(logists.size(0)), labels_batch],0)
            loss_sup /= len(nodes_batch)
            loss = loss_sup
            
        elif learn_method == 'plus_unsup':
            # 混合 有监督和无监督
            logists = classification(embs_batch)
            loss_sup = -torch.sum(logists[range(logists.size(0)), labels_batch], 0)
            loss_sup /= len(nodes_batch)
            
            # 无监督
            if unsup_loss == 'margin':
                loss_net = unsupervised_loss.get_loss_margin(embs_batch, nodes_batch)
            elif unsup_loss == 'normal':
                loss_net = unsupervised_loss.get_loss_sage(embs_batch, nodes_batch)
            
            loss = loss_sup + loss_net
        else:
            
            # 无监督
            if unsup_loss == 'margin':
                loss_net = unsupervised_loss.get_loss_margin(embs_batch, nodes_batch)
            elif unsup_loss == 'normal':
                loss_net = unsupervised_loss.get_loss_sage(embs_batch, nodes_batch)
            loss = loss_net
            
        if (index+1)%20 ==0:
            print('Step [{}/{}], Loss: {:.4f}, Dealed Nodes [{}/{}] '.format(index+1, batches, loss.item(), len(visited_nodes), len(train_nodes)))

        
        loss.backward()
        if learn_method == 'unsup':
            for model in models[:-1]:
            # 相当于dropout
                nn.utils.clip_grad_norm_(model.parameters(), 500)
        else:    
            for model in models:
                # 相当于dropout
#                 print(model.parameters)
                nn.utils.clip_grad_norm_(model.parameters(), 500)
        # 更新梯度
        optimizer.step()                                      
        optimizer.zero_grad() 
        for model in models:
            model.zero_grad()
        
    return graphSage, classification
    
        

In [174]:
if __name__ == '__main__':
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    # load config file
    config = pyhocon.ConfigFactory.parse_file(args.config)
    # load data
    ds = args.dataSet
    
    dataCenter = DataCenter(config)
    dataCenter.load_dataSet(ds)
    features = torch.FloatTensor(getattr(dataCenter, ds+'_feats')).to(device)  
    print("~~~~~~~~~~~",features.shape)
    graphSage = GraphSage(config['setting.num_layers'], features.size(1), config['setting.hidden_emb_size'], features, getattr(dataCenter, ds+'_adj_lists'), device, gcn=args.gcn, agg_func=args.agg_func)
    graphSage.to(device)

    num_labels = len(set(getattr(dataCenter, ds+'_labels')))
    classification = Classification(config['setting.hidden_emb_size'], num_labels)
    classification.to(device)
    
    unsupervised_loss = UnsupervisedLoss(getattr(dataCenter, ds+'_adj_lists'), getattr(dataCenter, ds+'_train'), device)
    
    args.learn_method = "sup"
    
    if args.learn_method == 'sup':
        print('GraphSage with Supervised Learning')
    elif args.learn_method == 'plus_unsup':
        print('GraphSage with Supervised Learning plus Net Unsupervised Learning')
    else:
        print('GraphSage with Net Unsupervised Learning')
    
    for epoch in range(args.epochs):
        print('----------------------EPOCH %d-----------------------' % epoch)
        graphSage, classification = apply_model(dataCenter, ds, graphSage, classification, unsupervised_loss, args.b_sz, args.unsup_loss,device, args.learn_method)
        
#         if(epoch+1) %2 ==0 and args.learn_method == "unsup":
#             classification, args.max_vali_f1 = train_classification(dataCenter, graphSage, classification,ds, device, args.max_vali_f1, args.name)
        
        if args.learn_method != 'unsup':
            args.max_vali_f1 = evaluate(dataCenter, ds, graphSage, classification, device,args.max_vali_f1,args.name, epoch)
        
    
    
    
    

~~~~~~~~~~~ torch.Size([2708, 1433])
GraphSage with Supervised Learning
----------------------EPOCH 0-----------------------
Step [20/68], Loss: 1.0012, Dealed Nodes [1355/1355] 
Step [40/68], Loss: 0.3863, Dealed Nodes [1355/1355] 
Step [60/68], Loss: 0.1952, Dealed Nodes [1355/1355] 
!!!!!!! <class 'torch.Tensor'>
@@@@@@@@@@@ <class 'torch.Tensor'>
Validation F1: 0.8869179600886918
----------------------EPOCH 1-----------------------
Step [20/68], Loss: 0.1496, Dealed Nodes [1355/1355] 
Step [40/68], Loss: 0.1300, Dealed Nodes [1355/1355] 
Step [60/68], Loss: 0.1189, Dealed Nodes [1355/1355] 
!!!!!!! <class 'torch.Tensor'>
@@@@@@@@@@@ <class 'torch.Tensor'>
Validation F1: 0.8736141906873615


In [153]:
val_indexs = features[1]
print(val_indexs)

tensor([0., 0., 0.,  ..., 0., 0., 0.], device='cuda:0')


In [15]:
s = [[1,2,3],[4,5,6]]
print(*s)

[1, 2, 3] [4, 5, 6]
