In [2]:
import os.path
import os
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid,TUDataset
from torch_geometric.nn import GATConv, GCNConv,GINConv
import numpy as np
import igraph as ig
from torch_geometric.nn import global_mean_pool,global_add_pool
from functools import reduce
import pickle
from sklearn.model_selection import KFold,StratifiedKFold
import torch.optim as optim
import csv
import pandas as pd
from torch_geometric.loader import DataLoader
from torch_geometric.utils import degree



  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import utils


In [4]:
dataset_name = 'COX2_MD'
dataset = TUDataset(root='dataset', name=dataset_name,use_edge_attr=True )

print()
print(f'Dataset: {dataset}:')
print('====================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')


data = dataset[0]  # Get the first graph object.
print()
print(data)
print('=============================================================')

# Gather some statistics about the first graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

Downloading https://www.chrsmrrs.com/graphkerneldatasets/COX2_MD.zip
Extracting dataset/COX2_MD/COX2_MD.zip
Processing...



Dataset: COX2_MD(303):
Number of graphs: 303
Number of features: 7
Number of classes: 2

Data(edge_index=[2, 506], x=[23, 7], edge_attr=[506, 6], y=[1])
Number of nodes: 23
Number of edges: 506
Average node degree: 22.00
Has isolated nodes: False
Has self-loops: False
Is undirected: True


Done!


In [6]:
dataset=utils.data_load(dataset,normalize=False)
max_node=utils.max_node_dataset(dataset)
max_node

36

### 데이터 체크

In [7]:
file = f'./dataset/{dataset_name}/H1_ver2'

if os.path.isfile(file):
    NEWDATA = torch.load(file)     
    print('file on')

else:        
    SUB_ADJ=[]
    RAW_SUB_ADJ=[]
    NEWDATA=[]
    for i in range(len(dataset)):                    
        
        data=dataset[i]
        v1=data.edge_index[0,:]
        v2=data.edge_index[1,:]
        #print(torch.max(v1))
        adj = torch.zeros((max_node,max_node))
        adj[v1,v2]=1
        adj=adj.numpy()
        (adj==adj.T).all()
        list_feature=(data.x)
        list_adj=(adj)       
        
        #print(dataset[i])
        _, _, _, _, sum_sub_adj = utils.make_cycle_adj_speed_nosl(list_adj,data)
        
        if i % 100 == 0:
            print(i)
            
        #_sub_adj=np.array(sub_adj)

        if len(sum_sub_adj)>0:    
            new_adj=np.stack((list_adj,sum_sub_adj),0)
        else :
            sum_sub_adj=np.zeros((1, list_adj.shape[0], list_adj.shape[1]))
            new_adj=np.concatenate((list_adj.reshape(1, list_adj.shape[0], list_adj.shape[1]),sum_sub_adj),0)

        #SUB_ADJ.append(new_adj)
        SUB_ADJ=new_adj
        #------합치기
        data=dataset[i]
        check1=torch.sum(data.edge_index[0]-np.where(SUB_ADJ[0]==1)[0])+torch.sum(data.edge_index[1]-np.where(SUB_ADJ[0]==1)[1])
        if check1 != 0 :
            print('error')

        data.cycle_index=torch.stack((torch.LongTensor(np.where(SUB_ADJ[1]!=0)[0]), torch.LongTensor(np.where(SUB_ADJ[1]!=0)[1])),1).T.contiguous()
        #data.cycle_attr = torch.FloatTensor(SUB_ADJ[1][np.where(SUB_ADJ[1]!=0)[0],np.where(SUB_ADJ[1]!=0)[1]]) 
        #FloatTensor 형태여야됨 
        NEWDATA.append(data)
        
    torch.save(NEWDATA,file)

0
100
200
300


# stratified 10-fold

In [8]:
dataset_class=[]
for i in range(len(dataset)):
    dataset_class.append(dataset[i].y)

In [9]:
dataset_class=torch.FloatTensor(dataset_class).numpy()

In [10]:
folder = f'./dataset/{dataset_name}/kfold_data'
if os.path.isdir(folder):
    print('folder_on')
    for j in range(10):
        print(j)
        test_index = torch.as_tensor(np.loadtxt(f'./dataset/{dataset_name}/kfold_data/test_idx-{j}.txt',dtype=np.int32), dtype=torch.long)
        for k in range(10):
            train_index = torch.as_tensor(np.loadtxt(f'./dataset/{dataset_name}/kfold_data/train_total_{j}/train_idx-{k}.txt',dtype=np.int32), dtype=torch.long)
            valid_index = torch.as_tensor(np.loadtxt(f'./dataset/{dataset_name}/kfold_data/train_total_{j}/valid_idx-{k}.txt',dtype=np.int32), dtype=torch.long)    
            all_index = reduce(np.union1d, (train_index, valid_index, test_index))
            assert len(dataset) == len(all_index)

else :        
    os.makedirs(folder)
    kkf=StratifiedKFold(n_splits=10, shuffle=True)
    #kf = KFold(n_splits=10, shuffle=True)
    kkf2=StratifiedKFold(n_splits=10, shuffle=True)
    #kf2 = KFold(n_splits=10, shuffle=True)
    kkf.get_n_splits(dataset,dataset_class)
    print(kkf)
    j=0
    for train_total_index, test_index in kkf.split(dataset,dataset_class):
        #print(train_index, test_index)
        np.savetxt(f'./dataset/{dataset_name}/kfold_data/train_total_idx-{j}.txt',(train_total_index.astype(np.int64)), fmt='%i', delimiter='\t')
        np.savetxt(f'./dataset/{dataset_name}/kfold_data/test_idx-{j}.txt',(test_index.astype(np.int64)), fmt='%i', delimiter='\t')
        assert len(dataset)==len(reduce(np.union1d, (test_index, train_total_index)))
        k=0
        os.mkdir(f'./dataset/{dataset_name}/kfold_data/train_total_{j}') 
        
        dataset_class_train=[]
        dataset_train=[]
        for i in train_total_index:
            dataset_class_train.append(dataset[i].y)
            dataset_train.append(dataset[i])
        dataset_class_train=torch.FloatTensor(dataset_class_train).numpy()
        dataset_train=(dataset_train)
        kkf2.get_n_splits(train_total_index,dataset_class_train)
        
        for ii, jj in kkf2.split(dataset_train,dataset_class_train):        
            valid_index=train_total_index[jj]
            train_index=train_total_index[ii]
            np.savetxt(f'./dataset/{dataset_name}/kfold_data/train_total_{j}/valid_idx-{k}.txt',(valid_index.astype(np.int64)), fmt='%i', delimiter='\t')
            np.savetxt(f'./dataset/{dataset_name}/kfold_data/train_total_{j}/train_idx-{k}.txt',(train_index.astype(np.int64)), fmt='%i', delimiter='\t')
            k+=1
            assert len(train_total_index)==len(reduce(np.union1d, (valid_index, train_index)))
        j+=1


StratifiedKFold(n_splits=10, random_state=None, shuffle=True)


In [11]:
from nets_attr import Cy2C_GCN_attr_1,Cy2C_GCN_attr_3
from Trainer_CB_attr import Trainer
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
model_name=['Cy2C_GCN_attr_1','Cy2C_GCN_attr_3']
class_name=[Cy2C_GCN_attr_1,Cy2C_GCN_attr_3]



In [12]:
lr = 1e-3
batch_size=32

In [None]:
for i,CLASS_NAME in enumerate(class_name):
    for hidden_dim in [32,64]:
        for decay in [0.0,0.0001]:
            for mid_drop in [0.0,0.2, 0.4]:
                for dropout in [0.0, 0.2, 0.4]:
                    for n_layer in [1,3,5]:
                        name=f'{model_name[i]}_{n_layer}_{hidden_dim}_{dropout}({mid_drop})_{decay}'
                        print(name)
                        print('=====================================')
                        print('=====',name,'=====',dataset_name,'=====')
                        print('=====================================')
                        trainer=Trainer(name, dataset_name,NEWDATA,device,CLASS_NAME,dataset.num_node_features,dataset.num_classes,batch_size=batch_size,lr=lr,hidden_dim=hidden_dim,n_layer=n_layer,num_workers=1,dropout=dropout,decay=decay,mid_drop=mid_drop)
                        trainer.train()

Cy2C_GCN_attr_1_1_32_0.0(0.0)_0.0
===== Cy2C_GCN_attr_1_1_32_0.0(0.0)_0.0 ===== COX2_MD =====
load mainfold, subfold== 0 0
Mainfold_index: 0, Subfold_index:0
main & sub ===0,0,best acc & loss==,0.6452,0.0191,final acc & loss==0.8387,0.0170,best_epoch==9,final_epoch==110
load mainfold, subfold== 1 0
Mainfold_index: 1, Subfold_index:0
main & sub ===1,0,best acc & loss==,0.6129,0.0272,final acc & loss==0.5806,0.0255,best_epoch==11,final_epoch==112
load mainfold, subfold== 2 0
Mainfold_index: 2, Subfold_index:0
main & sub ===2,0,best acc & loss==,0.6452,0.0214,final acc & loss==0.6452,0.0196,best_epoch==81,final_epoch==182
load mainfold, subfold== 3 0
