In [1]:
import argparse
import json
import os
import os.path as osp
import random

import matplotlib.pyplot as plt
import nni
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn.functional as F
import wandb
from sklearn.metrics import average_precision_score
from torch_geometric.loader import NeighborLoader
from tqdm import tqdm

from model import RGCN, RGPRGNN
from utils import EarlyStop, setup_seed


# Notebook configuration expressed as command-line options: each entry is a
# (flag, default) pair and every option is parsed as a string.
cli_options = (
    ('--dataset', 'dataset/pyg_data/icdm2022_session1.pt'),
    ('--labeled-class', 'item'),
    ('--test-file', 'dataset/icdm2022_session1_test_ids.txt'),
)

parser = argparse.ArgumentParser()
for flag, default_value in cli_options:
    parser.add_argument(flag, type=str, default=default_value)

# Parse an explicit empty argument list instead of sys.argv, so every option
# takes its default value when run inside the notebook kernel.
args = parser.parse_args(args=[])

print(args)


  from .autonotebook import tqdm as notebook_tqdm


Namespace(dataset='dataset/pyg_data/icdm2022_session1.pt', labeled_class='item', test_file='dataset/icdm2022_session1_test_ids.txt')


In [2]:
# Load the serialized heterogeneous graph.
# NOTE(review): torch.load unpickles the file; only load datasets from a trusted source.
hgraph = torch.load(args.dataset)

labeled_class = args.labeled_class
# pop() removes the split indices from the node store and keeps them as
# standalone notebook variables.
train_idx = hgraph[labeled_class].pop('train_idx')
val_idx = hgraph[labeled_class].pop('val_idx')

# Read the test ids, one integer per line. The context manager closes the file
# handle, and blank lines (e.g. a trailing newline) are skipped rather than
# crashing int().
with open(args.test_file) as test_file:
    test_id = [int(line) for line in test_file if line.strip()]

# Translate each id from the file through the `maps` lookup stored on the
# labeled node type (presumably raw id -> node index; verify against the
# dataset builder). `labeled_class` replaces the previously hard-coded 'item',
# which is its default value.
id_lookup = hgraph[labeled_class].maps
converted_test_id = [id_lookup[raw_id] for raw_id in test_id]
test_idx = torch.LongTensor(converted_test_id)

In [3]:
# The relation count is the number of edge types in the heterogeneous graph.
edge_type_list = hgraph.edge_types
num_relations = len(edge_type_list)
num_relations

14

In [4]:
# Slice the labeled node type's feature matrix and label vector down to the
# training split.
labeled_store = hgraph[labeled_class]
features = labeled_store['x'][train_idx]
labels = labeled_store['y'][train_idx]

In [5]:
# Dimensions of the training feature matrix.
features.size()

torch.Size([68449, 256])

In [6]:
# Dimensions of the training label vector.
labels.size()

torch.Size([68449])

In [7]:
# Positions of the training labels split by class: any non-zero label counts
# as positive, an exact zero as negative. nonzero() returns one row per match.
positive_index = (labels != 0).nonzero()
negative_index = (labels == 0).nonzero()

In [8]:
# Gather the feature rows of the positive nodes; the index column is flattened
# first so the result is directly (num_positive, feature_dim).
positive_features = features[positive_index.squeeze(1)]
positive_features.shape


torch.Size([6651, 256])

In [9]:
# Gather the feature rows of the negative nodes; the index column is flattened
# first so the result is directly (num_negative, feature_dim).
negative_features = features[negative_index.squeeze(1)]
negative_features.shape

torch.Size([61798, 256])

In [10]:
# Per-dimension mean and variance (torch's default estimator) over the
# positive nodes; reducing dim 0 leaves one value per feature dimension.
positive_mean = positive_features.mean(dim=0)
positive_var = positive_features.var(dim=0)

In [11]:
# Per-dimension mean and variance (torch's default estimator) over the
# negative nodes; reducing dim 0 leaves one value per feature dimension.
negative_mean = negative_features.mean(dim=0)
negative_var = negative_features.var(dim=0)

In [23]:
# Plain-Python list copy of the per-dimension positive-class variances, so the
# values can be compared element by element with ordinary list indexing.
positive_var_list = positive_var.tolist()
# Display the variance tensor itself.
positive_var

tensor([1.4761e-02, 1.2893e-05, 7.3621e-05, 6.1906e-04, 1.0372e-04, 2.3915e-03,
        2.5265e-03, 2.5772e-02, 4.3384e-04, 8.3119e-03, 1.8658e-02, 3.9367e-03,
        3.7559e-05, 0.0000e+00, 6.5861e-04, 5.0176e-04, 1.7915e-03, 1.0130e-04,
        3.3916e-02, 6.8875e-05, 9.8172e-03, 1.6743e-03, 2.0580e-03, 6.0806e-04,
        3.9782e-05, 8.5119e-03, 2.9454e-04, 3.1441e-03, 6.7135e-03, 2.7939e-04,
        9.8997e-07, 3.1884e-03, 4.6778e-05, 7.3643e-04, 7.6050e-04, 5.8580e-04,
        2.5491e-07, 1.0412e-04, 4.7913e-03, 1.3791e-03, 3.1918e-04, 1.0409e-02,
        2.0646e-05, 4.0382e-03, 2.2748e-02, 5.7126e-06, 1.0625e-07, 4.2468e-03,
        2.0156e-05, 7.0492e-05, 2.9047e-05, 6.2764e-06, 8.0776e-03, 4.1194e-04,
        1.7825e-03, 2.9103e-03, 8.0336e-03, 2.3629e-04, 4.1433e-03, 1.0750e-04,
        1.3240e-03, 7.1483e-04, 3.9969e-03, 5.9362e-06, 4.8801e-04, 1.3971e-05,
        1.3997e-04, 2.0667e-05, 2.0359e-03, 2.9154e-03, 4.6873e-04, 1.4109e-05,
        2.6815e-03, 3.9006e-04, 2.9042e-

In [22]:
# Absolute per-dimension gap between the positive and negative class means,
# converted to a plain Python list in a single expression.
mean_sub = (positive_mean - negative_mean).abs().tolist()
mean_sub

[0.07444097101688385,
 1.0878313332796097e-05,
 0.00263596442528069,
 0.017834104597568512,
 0.0031388523057103157,
 0.0050936415791511536,
 0.009041108191013336,
 0.19961662590503693,
 0.00467655761167407,
 0.0027357563376426697,
 0.1442474126815796,
 0.06210366636514664,
 0.0005452845362015069,
 8.324076043209061e-05,
 0.004496878944337368,
 0.009943588636815548,
 0.00529930554330349,
 0.0028528976254165173,
 0.039070770144462585,
 0.00018845527665689588,
 0.05950113758444786,
 0.0037598770577460527,
 0.05736037716269493,
 0.003965579904615879,
 0.00017065895372070372,
 0.04277452081441879,
 0.0014906779397279024,
 0.02941899374127388,
 0.019188445061445236,
 0.002605182584375143,
 1.67280959431082e-05,
 0.0069110207259655,
 0.0002944477600976825,
 0.004540066234767437,
 0.029999008402228355,
 0.00024567311629652977,
 0.00019646667351480573,
 0.0005237248260527849,
 0.024009093642234802,
 0.022419419139623642,
 0.00015388242900371552,
 0.030446771532297134,
 6.37775519862771e-06,
 0.

In [29]:
# Keep a feature dimension when the class means differ by more than 0.005 or
# when its variance among positive nodes exceeds 5e-5.
keep_index = [
    dim
    for dim in range(len(mean_sub))
    if mean_sub[dim] > 0.005 or positive_var_list[dim] > 5e-5
]



In [30]:
# Display the indices of the feature dimensions that passed the filter.
keep_index

[0,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 25,
 26,
 27,
 28,
 29,
 31,
 33,
 34,
 35,
 37,
 38,
 39,
 40,
 41,
 43,
 44,
 47,
 49,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 64,
 66,
 68,
 69,
 70,
 72,
 73,
 74,
 75,
 76,
 77,
 79,
 80,
 82,
 83,
 84,
 85,
 87,
 88,
 89,
 90,
 91,
 93,
 94,
 96,
 98,
 99,
 100,
 101,
 102,
 104,
 106,
 108,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 123,
 124,
 125,
 126,
 127,
 129,
 130,
 131,
 132,
 133,
 135,
 136,
 137,
 140,
 141,
 142,
 144,
 145,
 146,
 147,
 149,
 150,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 167,
 168,
 169,
 170,
 171,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 182,
 185,
 186,
 188,
 190,
 191,
 192,
 193,
 195,
 196,
 197,
 198,
 199,
 200,
 201,
 202,
 203,
 205,
 206,
 207,
 208,
 209,
 210,
 211,
 212,
 213,
 214,
 216,
 217,
 218,
 219,
 220,
 221,
 222,
 224,
 225,
 226,
 228,
 229,
 232,


In [None]:
# Build one table with a row per training node: a 'type' label column followed
# by one column per feature dimension (x_0, x_1, ...). Negative rows come
# first, then positive rows.
num_dims = negative_features.shape[1]

# list.extend() returns None, so chaining it onto a list literal loses the
# labels; concatenate the two label lists instead.
type_list = (
    ['Negative'] * negative_features.shape[0]
    + ['Positive'] * positive_features.shape[0]
)

# Create every feature column in a single constructor call instead of adding
# them one at a time, and take the column count from the data rather than a
# hard-coded 256.
stacked_features = torch.cat((negative_features, positive_features), dim=0).numpy()
df = pd.DataFrame(
    stacked_features,
    columns=[f'x_{dim}' for dim in range(num_dims)],
)
df.insert(0, 'type', type_list)

# The plotting cell reads these names. Keep them bound to the last feature
# dimension, which is what the previous per-dimension loop left behind.
show_dim = num_dims - 1
negative_dim_feature = negative_features[:, show_dim].numpy()
positive_dim_feature = positive_features[:, show_dim].numpy()




In [None]:
# Column-wise mean of the feature columns. numeric_only=True skips the
# non-numeric 'type' column, which would otherwise make mean() raise a
# TypeError on recent pandas versions.
df.mean(numeric_only=True)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Figure styling: trim whitespace on save and use a serif font.
plt.rcParams["savefig.bbox"] = 'tight'
plt.rc('font', family='Times New Roman', size=14)

# Compare the distribution of one feature dimension between the two classes.
# negative_dim_feature / positive_dim_feature / show_dim come from the cell
# that builds `df`.
fig, ax = plt.subplots(figsize=(6, 4))

# sns.distplot is deprecated; histplot with kde=True and stat='density' is its
# documented replacement. Passing ax explicitly ties the plot to this figure.
sns.histplot(negative_dim_feature, color='r', kde=True, stat='density',
             alpha=0.4, label='Negative', ax=ax)
sns.histplot(positive_dim_feature, color='g', kde=True, stat='density',
             alpha=0.4, label='Positive', ax=ax)

ax.set_xlabel(f'x_{show_dim}')
ax.set_ylabel('Density')
ax.set_title(f'Feature x_{show_dim}: negative vs. positive')
ax.legend();
