# Imports

In [8]:
import dgl
from dgl.data import DGLDataset
import os
import pickle
import numpy as np
import random
import networkx as nx 
from scipy.spatial.distance import cosine
from operator import itemgetter
from collections import OrderedDict
from tabulate import tabulate
from termcolor import colored
import logging
import copy

import matplotlib.pyplot as plt
import tqdm
from scipy.stats import norm
from sklearn import metrics
from sklearn.metrics.pairwise import cosine_similarity
import torch

import dgl.function as fn
import dgl.nn as dglnn
import torch.nn as nn
import torch.nn.functional as F
from evaluation_metrics import print_csv_format , eval_metrics

# Utils

In [6]:
def compute_cosine_similarity(features, others):
    """Pairwise cosine similarity between the rows of two 2-D matrices.

    Both inputs are L2-normalised along dimension 1, after which a matrix
    product of the first with the transpose of the second yields one
    similarity value per (row of ``features``, row of ``others``) pair.

    Args:
        features (torch.Tensor): 2-D feature matrix.
        others (torch.Tensor): 2-D feature matrix.
    Returns:
        torch.Tensor: similarity matrix with one row per row of ``features``
        and one column per row of ``others``.
    """
    unit_features = F.normalize(features, p=2, dim=1)
    unit_others = F.normalize(others, p=2, dim=1)
    similarity = unit_features.mm(unit_others.t())
    return similarity

In [20]:
def nearest_neighbors(graph, node):
    """Return the targets of ``node``'s edges ordered by ascending edge weight.

    Each edge reported by ``graph.edges(node, data=True)`` contributes a
    ``(weight, target)`` pair; the pairs are sorted as tuples, so edges with
    equal weight are ordered by target id.

    Args:
        graph: graph object exposing ``edges(node, data=True)`` whose edge
            data dictionaries contain a ``'weight'`` entry.
        node: node whose edges are ranked.
    Returns:
        numpy.ndarray: 1-D array of target nodes, lowest weight first.
    """
    ranked = [(attrs['weight'], target)
              for _source, target, attrs in graph.edges(node, data=True)]
    ranked.sort()
    return np.array([target for _weight, target in ranked])

In [21]:
def queries_in_graph(graph, queries, N):
    """Build a full ranking of the ``N`` node ids for every query node.

    For each query the row starts with its graph neighbours in the order
    returned by ``nearest_neighbors`` and is then completed with every
    remaining id of ``range(N)`` in ascending order, so each row is a
    permutation of ``0 .. N-1``.

    Args:
        graph: graph passed through to ``nearest_neighbors``.
        queries: sequence of query nodes.
        N (int): total number of node ids; ids are expected to be the
            integers ``0 .. N-1``.
    Returns:
        numpy.ndarray: integer array of shape ``(len(queries), N)``.
    Raises:
        ValueError: if a query's neighbours contain duplicates or ids outside
            ``range(N)``, which would make a permutation impossible.
    """
    indices = np.zeros((len(queries), N), dtype=int)
    all_ids = np.arange(N)
    for row, query in enumerate(queries):
        # Named `neighbors` (not `nn`) to avoid shadowing the torch.nn alias.
        neighbors = np.asarray(nearest_neighbors(graph, query), dtype=int)
        # setdiff1d returns the sorted unique remainder, so the tail of the
        # ranking no longer depends on set iteration order.
        remainder = np.setdiff1d(all_ids, neighbors)
        if neighbors.size + remainder.size != N:
            raise ValueError(
                f'neighbours of query {query!r} are not unique ids in range({N})')
        indices[row, :neighbors.size] = neighbors
        indices[row, neighbors.size:] = remainder

    return indices

# Data

In [51]:
class MarketDataset(DGLDataset):
    """Single-graph k-NN dataset built from pre-extracted feature vectors.

    Nodes are the samples belonging to the first ``number_ids`` identities;
    every node gets ``k`` outgoing edges towards its ``k`` most
    cosine-similar nodes. Edges carry the similarity as feature, a binary
    label (1 when both endpoints share an identity) and a random train mask.

    Args:
        dataset (str): sub-directory of ``datasets/`` holding ``feat.pkl``,
            ``ids.pkl`` and ``camids.pkl``.
        number_ids (int): number of distinct identities to keep.
        k (int): number of nearest neighbours (outgoing edges) per node.
        train_ratio (float): probability for an edge to be in the train mask.

    Attributes set by ``process``:
        selector, features, pids, camids: selection mask and the selected
            numpy arrays.
        original_score: full cosine-similarity matrix of the selected nodes.
        graph: the resulting ``dgl`` graph.
    """

    def __init__(self, dataset='Market', number_ids=100, k=10, train_ratio=0.7):
        # Constructing a DGLDataset runs process(), so the configuration has
        # to be stored before delegating to the parent constructor.
        self.dataset_name = dataset
        self.number_ids = number_ids
        self.k = k
        self.train_ratio = train_ratio
        super().__init__(name='mydataset')

    def process(self):
        """Load the pickled arrays, subsample identities and build the graph."""
        data_dir = f'datasets/{self.dataset_name}'

        # NOTE(security): pickle.load can execute arbitrary code; only use it
        # on files from a trusted source.
        with open(f'{data_dir}/feat.pkl', 'rb') as f:
            features = pickle.load(f)  # (19281, 2048) [:100,:]
        with open(f'{data_dir}/ids.pkl', 'rb') as f:
            pids = pickle.load(f)  # 751 unique id [:100]
        with open(f'{data_dir}/camids.pkl', 'rb') as f:
            camids = pickle.load(f)  # 6 unique camids [:100]

        unique_ids = np.unique(pids)
        print('number of unique ids: ', len(unique_ids))

        selected_ids = unique_ids[:self.number_ids]
        print(f'Using a subsample of {self.number_ids} ids')

        # Boolean mask over all samples: True where the identity is selected.
        self.selector = np.isin(pids, selected_ids)
        print('selected', int(self.selector.sum()))
        self.features = features[self.selector]
        self.pids = pids[self.selector]
        self.camids = camids[self.selector]

        features = torch.from_numpy(self.features)
        pids = torch.from_numpy(self.pids)
        num_nodes = pids.shape[0]
        if self.k > num_nodes:
            raise ValueError(
                f'k={self.k} exceeds the number of selected nodes ({num_nodes})')

        self.original_score = compute_cosine_similarity(features, features)
        scores, initial_rank = self.original_score.topk(
            k=self.k, dim=-1, largest=True, sorted=True)

        # Edges are laid out rank-major: first every node's rank-0 neighbour,
        # then every node's rank-1 neighbour, and so on.
        src = torch.arange(num_nodes).repeat(self.k)
        dst = initial_rank.t().reshape(-1)
        scr = scores.t().reshape(-1)

        # 1 where both endpoints share an identity, 0 otherwise.
        labels = (pids[src] == pids[dst]).to(src.dtype)

        self.graph = dgl.graph((src, dst), num_nodes=num_nodes)
        self.graph.ndata['feat'] = features
        self.graph.ndata['label'] = pids
        self.graph.edata['feat'] = scr[:, None]
        self.graph.edata['label'] = labels
        self.graph.edata['train_mask'] = torch.zeros(
            len(labels), dtype=torch.bool).bernoulli(self.train_ratio)

    def __getitem__(self, i):
        """Return the single graph; only index 0 is valid."""
        # Raising IndexError lets `for g in dataset` terminate instead of
        # yielding the same graph forever.
        if i != 0:
            raise IndexError('This dataset contains a single graph (index 0)')
        return self.graph

    def __len__(self):
        """Number of graphs in the dataset (always 1)."""
        return 1

# Instantiate the dataset; construction runs process(), which prints the
# selection summary shown in the cell output.
dataset = MarketDataset()
# The dataset exposes one graph (len(dataset) == 1); fetch it.
graph = dataset[0]

# Print the graph summary: node/edge counts and the stored feature schemes.
print(graph)

number of unique ids:  751
Using a subsample of 100 ids
selected 5146
Graph(num_nodes=5146, num_edges=51460,
      ndata_schemes={'feat': Scheme(shape=(2048,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'train_mask': Scheme(shape=(), dtype=torch.bool)})


In [94]:
# Pairwise cosine similarity between all selected feature vectors.
gallery_features = torch.from_numpy(dataset.features)
original_score = compute_cosine_similarity(gallery_features, gallery_features)

In [98]:
# Mirror the DGL k-NN graph as a NetworkX digraph. Edge weights are the
# negated cosine similarities, so sorting weights in ascending order ranks
# the most similar neighbour first.
src, dst = (endpoints.tolist() for endpoints in dataset.graph.edges())

graph = nx.DiGraph()
# Take the node count from the DGL graph instead of hard-coding it.
graph.add_nodes_from(range(dataset.graph.num_nodes()))

# Store plain Python floats (not 0-d tensors) as weights.
edges = [(u, v, -float(original_score[u, v])) for u, v in zip(src, dst)]

graph.add_weighted_edges_from(edges)

In [99]:
# Use the first 300 nodes as queries and rank every node of the graph for each.
queries = list(graph)[:300]
indices = queries_in_graph(graph, queries, graph.number_of_nodes())

In [100]:
# Score the rankings; the first 300 samples act as queries against all samples.
all_cmc, all_AP, all_INP = eval_metrics(
    'Market',
    indices,
    q_ids=dataset.pids[:300],
    g_ids=dataset.pids,
    q_camids=dataset.camids[:300],
    g_camids=dataset.camids,
    max_rank=10,
)

number of queries that do not exist in the gallery : 0
Evaluation results in csv format: 
[36m| Dataset   | Rank-1   | Rank-5   | Rank-10   | mAP   | mINP   | metric   |
|:----------|:---------|:---------|:----------|:------|:-------|:---------|
| Market    | 85.33    | 94.33    | 95.67     | 34.58 | 7.84   | 59.96    |[0m


# GNN

In [52]:
class SAGE(nn.Module):
    """Two-layer GraphSAGE encoder with mean aggregation.

    Args:
        in_feats: size of the input node features.
        hid_feats: size of the hidden representation after the first layer.
        out_feats: size of the output node representation.
    """

    def __init__(self, in_feats, hid_feats, out_feats):
        super().__init__()
        self.conv1 = dglnn.SAGEConv(in_feats=in_feats,
                                    out_feats=hid_feats,
                                    aggregator_type='mean')
        self.conv2 = dglnn.SAGEConv(in_feats=hid_feats,
                                    out_feats=out_feats,
                                    aggregator_type='mean')

    def forward(self, graph, inputs):
        """Encode node features: conv1 -> ReLU -> conv2.

        Args:
            graph: graph the convolutions operate on.
            inputs: node features.
        Returns:
            Output of the second convolution.
        """
        hidden = F.relu(self.conv1(graph, inputs))
        return self.conv2(graph, hidden)
    
class DotProductPredictor(nn.Module):
    """Parameter-free edge scorer: dot product of the two endpoint vectors."""

    def forward(self, graph, h):
        """Score every edge of ``graph``.

        Args:
            graph: graph whose edges are scored.
            h: node representations, one row per node.
        Returns:
            The per-edge ``'score'`` data produced by the dot product.
        """
        dot_endpoints = fn.u_dot_v('h', 'h', 'score')
        # local_scope keeps the temporary 'h' / 'score' entries from leaking
        # into the caller's graph.
        with graph.local_scope():
            graph.ndata['h'] = h
            graph.apply_edges(dot_endpoints)
            edge_scores = graph.edata['score']
            return edge_scores

class MLPPredictor(nn.Module):
    """Edge scorer applying one linear layer to the concatenated endpoints.

    Args:
        in_features: size of a single node representation.
        out_classes: number of scores produced per edge.
    """

    def __init__(self, in_features, out_classes):
        super().__init__()
        # The layer sees source and destination vectors side by side, hence
        # twice the node representation size.
        self.W = nn.Linear(in_features * 2, out_classes)

    def apply_edges(self, edges):
        """Edge function: concatenate both endpoints' ``'h'`` and project.

        Returns:
            dict: ``{'score': tensor}`` with one row of scores per edge.
        """
        endpoint_pair = torch.cat([edges.src['h'], edges.dst['h']], 1)
        return {'score': self.W(endpoint_pair)}

    def forward(self, graph, h):
        """Score every edge of ``graph`` given node representations ``h``."""
        # local_scope keeps the temporary 'h' / 'score' entries from leaking
        # into the caller's graph.
        with graph.local_scope():
            graph.ndata['h'] = h
            graph.apply_edges(self.apply_edges)
            edge_scores = graph.edata['score']
            return edge_scores
        
class Model(nn.Module):
    """Edge classifier: SAGE node encoder followed by an MLP edge predictor.

    Args:
        in_features: size of the input node features.
        hidden_features: hidden size of the SAGE encoder.
        out_features: size of the node representation fed to the predictor.
    """

    def __init__(self, in_features, hidden_features, out_features):
        super().__init__()
        self.sage = SAGE(in_features, hidden_features, out_features)
        # Two outputs per edge; DotProductPredictor() is the alternative scorer.
        self.pred = MLPPredictor(out_features, 2)

    def forward(self, g, x):
        """Encode the nodes of ``g`` from ``x`` and return per-edge scores."""
        node_repr = self.sage(g, x)
        edge_scores = self.pred(g, node_repr)
        return edge_scores

# Rebuild the dataset and sanity-check the parameter-free dot-product scorer
# on the raw node features (prints the shape of the per-edge scores).
dataset = MarketDataset()
sco = DotProductPredictor()
graph = dataset[0]
print(sco(graph, graph.ndata['feat']).shape)

node_features = graph.ndata['feat']
edge_label = graph.edata['label']
train_mask = graph.edata['train_mask']

# Input width is taken from the data instead of being hard-coded.
model = Model(node_features.shape[1], 128, 64)
opt = torch.optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()

# Full-batch training on the edges selected by train_mask; the remaining
# edges are only used to report a held-out loss.
for epoch in range(300):
    pred = model(graph, node_features)
    loss = criterion(pred[train_mask], edge_label[train_mask])
    opt.zero_grad()
    loss.backward()
    opt.step()
    # The held-out loss is for monitoring only, so skip autograd tracking.
    # It uses the same pre-step predictions as the training loss.
    with torch.no_grad():
        held_out_loss = criterion(pred[~train_mask], edge_label[~train_mask]).item()
    print('Epoch: ',epoch,' loss: ', loss.item(),' criterion :', held_out_loss )

number of unique ids:  751
Using a subsample of 100 ids
selected 5146
torch.Size([51460, 1])




Epoch:  0  loss:  0.8615552186965942  criterion : 0.8615074753761292
Epoch:  1  loss:  0.2987363040447235  criterion : 0.29802241921424866
Epoch:  2  loss:  0.3059459626674652  criterion : 0.30457252264022827
Epoch:  3  loss:  0.34599733352661133  criterion : 0.3443947434425354
Epoch:  4  loss:  0.35275641083717346  criterion : 0.3511804938316345
Epoch:  5  loss:  0.34131723642349243  criterion : 0.33983471989631653
Epoch:  6  loss:  0.3222794830799103  criterion : 0.32082146406173706
Epoch:  7  loss:  0.30266305804252625  criterion : 0.30125394463539124
Epoch:  8  loss:  0.28665629029273987  criterion : 0.28534674644470215
Epoch:  9  loss:  0.27674710750579834  criterion : 0.2755874693393707
Epoch:  10  loss:  0.2747274339199066  criterion : 0.273699015378952
Epoch:  11  loss:  0.2798071801662445  criterion : 0.27888962626457214
Epoch:  12  loss:  0.2867882251739502  criterion : 0.2859501838684082
Epoch:  13  loss:  0.290097177028656  criterion : 0.2892961800098419
Epoch:  14  loss:  

In [53]:
# Print the model's module hierarchy and layer sizes.
print(model)

Model(
  (sage): SAGE(
    (conv1): SAGEConv(
      (feat_drop): Dropout(p=0.0, inplace=False)
      (fc_self): Linear(in_features=2048, out_features=128, bias=False)
      (fc_neigh): Linear(in_features=2048, out_features=128, bias=False)
    )
    (conv2): SAGEConv(
      (feat_drop): Dropout(p=0.0, inplace=False)
      (fc_self): Linear(in_features=128, out_features=64, bias=False)
      (fc_neigh): Linear(in_features=128, out_features=64, bias=False)
    )
  )
  (pred): MLPPredictor(
    (W): Linear(in_features=128, out_features=2, bias=True)
  )
)


In [54]:
# Display the DGL graph summary (node/edge counts and stored feature schemes).
dataset.graph

Graph(num_nodes=5146, num_edges=51460,
      ndata_schemes={'feat': Scheme(shape=(2048,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'train_mask': Scheme(shape=(), dtype=torch.bool)})

In [85]:
# Mirror the DGL k-NN graph as a NetworkX digraph weighted by the negated
# score in column 1 of `prediction`, so an ascending sort of weights puts the
# highest-scoring neighbour first.
# NOTE(review): `prediction` is not assigned in any visible cell. Indexing it
# as [i, 1] implies one row per edge with at least two columns -- presumably
# the model output for the edges of dataset.graph; confirm and compute it in
# the notebook before this cell so Restart & Run All works.
assert len(prediction) == dataset.graph.num_edges(), \
    'prediction must hold one row per edge of dataset.graph'

src, dst = (endpoints.tolist() for endpoints in dataset.graph.edges())

graph = nx.DiGraph()
# Take the node count from the DGL graph instead of hard-coding it.
graph.add_nodes_from(range(dataset.graph.num_nodes()))

# Row i of `prediction` is paired with the i-th edge returned by edges().
edges = [(u, v, -float(prediction[i, 1]))
         for i, (u, v) in enumerate(zip(src, dst))]

graph.add_weighted_edges_from(edges)

In [86]:
# Use the first 300 nodes as queries and rank every node of the graph for each.
queries = list(graph)[:300]
indices = queries_in_graph(graph, queries, graph.number_of_nodes())

In [87]:
# Score the rankings; the first 300 samples act as queries against all samples.
all_cmc, all_AP, all_INP = eval_metrics(
    'Market',
    indices,
    q_ids=dataset.pids[:300],
    g_ids=dataset.pids,
    q_camids=dataset.camids[:300],
    g_camids=dataset.camids,
    max_rank=10,
)

number of queries that do not exist in the gallery : 0
Evaluation results in csv format: 
[36m| Dataset   | Rank-1   | Rank-5   | Rank-10   | mAP   | mINP   | metric   |
|:----------|:---------|:---------|:----------|:------|:-------|:---------|
| Market    | 75.67    | 91.33    | 95.67     | 31.65 | 6.00   | 53.66    |[0m
