In [1]:
import numpy as np
import pandas as pd
import faiss
from tqdm import tqdm as tqdm

## Model evaluation
- Intersection
- Concatenation
- Title -> Img
- Img -> Title

## Load data

In [6]:
# load data (title, cleaned_title, arr_weight)
# data path
DATA_PATH = 'res/data/new_feat_df.csv'
TEST_PATH = 'res/data/test_df.csv'

arr_title = pd.read_csv(DATA_PATH, usecols=['title']).values
cleaned_arr_title = pd.read_csv(DATA_PATH, usecols=['cleaned_title']).fillna('').values
arr_weight = pd.read_csv(DATA_PATH, usecols=['weight']).values

In [7]:
total_df = pd.read_csv(DATA_PATH)

test_df = pd.read_csv(TEST_PATH)
test_title = test_df['title'].values

print(test_title.shape)

(5000,)


In [13]:
from faiss_handle import FaissHandler

# vector path
arr_title_vector_path = 'res/data/가공식품_wvs.npz'
arr_img_vector_path = 'res/data/가공식품_IMGF.npz'
arr_concat_vector_path = 'res/data/CONCAT_가공식품.npz'

# load vector (title / img / concat)
arr_title_vector = np.ascontiguousarray(np.load(arr_title_vector_path)['arr_0'], dtype=np.float32)
arr_img_vector = np.ascontiguousarray(np.load(arr_img_vector_path)['arr_0'], dtype=np.float32)
arr_concat_vector = np.ascontiguousarray(np.load(arr_concat_vector_path)['arr_0'], dtype=np.float32)

# faiss_handler
faiss_handler = FaissHandler()

# index path
title_index_path = 'res/model/index/title.h5'
img_index_path = 'res/model/index/image.h5'
concat_index_path = 'res/model/index/concat.h5'

# load index
title_index = faiss_handler.load_index(title_index_path)
img_index = faiss_handler.load_index(img_index_path)
concat_index = faiss_handler.load_index(concat_index_path)

## EVAL MODEL

### Intersection

In [14]:
from procs.evaluation.model_evaluation import *

scores = np.zeros(len(test_title))
precisions = np.zeros(len(test_title))

num = 0

for query_title in tqdm(test_title):
    
    # intersection
    acc, prec = evaluate_model(query_title, total_df, arr_title, \
                               img_index, title_index, arr_img_vector, arr_title_vector, \
                               img_thrshold=0.3, title_threshold=0.1, weight_check=True)
    scores[num] = acc
    precisions[num] = prec
    num += 1

avg_scores, avg_precision = np.round(scores.mean(),3) * 100, np.round(precisions.mean(),3) * 100

print('Intersection \n recall : {0} \t precision : {1}'.format(avg_scores, avg_precision))

100%|██████████| 5000/5000 [07:51<00:00, 10.61it/s]

Intersection 
 recall : 79.5 	 precision : 81.8





### Concatenation

In [15]:
scores = np.zeros(len(test_title))
precisions = np.zeros(len(test_title))

num = 0

for query_title in tqdm(test_title):

    # 단일 모델과 concatenation 모델과 로직은 같음
    acc, prec = title_evaluate_model(query_title, total_df, arr_title, \
                                     title_index, arr_title_vector, \
                                     title_threshold=0.05, weight_check=True)
    scores[num] = acc
    precisions[num] = prec
    num += 1

avg_scores, avg_precision = np.round(scores.mean(),3) * 100, np.round(precisions.mean(),3) * 100

print('Concatenation \n recall : {0} \t precision : {1}'.format(avg_scores, avg_precision))

100%|██████████| 5000/5000 [05:59<00:00, 13.93it/s]

Concatenation 
 recall : 81.6 	 precision : 79.80000000000001





### Title -> Img

In [16]:
scores = np.zeros(len(test_title))
precisions = np.zeros(len(test_title))

num = 0

for query_title in tqdm(test_title):
    
    # model_1 = title -> img
    acc, prec = multi_filter_evaluate_model_1(query_title, total_df, arr_title, \
                                              img_index, arr_img_vector, arr_title_vector, \
                                              img_thrshold=0.4, cos_threshold=0.9999, weight_check=True)
    scores[num] = acc
    precisions[num] = prec
    num += 1

avg_scores, avg_precision = scores.mean(), precisions.mean()

print('Multifilter1 \n recall : {0} \t precision : {1}'.format(avg_scores, avg_precision))

  similarity = np.inner(find_vector, sim_vector) / (np.linalg.norm(find_vector) * np.linalg.norm(sim_vector))
  sim_title_idx = np.where(similarity_array > cos_threshold)[0]
100%|██████████| 5000/5000 [22:29<00:00,  3.71it/s]

Multifilter1 
 recall : 0.7923576843838324 	 precision : 0.8685172861588982





### Img -> Title

In [17]:
scores = np.zeros(len(test_title))
precisions = np.zeros(len(test_title))

num = 0

for query_title in tqdm(test_title):
    
    # model_2 = img -> title
    acc, prec = multi_filter_evaluate_model_2(query_title, total_df, arr_title, \
                                              title_index, arr_img_vector, arr_title_vector, \
                                              title_threshold=0.1, cos_threshold=0.9, weight_check=True)
    scores[num] = acc
    precisions[num] = prec
    num += 1

avg_scores, avg_precision = scores.mean(), precisions.mean()

print('Multifilter2 \n recall : {0} \t precision : {1}'.format(avg_scores, avg_precision))

100%|██████████| 5000/5000 [07:17<00:00, 11.43it/s]

Multifilter2 
 recall : 0.7955635767104599 	 precision : 0.8455598132853204



