<a href="https://colab.research.google.com/github/dragonsan17/faq_retrieval_deep_learning/blob/main/flair_and_inltk_libraries_workflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports and Repo Downloading

In [None]:
!pip install torch==1.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
!pip install inltk
!pip install flair

import os
from getpass import getpass
import urllib
import pandas as pd
import numpy as np
pd.set_option('max_colwidth', 1000)
from IPython.display import display
from tqdm.notebook import tqdm
import flair
import warnings
from flair.embeddings import FlairEmbeddings, DocumentRNNEmbeddings
from flair.data import Sentence
from sklearn.metrics.pairwise import cosine_similarity as cs
from tqdm.notebook import tqdm
from inltk.inltk import get_sentence_similarity
warnings.filterwarnings('ignore') 

In [None]:
# Enter Username, Password and Repo name which will then download all the repo contents here in colab.
# You can then access and run all files of the same. This is to get an idea of how the code works in a cloud environment
# source : https://stackoverflow.com/questions/48350226/methods-for-using-git-with-google-colab
user = input('User name: ')
password = getpass('Password: ')
password = urllib.parse.quote(password)

# repo_name = input('Repo name: ')
repo_name = 'faq_retrieval_deep_learning'
cmd_string = 'git clone https://{0}:{1}@github.com/{0}/{2}.git'.format(user, password, repo_name)
os.system(cmd_string)
cmd_string, password = "", "" 

%cd faq_retrieval_deep_learning

# Data Loading

In [None]:
df_all_data = pd.read_csv('data/all_data.csv', encoding = 'utf-8')
df_test = pd.read_csv('data/test.csv', encoding = 'utf-8')
df_train = pd.read_csv('data/train.csv', encoding = 'utf-8')


def _first_theme_by(column):
  """Map every distinct value of `column` in df_all_data to the 'Broad theme' of its first row."""
  first_rows = df_all_data.dropna(subset=[column]).drop_duplicates(subset=[column])
  return dict(zip(first_rows[column], first_rows['Broad theme']))


# Built once, instead of re-scanning df_all_data for every row of df_test.
_theme_by_stt = _first_theme_by('STT Transcript')
_theme_by_caller = _first_theme_by('Caller query transcription')


def _theme_of(question):
  """Return the broad theme of `question`.

  A match in 'STT Transcript' takes priority over a match in
  'Caller query transcription'; within a column the first matching row wins.
  Raises IndexError when the question occurs in neither column.
  """
  for lookup in (_theme_by_stt, _theme_by_caller):
    if question in lookup:
      return lookup[question]
  raise IndexError('question not found in data/all_data.csv: {!r}'.format(question))


# Theme of each question of a test pair; used later to skip pairs whose themes differ.
df_test['t1'] = [_theme_of(q) for q in df_test['q1']]
df_test['t2'] = [_theme_of(q) for q in df_test['q2']]

# Flair and iNLTK Models

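These models take a very long time to predict on the whole test dataset. Hence the theme information is applied here itself: a pair is given a score of 0, without running the model, when the themes of its two questions are not the same. Even with this shortcut, each of these models takes about an hour to run.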

In [None]:
"""
  Flair
"""
# Flair embedding identified by 'hi-forward' (presumably the Hindi forward
# language model, matching the 'hi' used for iNLTK below - confirm).
f_embedding = FlairEmbeddings('hi-forward')
# Turns the per-token embeddings into one vector per sentence; this is the
# object flair_docrnn() calls .embed() on.
# NOTE(review): no trained weights are loaded for this RNN anywhere in this
# notebook - presumably it is randomly initialised, so scores may differ
# between runs unless a seed is set. Confirm.
document_embeddings = DocumentRNNEmbeddings([f_embedding])

def flair_docrnn(df_test):
  """Score every question pair of `df_test` with Flair document embeddings.

  Parameters
  ----------
  df_test : DataFrame with the columns 'q1', 'q2' (question texts) and
            't1', 't2' (their themes).

  Returns
  -------
  list with one entry per row, in row order: 0 when the two themes differ
  (the embedding step is skipped for that pair), otherwise the cosine
  similarity of the two sentence embeddings.

  Rows are walked by position, so the frame does not need a default
  0..n-1 index.
  """
  rows = zip(df_test['q1'].tolist(), df_test['q2'].tolist(),
             df_test['t1'].tolist(), df_test['t2'].tolist())
  predictions = []
  for q1_text, q2_text, theme1, theme2 in tqdm(rows, total=len(df_test)):
    # Different themes: skip the expensive embedding step altogether.
    if theme1 != theme2:
      predictions.append(0)
      continue
    q1 = Sentence(q1_text)
    q2 = Sentence(q2_text)
    document_embeddings.embed(q1)
    document_embeddings.embed(q2)
    # cosine_similarity expects 2-D input, hence the (1, dim) reshape.
    e1 = q1.embedding.cpu().detach().numpy().reshape(1, -1)
    e2 = q2.embedding.cpu().detach().numpy().reshape(1, -1)
    predictions.append(cs(e1, e2)[0][0])

  return predictions

# Score every test pair with the Flair model and store the result in
# 'positive_score', the column the evaluation cell ranks candidates by.
# (The commented-out iNLTK cell below writes the same column.)
df_test['positive_score'] = flair_docrnn(df_test)

In [None]:
# # Run only once, to setup functions for Hindi in iNLTK
# from inltk.inltk import setup
# setup('hi')

In [None]:
# """
#   iNLTK
# """

# def inltk_sentence_similarity(df_test):
#   test_q1 = df_test['q1']
#   test_q2 = df_test['q2']
#   predictions = []
#   dummy = np.arange(len(test_q1))
#   bar = tqdm(dummy)
#   for i in bar:
#     if df_test['t1'][i] != df_test['t2'][i]:
#       predictions.append(0)
#       continue
#     q1 = test_q1[i]
#     q2 = test_q2[i]
#     score = get_sentence_similarity(q1,q2, 'hi')
#     predictions.append(score)

#   return predictions

# df_test['positive_score'] = inltk_sentence_similarity(df_test)

# Evaluation

In [None]:
def performance_metric(df):
  """Compute ranking metrics for one ranked candidate list.

  Parameters
  ----------
  df : DataFrame with the columns 'q1' (query question) and 'q2' (candidate
       question), already sorted best candidate first. The rank of a row is
       its position in the frame, whatever its index labels are.

  A candidate counts as correct when its 'Answer Index' in df_all_data
  (looked up through TRAIN_COLUMN) equals that of the query (looked up
  through TEST_COLUMN).
  NOTE(review): df_all_data, TEST_COLUMN and TRAIN_COLUMN are globals; the two
  column constants are not defined in the cells visible here - confirm where
  they are set.

  Returns
  -------
  dict with
    'SR@k'  (k = 1, 3, 5): 1 if a correct candidate is within the top k, else 0
    'P@k'   (k = 1, 3, 5): correct candidates within the top k, divided by k
    'MRR'   : 1 / rank of the first correct candidate (0 if there is none)
    'MAP'   : sum over correct candidates of (correct so far / rank),
              divided by len(df)
  NOTE(review): 'MAP' is normalised by the number of ranked rows, not by the
  number of correct candidates - confirm this is intended.
  An empty frame yields all zeros.

  Raises
  ------
  IndexError if a question has no matching row in df_all_data.
  """

  def answer_index_for(column, question):
    # Answer index of the first df_all_data row whose `column` equals `question`.
    matches = df_all_data.loc[df_all_data[column] == question, 'Answer Index']
    return matches.tolist()[0]

  average_precision = 0
  correct_answers = 0
  success_rate = [0,0,0,0,0]
  precision = [0,0,0,0,0]
  reciprocal_rank = 0

  # enumerate() gives the 0-based position; the index label is ignored.
  for position, (_, row) in enumerate(df.iterrows()):
    query_answer = answer_index_for(TEST_COLUMN, row['q1'])
    predicted_answer = answer_index_for(TRAIN_COLUMN, row['q2'])
    if query_answer != predicted_answer:
      continue

    correct_answers += 1
    average_precision += correct_answers/(position + 1)
    # A hit at this position counts for every cut-off at or below it.
    for i in range(position, 5):
      success_rate[i] = 1
      precision[i] += 1/(i + 1)

    # Only the first hit defines the reciprocal rank.
    if reciprocal_rank == 0:
      reciprocal_rank = 1/(position + 1)

  if len(df) > 0:
    average_precision /= len(df)

  calculated_metric = {'SR@1' : success_rate[0], 'SR@3' : success_rate[2], 'SR@5' : success_rate[4],
                       'P@1' : precision[0], 'P@3' : precision[2], 'P@5' : precision[4],
                       'MRR' : reciprocal_rank, 'MAP' : average_precision}
  return calculated_metric
  
# Evaluate the 'positive_score' ranking query by query, with and without the
# theme filter, and print the totals / averages over all queries.
#
# NOTE(review): TEST_COLUMN, TRAIN_COLUMN and BROAD_THEME are not defined in the
# cells visible here; they must exist in the kernel before this cell runs -
# confirm where they are set.

TOP_K = 10  # number of best-scoring candidates kept per query


def _first_value(column, question, target):
  """Return `target` from the first df_all_data row whose `column` equals `question`."""
  return list(df_all_data[df_all_data[column] == question][target])[0]


def _print_metrics(title, metrics, n_queries):
  """Print one result table; Hit@k are counts, the other values are per-query averages."""
  print(title)
  print("Hit@1 : {}, 3: {}, 5 : {}, all : {}".format(metrics['Hit@1'], metrics['Hit@3'], metrics['Hit@5'], n_queries))
  print("SR@1 : {:.3f}, 3: {:.3f}, 5 : {:.3f}".format(metrics['SR@1'], metrics['SR@3'], metrics['SR@5']))
  print("P@1 : {:.3f}, 3: {:.3f}, 5 : {:.3f}".format(metrics['P@1'], metrics['P@3'], metrics['P@5']))

  print("MAP : {:.3f}".format(metrics['MAP']), end=", ")
  print("MRR : {:.3f}".format(metrics['MRR']))


calculated_metric = {'SR@1' : 0, 'SR@3' : 0, 'SR@5' : 0,
                      'P@1' : 0, 'P@3' : 0, 'P@5' : 0,
                      'MRR' : 0, 'MAP' : 0}

calculated_metric_with_themes = dict(calculated_metric)

# Scalar key (not a one-element list) so that get_group() accepts a plain value.
query_question_groups = df_test.groupby('q1')

for query_question in df_test['q1'].unique():
    # Work on a copy: get_group() returns a slice of df_test.
    group = query_question_groups.get_group(query_question).copy()
    group['ai'] = [_first_value(TRAIN_COLUMN, candidate, 'Answer Index') for candidate in group['q2']]

    # Candidates sharing an answer are collapsed into one row that carries the
    # best (maximum) score of that answer.
    group['positive_score'] = group.groupby('ai')['positive_score'].transform('max')
    group = group.drop_duplicates(subset=['ai'])

    # Theme-aware variant: candidates from another theme than the query get score 0.
    query_question_theme = _first_value(TEST_COLUMN, query_question, BROAD_THEME)
    group_with_themes = group.copy()
    other_theme = [query_question_theme != _first_value(TRAIN_COLUMN, candidate, BROAD_THEME)
                   for candidate in group_with_themes['q2']]
    group_with_themes.loc[other_theme, 'positive_score'] = 0

    # Rank best-first; performance_metric() is given the top TOP_K rows with a fresh 0..n-1 index.
    group = group.sort_values(by=['positive_score'], ascending = False).reset_index(drop = True).head(TOP_K)
    group_with_themes = group_with_themes.sort_values(by=['positive_score'], ascending = False).reset_index(drop = True).head(TOP_K)

    calculated_metric_for_group = performance_metric(group)
    calculated_metric_for_group_with_themes = performance_metric(group_with_themes)

    for key in calculated_metric_for_group:
      calculated_metric[key] += calculated_metric_for_group[key]
      calculated_metric_with_themes[key] += calculated_metric_for_group_with_themes[key]

number_of_queries = len(query_question_groups)

for metrics in (calculated_metric, calculated_metric_with_themes):
  # Hit@k keeps the raw count of successful queries ...
  for k in (1, 3, 5):
    metrics['Hit@{}'.format(k)] = metrics['SR@{}'.format(k)]
  # ... every other entry becomes an average over the queries.
  for key in metrics:
    if 'Hit' not in key:
      metrics[key] /= number_of_queries

_print_metrics("Results without theme information : ", calculated_metric, len(df_test['q1'].unique()))
_print_metrics("Results with theme information : ", calculated_metric_with_themes, len(df_test['q1'].unique()))