# Base example

In [8]:
from typing import List, Union, Tuple

from arqmath_code.Entities.Post import Question, Answer
from arqmath_code.topic_file_reader import Topic
from src.base.model import Model
from src.base.pipeline import Pipeline
from src.base.post_processor import PostProcessor
from src.base.pre_processing import PreProcessor
from src.runner import Runner

## Create a model

In [7]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import scipy
from sklearn.feature_extraction.text import TfidfVectorizer
from typing import List, Union, Tuple

class TfIdfModel(Model):
    """Score documents against queries by cosine similarity of TF-IDF vectors."""

    def forward(self, queries: List[Topic], documents: List[Union[Question, Answer]]) -> List[
        Tuple[Topic, Union[Question, Answer], float]]:
        """Return one similarity score for every (query, document) pair.

        A fresh ``TfidfVectorizer`` is fitted on the bodies of all documents
        and all queries on every call, so the vocabulary covers both sides.

        Args:
            queries: Topics to score; ``body`` and ``topic_id`` are read.
            documents: Candidate posts; ``body`` and ``post_id`` are read.

        Returns:
            A flat list of ``(topic_id, post_id, score)`` tuples, grouped by
            query in input order and, within a query, in document order.
            NOTE(review): the return annotation names ``Topic`` and post
            objects, but the tuples carry their ids - confirm which one the
            callers expect.
        """
        # Nothing to pair up; this also avoids handing scikit-learn an empty
        # corpus / empty matrix.
        if not queries or not documents:
            return []

        document_bodies: List[str] = [document.body for document in documents]
        query_bodies: List[str] = [query.body for query in queries]

        vectorizer: TfidfVectorizer = TfidfVectorizer()
        vectorizer.fit(document_bodies + query_bodies)
        query_vectors: scipy.sparse.csr_matrix = vectorizer.transform(query_bodies)
        document_vectors: scipy.sparse.csr_matrix = vectorizer.transform(document_bodies)

        # Row i holds the similarities of query i to every document.
        cos_sims: np.ndarray = cosine_similarity(query_vectors, document_vectors)

        post_ids = [document.post_id for document in documents]
        res = []
        for query, similarities in zip(queries, cos_sims):
            # Each row belongs to exactly one query: repeat that query's id
            # for all documents instead of zipping over the query list.
            res.extend(
                (query.topic_id, post_id, score)
                for post_id, score in zip(post_ids, similarities)
            )
        return res

In [10]:
class BinaryTagRetrieval(PreProcessor):
    """Select the answers of all questions that share a tag with any query."""

    def forward(self, queries: List[Topic], documents: List[Union[Question, Answer]]) -> List[Union[Question, Answer]]:
        """Collect candidate answers via the tags of the given queries.

        Args:
            queries: Topics whose ``lst_tags`` drive the lookup.
            documents: Not used; candidates are fetched through
                ``data_reader`` instead.

        Returns:
            The answers of every distinct question returned by
            ``data_reader.get_question_of_tag`` for any tag of any query,
            skipping questions whose ``answers`` is ``None``.
        """
        # NOTE(review): `data_reader` is not defined in this block; it is
        # presumably a module-level reader created elsewhere - confirm.
        # dict.fromkeys de-duplicates questions reachable through several
        # tags while keeping first-seen order, so the result is deterministic.
        questions = dict.fromkeys(
            question
            for query in queries
            for tag in query.lst_tags
            for question in data_reader.get_question_of_tag(tag=tag)
        )
        return [
            answer
            for question in questions
            if question.answers is not None
            for answer in question.answers
        ]