# In [23]:
# -*- coding: utf-8 -*-
import fitz  # PyMUPDF
import os
import unidecode
import re
import voila
import ipywidgets
from pylatexenc.latexwalker import LatexWalker, LatexEnvironmentNode
from pylatexenc.latex2text import LatexNodes2Text

"""
    Class Local Search representa um motor de pesquisa capaz de pesquisar por exercicios de
    python em .pdf .tex e .py numa diretoria local.

    Os ficheiros devem estar dentro de uma diretoria com a seguinte estrutura:
    question_x\\version_x\\true_or_false_question.pdf sendo x o numero da questão

    O nome dos ficheiros deve ser o seguinte:
    Pdf -> true_or_false_question.pdf
    Latex -> true_or_false_question.tex
    Python -> program.py
    -----------------------------------------------------------------------------------------
    Parametros:
        path : str
            string com o caminho para a diretoria que contem as várias questões

    ------------------------------------------------------------------------------------------
    Metodos Publicos:
        searchKeyword(self, keyword, docType='all')

    Metodos Privados:
        __decodePy(self, path)
        __decodeTex(self, path)
        __decodePdf(self, path)
        __removeSpecialChar(self, text)

"""
class LocalSearch:
    """Keyword search over exercise documents stored in a local directory.

    The directory passed to the constructor is listed once; every entry is
    treated as a question folder. Inside each question folder only the
    ``version_1`` sub-folder is searched, for these files:

    * ``true_or_false_question.pdf``  (docType ``'pdf'``)
    * ``true_or_false_question.tex``  (docType ``'latex'``)
    * ``program.py``                  (docType ``'python'``)

    Public methods:
        searchKeyword(keyword, docType='all')
    """

    # Root directory holding the question folders (set in __init__).
    directory = None
    # Characters stripped by __removeSpecialChar.
    # NOTE(review): these look like detached accent marks; the helper is not
    # called by any method of this class - confirm whether it is still needed.
    specialCharacters = ['´', '¸', '˜', '`', '^']
    # Entries of `directory`, as returned by os.listdir (set in __init__).
    questionFolders = None

    def __init__(self, path):
        """Store *path* and list the question folders it contains.

        Raises:
            AssertionError: if *path* is None, not a string, or empty.
            OSError: if *path* cannot be listed.
        """
        assert path is not None, 'None path'
        assert isinstance(path, str), 'Path must be a string'
        assert path, 'Path must be not empty'
        self.directory = path
        self.questionFolders = os.listdir(self.directory)

    def __decodePdf(self, path):
        """Return the text of every page of the PDF at *path*."""
        doc = fitz.open(path)
        try:
            pages = []
            for page in doc:
                # Newer PyMuPDF releases name this get_text(); older ones only
                # provide getText(). Support both.
                extract = getattr(page, "get_text", None) or page.getText
                pages.append(extract())
            # Join all pages: previously only the last page was returned.
            return "\n".join(pages)
        finally:
            doc.close()

    def __decodeTex(self, path):
        """Return the LaTeX file at *path* converted to plain text."""
        # Read as UTF-8 directly instead of re-encoding a locale-decoded string.
        with open(path, "r", encoding="utf-8", errors="replace") as handle:
            source = handle.read()
        return LatexNodes2Text().latex_to_text(source)

    def __decodePy(self, path):
        """Return the content of the Python source file at *path*."""
        with open(path, "r", encoding="utf-8", errors="replace") as handle:
            return handle.read()

    def __removeSpecialChar(self, text):
        """Return *text* with every character of `specialCharacters` removed."""
        for sChar in self.specialCharacters:
            text = text.replace(sChar, '')

        return text

    def __loadDocuments(self, folder, docType):
        """Yield ``(path, searchable_text)`` for each requested document in *folder*.

        Documents that do not exist are skipped so a stray or incomplete
        question folder does not abort the whole search. Text is passed through
        unidecode so that it is comparable with the accent-stripped keywords.
        """
        pdfPath = os.path.join(folder, "true_or_false_question.pdf")
        texPath = os.path.join(folder, "true_or_false_question.tex")
        pyPath = os.path.join(folder, "program.py")

        # program.py is needed for both 'latex' and 'python'; read it once.
        pyText = None
        if docType != 'pdf' and os.path.isfile(pyPath):
            pyText = self.__decodePy(pyPath)

        if docType in ('pdf', 'all') and os.path.isfile(pdfPath):
            yield pdfPath, unidecode.unidecode(self.__decodePdf(pdfPath))

        if docType in ('latex', 'all') and os.path.isfile(texPath):
            # As before, a LaTeX hit also considers the program source text.
            texText = self.__decodeTex(texPath) + (pyText or "")
            yield texPath, unidecode.unidecode(texText)

        if docType in ('python', 'all') and pyText is not None:
            yield pyPath, unidecode.unidecode(pyText)

    def searchKeyword(self, keyword, docType='all'):
        """Search the question documents for the words in *keyword*.

        Args:
            keyword: free text; it is accent-stripped and split on whitespace,
                and words shorter than 3 characters are ignored.
            docType: one of ``'all'``, ``'pdf'``, ``'latex'``, ``'python'``.

        Returns:
            A list of file paths whose text contains at least one of the
            words (case-insensitive, literal match), ordered by the number of
            distinct words matched, best match first. For an unknown
            *docType* the list ``["Error - wrong Document Type"]`` is returned.
        """
        # Validate up front, before any file is touched.
        if docType not in ('all', 'pdf', 'latex', 'python'):
            return ["Error - wrong Document Type"]

        # Build the word list without mutating a list while iterating it;
        # duplicates are dropped so a repeated word is not counted twice.
        words = dict.fromkeys(
            word.lower()
            for word in unidecode.unidecode(keyword).split()
            if len(word) >= 3
        )
        if not words:
            return []

        # Escape the words: they are plain text, not regular expressions.
        patterns = [re.compile(re.escape(word), re.IGNORECASE) for word in words]

        hits = {}
        for question in self.questionFolders:
            folder = os.path.join(self.directory, question, "version_1")
            # Each document is decoded once and tested against every word.
            for path, text in self.__loadDocuments(folder, docType):
                matched = sum(1 for pattern in patterns if pattern.search(text))
                if matched:
                    hits[path] = matched

        # sorted() is stable, so equally ranked paths keep discovery order.
        return sorted(hits, key=hits.get, reverse=True)

   
        
# ls = LocalSearch("C:\\Users\\ASUS\\Desktop\\Semestre6\\Projeto\\perguntas")
# print(ls.searchKeyword("pseudo_random_integer"))

############################ TESTE INTERFACE ############################
def SearchWidget(event):
    """Button click handler: run the search and print the hits in `out`.

    Reads the keyword from the `text` widget and the document type from the
    `dropDown` widget, queries `LS`, and prints one result per line inside
    the `out` output widget after clearing its previous content.
    """
    out.clear_output()
    with out:
        matches = LS.searchKeyword(text.value, dropDown.value)
        print("\n".join(matches))

# Output area that SearchWidget clears and prints the search results into.
out = ipywidgets.Output()

# Search engine instance used by SearchWidget; the questions directory is
# hard-coded to a path on the author's machine.
LS = LocalSearch("C:\\Users\\ASUS\\Desktop\\Semestre6\\Projeto\\perguntas")


# Keyword input box and the button that triggers the search.
text = ipywidgets.Text(placeholder = 'keyword')
button = ipywidgets.Button(description='Search', icon='search')
button.on_click(SearchWidget)

# Document type selector; the options are the docType values accepted by
# LocalSearch.searchKeyword.
dropDown = ipywidgets.Dropdown(
    options=['all','pdf', 'python', 'latex'],
    value='all',
    description='Type:',
    disabled=False,
)

# NOTE(review): `display` is not imported in this file; presumably it is the
# builtin provided by the IPython/Jupyter runtime - confirm.
display(text,dropDown,button)
display(out)

# Out (cell output captured from the notebook):
# Text(value='', placeholder='keyword')
#
# Dropdown(description='Type:', options=('all', 'pdf', 'python', 'latex'), value='all')
#
# Button(description='Search', icon='search', style=ButtonStyle())
#
# Output()