# 面向对象

## 对象的进阶应用

In [22]:
class Document():
    WELCOME_STR = 'Welcome! The context for this book is {}.'
    def __init__(self, title, author, context):
        print('init function called')
        self.title = title
        self.author = author
        self.__context = context
    # 类函数
    @classmethod
    def create_empty_book(cls, title, author):
        return cls(title=title, author=author, context='nothing')
    
    # 成员函数
    def get_context_length(self):
        return len(self.__context)
    
    # 静态函数
    @staticmethod
    def get_welcome(context):
        return Document.WELCOME_STR.format(context)

empty_book = Document.create_empty_book('What Every Man Thinks About Apart from Sex', 'Potter')
# init function called
print(empty_book.get_context_length())		# 7
print(empty_book.get_welcome('indeed nothing'))		
# Welcome! The context for this book is indeed nothing.

init function called
7
Welcome! The context for this book is indeed nothing.


In [None]:
class ObjectTest():
    CONST_STR = "这是一个类的常量"
    
    @staticmethod
    def print_text():
        print("hello world")
    
    # 成员函数可以正常用 self.function() 调用 静态函数 和 常量
    def test1(self):
        self.print_text()
        print(self.CONST_STR)
    
    # 静态函数没有 self 函数，只能靠 类对象 来调用 类的 静态函数 和 常量
    @staticmethod
    def test2():
        ObjectTest.print_text()
        print(ObjectTest.CONST_STR)

## 面向对象案例

### 如何实现一个搜索引擎？

#### 先定义一个基类

In [17]:
class SearchEngineBase(object):
    def __init__(self):
        pass
    
    def add_corpus(self, file_path):
        with open(file_path, 'r') as fin:
            text = fin.read()
        self.process_corpus(file_path, text)
    
    def process_corpus(self, id, text):
        raise Exception('process_corpus not implement.')
        
    def search(self, query):
        raise Exception('search not implemented.')
    
def main(search_engine):
    for file_path in ['1.txt', '2.txt', '3.txt', '4.txt', '5.txt']:
        search_engine.add_corpus("tmp\\search_txt\\" + file_path)
    while True:
        query = input()
        results = search_engine.search(query)
        print('found {} result(s):'.format(len(results)))
        for result in results:
            print(result)

#### 最基本的搜索引擎

In [None]:
class SimpleEngine(SearchEngineBase):
    def __init__(self):
        super(SimpleEngine, self).__init__()
        self.__id_to_texts = {}
    
    def process_corpus(self, id, text):
        self.__id_to_texts[id] = text
    
    def search(self, query):
        results = []
        for id, text in self.__id_to_texts.items():
            if query in text:
                results.append(id)
        return results

search_engine = SimpleEngine()
main(search_engine)

simple
found 0 result(s):
dream
found 3 result(s):
tmp\search_txt\1.txt
tmp\search_txt\2.txt
tmp\search_txt\3.txt


#### Bag of Words 和 Inverted Index

In [19]:
import re


class BOWEngine(SearchEngineBase):
    def __init__(self):
        super(BOWEngine, self).__init__()
        self.__id_to_word = {}
        
    def process_corpus(self, id, text):
        self.__id_to_word[id] = self.parse_text_to_words(text)
    
    def search(self, query):
        query_words = self.parse_text_to_words(query)
        results = []
        for id, words in self.__id_to_word.items():
            if self.query_match(query_words, words):
                results.append(id)
        return results
    
    @staticmethod
    def query_match(query_words, words):
        for query_word in query_words:
            if query_word not in words:
                return False
        return True
    
    @staticmethod
    def parse_text_to_words(text):
        # 使用正则表达式去除标点符号和换行符
        text = re.sub(r'[^\w]', ' ', text)
        # 转为小写
        text = text.lower()
        # 生成所有单词的列表
        word_list = text.split(' ')
        # 去除空白单词
        word_list = filter(None, word_list)
        # 返回单词的 set
        return set(word_list)

    
search_engine = BOWEngine()
main(search_engine)

"""
simple
found 0 result(s):
dream
found 3 result(s):
tmp\search_txt\1.txt
tmp\search_txt\2.txt
tmp\search_txt\3.txt
"""

'\nsimple\nfound 0 result(s):\ndream\nfound 3 result(s):\ntmp\\search_txt\x01.txt\ntmp\\search_txt\x02.txt\ntmp\\search_txt\x03.txt\n'