From 48e5d5842766f0d3efc626d22383dcbf2c40465b Mon Sep 17 00:00:00 2001
From: peterzhang2029
Date: Thu, 12 Oct 2017 10:19:00 +0800
Subject: [PATCH 1/8] add nest text classification

---
 nest_text_classification/README.md        | 173 ++++++++++++
 nest_text_classification/data/infer.txt   |   4 +
 .../data/test_data/test.txt               |   4 +
 .../data/train_data/train.txt             |   4 +
 nest_text_classification/images/model.jpg | Bin 0 -> 56078 bytes
 nest_text_classification/index.html       | 237 ++++++++++++++++
 nest_text_classification/infer.py         |  82 ++++++
 nest_text_classification/network_conf.py  |  45 ++++
 nest_text_classification/reader.py        | 252 ++++++++++++++++++
 nest_text_classification/train.py         | 163 +++++++++++
 nest_text_classification/utils.py         |  61 +++++
 11 files changed, 1025 insertions(+)
 create mode 100644 nest_text_classification/README.md
 create mode 100644 nest_text_classification/data/infer.txt
 create mode 100644 nest_text_classification/data/test_data/test.txt
 create mode 100644 nest_text_classification/data/train_data/train.txt
 create mode 100644 nest_text_classification/images/model.jpg
 create mode 100644 nest_text_classification/index.html
 create mode 100644 nest_text_classification/infer.py
 create mode 100644 nest_text_classification/network_conf.py
 create mode 100644 nest_text_classification/reader.py
 create mode 100644 nest_text_classification/train.py
 create mode 100644 nest_text_classification/utils.py

diff --git a/nest_text_classification/README.md b/nest_text_classification/README.md
new file mode 100644
index 0000000000..0950564b57
--- /dev/null
+++ b/nest_text_classification/README.md
@@ -0,0 +1,173 @@
# Nested-Sequence Text Classification

## Introduction
Sequence data is one of the main input types that natural language processing tasks deal with: a sentence is a sequence of words, and several sentences in turn make up a paragraph. A paragraph can therefore be viewed as a nested, two-level sequence, each element of which is itself a sequence.

The nested (two-level) sequence is a very flexible data layout supported by PaddlePaddle. It helps us describe more complex language data such as paragraphs and multi-turn dialogues. On top of nested-sequence input we can design a hierarchical network that encodes the input at the word level and at the sentence level separately, and thereby handle more demanding language-understanding tasks.

This example demonstrates how to organize nested-sequence text data with PaddlePaddle and use it to complete a text classification task.

## Model
For text classification we treat a piece of text as an array of sentences, each of which is in turn an array of words; this is exactly a nested, two-level sequence. Every sentence of the paragraph is encoded into a vector by a convolutional neural network, and the sentence vectors are then encoded by a pooling layer into a single vector that represents the paragraph. For classification, this paragraph vector is fed to a classifier, which outputs the result.

**The model structure is shown in the figure below.**
<p align="center">
<img src="images/model.jpg" width="90%" align="center"/><br/>
Figure 1. The text classification model used in this example
</p>

The PaddlePaddle implementation of this network structure can be found in `network_conf.py`.

To process a two-level time series, it first has to be transformed into single-level time series, each of which is then processed on its own. PaddlePaddle provides the `recurrent_group` interface for this transformation: in this example, every paragraph of the input text is decomposed by `recurrent_group` into its sentences, and each sentence is passed through a CNN that learns its vector representation.
```python
nest_group = paddle.layer.recurrent_group(
    input=[paddle.layer.SubsequenceInput(emb), hidden_size],
    step=cnn_cov_group)
```
When applying `recurrent_group`, the input sequence is passed via the `input` argument. Since the transformation required here is "two-level sequence => single-level sequence", the input data has to be marked as `SubsequenceInput`.

Each single-level sequence obtained from the decomposition is encoded into a vector by a CNN built from the following parts:

- **Convolution layer**: In text classification the convolution runs along the time dimension, and the kernel width equals the width of the matrix produced by the word-embedding layer. Each convolution yields a "feature map"; using several kernels of different heights yields several feature maps. By default, this example uses kernels of size 3 (the red box in Figure 1) and size 4 (the blue box in Figure 1).
- **Max pooling layer**: Max pooling is applied to every feature map separately. Since a feature map is itself a vector, max pooling simply selects the largest element of each vector; all the selected maxima are then concatenated into a new vector.
- **Linear projection layer**: The max-pooled outputs of the different convolutions are concatenated into one long vector, and a linear projection turns it into the representation vector of the single-level sequence.

The CNN is implemented as follows:
```python
def cnn_cov_group(group_input, hidden_size):
    conv3 = paddle.networks.sequence_conv_pool(
        input=group_input, context_len=3, hidden_size=hidden_size)
    conv4 = paddle.networks.sequence_conv_pool(
        input=group_input, context_len=4, hidden_size=hidden_size)
    output_group = paddle.layer.fc(
        input=[conv3, conv4],
        size=hidden_size,
        param_attr=paddle.attr.ParamAttr(name='_cov_value_weight'),
        bias_attr=paddle.attr.ParamAttr(name='_cov_value_bias'),
        act=paddle.activation.Linear())
    return output_group
```
`paddle.networks.sequence_conv_pool` is a text-sequence convolution module with built-in pooling that already ships with PaddlePaddle; it can be called directly.

Once every sentence has its representation vector, all sentence vectors are passed through an average pooling layer to obtain the representation vector of the whole sample, which then goes through a fully connected layer that outputs the final prediction:
```python
avg_pool = paddle.layer.pooling(
    input=nest_group,
    pooling_type=paddle.pooling.Avg(),
    agg_level=paddle.layer.AggregateLevel.TO_NO_SEQUENCE)
prob = paddle.layer.mixed(
    size=class_num,
    input=[paddle.layer.full_matrix_projection(input=avg_pool)],
    act=paddle.activation.Softmax())
```
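For orientation, the pieces above assemble into a single network function. The following is a hedged sketch rather than a verbatim copy of `network_conf.py`: the function name `nest_net` and its `is_infer` flag are taken from `infer.py`, while the default `emb_size`, `hidden_size`, and the data-layer names `"word"`/`"label"` are assumptions.
```python
def nest_net(dict_dim, emb_size=128, hidden_size=128, class_num=2,
             is_infer=False):
    # a sample is a two-level sequence of word ids: sentences of words
    data = paddle.layer.data(
        name="word",
        type=paddle.data_type.integer_value_sub_sequence(dict_dim))
    emb = paddle.layer.embedding(input=data, size=emb_size)
    # decompose the paragraph and encode every sentence with the CNN
    nest_group = paddle.layer.recurrent_group(
        input=[paddle.layer.SubsequenceInput(emb), hidden_size],
        step=cnn_cov_group)
    # average the sentence vectors into a single paragraph vector
    avg_pool = paddle.layer.pooling(
        input=nest_group,
        pooling_type=paddle.pooling.Avg(),
        agg_level=paddle.layer.AggregateLevel.TO_NO_SEQUENCE)
    prob = paddle.layer.mixed(
        size=class_num,
        input=[paddle.layer.full_matrix_projection(input=avg_pool)],
        act=paddle.activation.Softmax())
    if is_infer:
        return prob
    label = paddle.layer.data(
        name="label", type=paddle.data_type.integer_value(class_num))
    return paddle.layer.classification_cost(input=prob, label=label)
```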
## Running with PaddlePaddle's built-in data

### Training
Run in a terminal:
```bash
python train.py
```
This runs the example on `imdb`, the sentiment classification dataset built into PaddlePaddle.

### Inference
When training finishes, the models are stored in the specified directory (`models` by default). Run in a terminal:
```bash
python infer.py
```
By default, the inference script loads the model trained for one pass and tests it on the `imdb` test set.

## Training and inference with custom data

### Training
1. Data organization

Assume training data of the following format: one sample per line, with columns separated by `\t`; the first column is the class label and the second is the input text. Two example lines:

    ```
    1 This movie is very good. The actor is so handsome.
    0 What a terrible movie. I waste so much time.
    ```

2. Writing the data reader

A custom data reader only needs a Python generator that implements the logic of **parsing one training sample out of the raw input text**. The following snippet reads the raw data and yields values of the types `paddle.data_type.integer_value_sub_sequence` and `paddle.data_type.integer_value`; a sketch of how such a reader is hooked up to the trainer follows at the end of this section.
```python
def train_reader(data_dir, word_dict):
    """
    Reader interface for training data

    :param data_dir: data directory
    :type data_dir: str
    :param word_dict: the word dictionary,
        which must contain an "<unk>" entry
    :type word_dict: Python dict
    """

    def reader():
        UNK_ID = word_dict['<unk>']
        word_col = 1
        lbl_col = 0

        for file_name in os.listdir(data_dir):
            file_path = os.path.join(data_dir, file_name)
            if not os.path.isfile(file_path):
                continue
            with open(file_path, "r") as f:
                for line in f:
                    line_split = line.strip().split("\t")
                    doc = line_split[word_col]
                    doc_ids = []
                    for sent in doc.strip().split("."):
                        sent_ids = [
                            word_dict.get(w, UNK_ID) for w in sent.split()
                        ]
                        if sent_ids:
                            doc_ids.append(sent_ids)

                    yield doc_ids, int(line_split[lbl_col])

    return reader
```
Note that this example uses the English period `'.'` as the delimiter that splits a piece of text into sentences, and that every sentence is represented as an array of word-dictionary indices (`sent_ids`). Because the representation of one sample (`doc_ids`) contains all the sentences of the text, its type is `paddle.data_type.integer_value_sub_sequence`.

3. Training with command-line arguments

The training script `train.py` takes the following arguments:
```
--train_data_dir TRAIN_DATA_DIR
                      path of the training dataset (default: None). If this
                      parameter is not set, the imdb dataset is used.
--test_data_dir TEST_DATA_DIR
                      path of the testing dataset (default: None). If this
                      parameter is not set, the imdb dataset is used.
--word_dict WORD_DICT
                      path of the word dictionary (default: None). If this
                      parameter is not set, the imdb dataset is used. If it
                      is set but the file does not exist, the word
                      dictionary will be built from the training data
                      automatically.
--class_num CLASS_NUM
                      number of classes.
--batch_size BATCH_SIZE
                      the number of training examples in one
                      forward/backward pass.
--num_passes NUM_PASSES
                      number of passes to train.
--model_save_dir MODEL_SAVE_DIR
                      path to save the trained models.
```

The example can be run directly by adjusting the launch arguments of `train.py`. To train on the sample data under the `data` directory, run in a terminal:
```bash
python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_data' --word_dict 'dict.txt'
```
This trains the model on the sample data.
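As promised in step 2, this is roughly how the custom reader plugs into a PaddlePaddle v2 training loop. It is a hedged sketch under assumptions, not the literal contents of `train.py`: the Adam optimizer, `buf_size`, and `batch_size` values are illustrative placeholders, and `reader.load_dict` is borrowed from its use in `infer.py`.
```python
import paddle.v2 as paddle

import reader  # this example's reader.py
from network_conf import nest_net

paddle.init(use_gpu=False, trainer_count=1)

word_dict = reader.load_dict("dict.txt")  # as used in infer.py
cost = nest_net(len(word_dict), class_num=2)
parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(
    cost=cost,
    parameters=parameters,
    update_equation=paddle.optimizer.Adam(learning_rate=1e-3))

# shuffle the samples produced by the generator, then form mini-batches;
# one sample looks like ([[5, 8, 2], [9, 3]], 1): sentences of word ids, label
train_batch = paddle.batch(
    paddle.reader.shuffle(
        reader.train_reader("data/train_data", word_dict), buf_size=1000),
    batch_size=32)

# map each (doc_ids, label) tuple onto the network's two data layers
trainer.train(
    reader=train_batch,
    feeding={"word": 0, "label": 1},
    num_passes=1)
```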
### Inference

1. Modify the following variables in `infer.py` to specify the model and the test data to use:

```python
model_path = "models/params_pass_00000.tar.gz"  # path of the trained model
assert os.path.exists(model_path), "the trained model does not exist."
infer_path = 'data/infer.txt'  # path of the file to run inference on
word_dict = 'dict.txt'  # path of the word dictionary
```

2. Run `python infer.py` in a terminal.

diff --git a/nest_text_classification/data/infer.txt b/nest_text_classification/data/infer.txt
new file mode 100644
index 0000000000..3d9681b21d
--- /dev/null
+++ b/nest_text_classification/data/infer.txt
@@ -0,0 +1,4 @@
At this point it seems almost unnecessary to state that Jon Bon Jovi delivers a firm, strong, seamless performance as Derek Bliss. His capability as an actor has been previously established by his critical acclaim garnered in other films (The Leading Man, No Looking Back). But, in case anyone is still wondering, yes, Jon Bon Jovi can act. He can act well and that's come to be expected of him. It's easy to separate Derek from the guy who belts out hits on VH-1.

I generally would not watch a horror movie. I've come to expect them to focus on sensationalistic gore rather than dialogue and plot. What pleased me most about this film was that there really was a viable plot being moved along. The gore is not so much as to become the focus of the film and does not have a disturbingly realistic quality of films with higher technical effects budgets. So, gore fans might be disappointed, but story fans will not.

Unlike an action film like U-571 where the dialogue takes a back seat to the bombast, we get a chance to know "the good guys" and actually care what happens to them. A few scenes are left unexplained (like Derek's hallucinations) but you get the feeling certain aspects were as they were to lay the foundation for a sequel. Unfortunately, with the lack of interest shown by Hollywood in this film, that sequel will never happen. These few instances are forgiveable knowing that Vampires could have been a continuing series.

Is this the best film I've ever seen in my life? No. Is it a good way to spend about two hours being entertained? Yes. It won't leave the person who fears horror movies with insomnia and it won't leave the horror movie lover completely disappointed either. If you're somewhere in between the horror genre loather and the horror genre lover, this film is for you. It reaches a happy medium with the effects and story balancing each other.

The original Vampires (1998) is one of my favorites. I was curious to see how a sequel would work considering they used none of the original characters. I was quite surprised at how this played out. As a rule, sequels are never as good as the original, with a few exceptions. Though this one was not a great movie, the writer did well in keeping the main themes & vampire lore from the first one in tact. Jon Bon Jovi was a drawback initially, but he proved to be a half-way decent Slayer. I doubt anyone could top James Wood's performance in the first one, though. unless you bring in Buffy!

All in all, this was a decent watch & I would watch it again.

I was left with two questions, though... what happened to Jack Crow & how did Derek Bliss come to be a slayer? Guess we'll just have to leave that to imagination.
The movie opens with a flashback to Doddsville County High School on April Fool's Day. A group of students play a prank on class nerd Marty. When they are punished for playing said prank, they follow up with a bigger prank which (par for the course in slasher films involving pranks on class nerds) goes ridiculously awry leaving Marty simultaneously burned by fire and disfigured by acid for the sake of being thorough. Fast forward five years, where we find members of the student body gathering at the now abandoned high school for their five year class reunion. We find out that it is no coincidence that everyone at the reunion belonged to the clique of pranksters from the flashback scene, as all of the attendees are being stalked and killed by a mysterious, jester mask-clad murderer in increasingly complicated and mind-numbingly ludicrous fashions. It doesn't take Sherlock Holmes to solve the mystery of the killer's identity, as it is revealed to be none other than a scarred Marty who has seemingly been using his nerd rage and high intellect to bend the laws of physics and engineering in order to rig the school for his revenge scenario. The film takes a turn for the bizarre as Marty finishes exacting his revenge on his former tormentors, only to be haunted by their ghosts. Marty is finally pushed fully over the edge and takes his own life. Finally, the film explodes in a crescendo of disjointed weirdness as the whole revenge scenario is revealed to be a dream in the first place as Marty wakes up in a hospital bed, breaks free of his restraints, stabs a nurse, and finally disfigures his own face.

The script is tired and suffers from a terminal case of horror movie logic. The only originality comes from the mind-numbingly convoluted ways that the victims are dispatched. The absurd it-was-all-a-dream ending feels tacked on. It's almost as if someone pointed out the disjointed nature of the film and the writer decided then and there that it was a dream.

Technically speaking, the film is atrocious. Some scenes were filmed so dark that I had to pause the film and play with the color on my television. The acting is sub-par, even for slasher films. I can't help but think that casting was a part of the problem as all of the actors look at least five years older than the characters they portray, which makes the flashback scene even more unintentionally laughable. Their lack of commitment to the movie is made obvious as half of them can't bother to keep their accents straight through the movie.

All of this being said, if you like bad horror movies, you might like this one, too. It isn't the worst film of the genre, but it's far from the best.
Robert Taylor definitely showed himself to be a fine dramatic actor in his role as a gun-slinging buffalo hunter in this 1956 western. It was one of the few times that Taylor would play a heavy in a film. Nonetheless, this picture was far from great as shortly after this, Taylor fled to television with the successful series The Detectives.

Stuart Granger hid his British accent and turned in a formidable performance as Taylor's partner.

Taylor is a bigot here and his hatred for the Indians really shows.

Another very good performance here was by veteran actor Lloyd Nolan as an aged, drinking old-timer who joined in the hunt for buffalo as well. In his early scenes, Nolan was really doing an excellent take-off of Walter Huston in his Oscar-winning role in The Treasure of the Sierre Madre in 1948. Note the appearance of Russ Tamblyn in the film. The following year Tamblyn and Nolan would join in the phenomenal Peyton Place.

The writing in the film is stiff at best. By the film's end, it's the elements of nature that did Taylor in. How about the elements of the writing here?
\ No newline at end of file
diff --git a/nest_text_classification/data/test_data/test.txt b/nest_text_classification/data/test_data/test.txt
new file mode 100644
index 0000000000..5660ee2f1a
--- /dev/null
+++ b/nest_text_classification/data/test_data/test.txt
@@ -0,0 +1,4 @@
1 I liked the film. Some of the action scenes were very interesting, tense and well done. I especially liked the opening scene which had a semi truck in it. A very tense action scene that seemed well done.

Some of the transitional scenes were filmed in interesting ways such as time lapse photography, unusual colors, or interesting angles. Also the film is funny is several parts. I also liked how the evil guy was portrayed too. I'd give the film an 8 out of 10.
0 The plot for Descent, if it actually can be called a plot, has two noteworthy events. One near the beginning - one at the end. Together these events make up maybe 5% of the total movie time. Everything (and I mean _everything_) in between is basically the director's desperate effort to fill in the minutes. I like disturbing movies, I like dark movies and I don't get troubled by gritty scenes - but if you expect me to sit through 60 minutes of hazy/dark (literally) scenes with NO storyline you have another thing coming. Rosario Dawson, one of my favorite actresses is completely wasted here. And no, she doesn't get naked, not even in the NC-17 version, which I saw.

If you have a couple of hours to throw away and want to watch "Descent", take a nap instead - you'll probably have more interesting dreams.
0 This film lacked something I couldn't put my finger on at first: charisma on the part of the leading actress. This inevitably translated to lack of chemistry when she shared the screen with her leading man. Even the romantic scenes came across as being merely the actors at play. It could very well have been the director who miscalculated what he needed from the actors. I just don't know.

But could it have been the screenplay? Just exactly who was the chef in love with? He seemed more enamored of his culinary skills and restaurant, and ultimately of himself and his youthful exploits, than of anybody or anything else. He never convinced me he was in love with the princess.

I was disappointed in this movie. But, don't forget it was nominated for an Oscar, so judge for yourself.
0 I read the book a long time back and don't specifically remember the plot but do remember that I enjoyed it. Since I'm home sick on the couch it seemed like a good idea and Hey !! It is a Lifetime movie.

The movie is populated with grade B actors and actresses.

The female cast is right out of Desperate Housewives. I've never seen the show but there are lots of commercials for the show and I get the gist. Is there nothing original anymore? Sure, but not on Lifetime.

The male cast are all fairly effeminate looking and acting but the girls need to have husbands I suppose.

In one scene a female is struggling with a male, for her life, and what does she do??? Kicks him in the testicles. What else? Women love that but let me tell you girls something. It's not as easy as it's always made to look.

It wasn't all bad. I did get the chills a time or two so I have to credit someone with that.
diff --git a/nest_text_classification/data/train_data/train.txt b/nest_text_classification/data/train_data/train.txt
new file mode 100644
index 0000000000..6a335327d1
--- /dev/null
+++ b/nest_text_classification/data/train_data/train.txt
@@ -0,0 +1,4 @@
0 I admit that I am a vampire addict: I have seen so many vampire movies I have lost count and this one is definitely in the top ten. I was very impressed by the original John Carpenter's Vampires and when I descovered there was a sequel I went straight out and bought it. This movie does not obey quite the same rules as the first, and it is not quite so dark, but it is close enough and I felt that it built nicely on the original.

Jon Bon Jovi was very good as Derek Bliss: his performance was likeable and yet hard enough for the viewer to believe that he might actually be able to survive in the world in which he lives. One of my favourite parts was just after he meets Zoey and wanders into the bathroom of the diner to check to see if she is more than she seems. His comments are beautifully irreverant and yet emminently practical which contrast well with the rest of the scene as it unfolds.

The other cast members were also well chosen and they knitted nicely to produce an entertaining and original film. It is not simply a rehash of the first movie and it has grown in a similar way to the way Fright Night II grew out of Fright Night. There are different elements which make it a fresh movie with a similar theme.

If you like vampire movies I would recommend this one. If you prefer your films less bloody then choose something else.
0 Almost too well done... "John Carpenter's Vampires" was entertaining, a solid piece of popcorn-entertainment with a budget small enough not to be overrun by special effects. And obviously aiming on the "From Dusk Till Dawn"-audience. "Vampires: Los Muertos" tries the same starting with a rock-star Jon Bon Jovi playing one of the main characters, but does that almost too well...: I haven't seen Jon Bon Jovi in any other movie, so I am not able to compare his acting in "Vampires: Los Muertos" to his other roles, but I was really suprised of his good performance. After the movie started he convinced me not expecting him to grab any guitar and playing "It' my life" or something, but kill vampires, showing no mercy and doing a job which has to be done. This means a lot, because a part of the audience (also me) was probably thinking: "...just because he's a rockstar...". Of course Bon Jovi is not James Woods but to be honest: It could have been much worse, and in my opinion Bon Jovi did a very good performance. The vampiress played by Arly Jover is not the leather dressed killer-machine of a vampire-leader we met in Part 1 (or in similar way in "Ghosts of Mars"). Jover plays the vampire very seductive and very sexy, moving as lithe as a cat, attacking as fast as a snake and dressed in thin, light almost transparent very erotic cloth. And even the optical effects supporting her kind of movement are very well made. It really takes some beating. But the director is in some parts of the film only just avoiding turning the movie from an action-horrorfilm into a sensitive horrormovie like Murnau's "Nosferatu". You can almost see the director's temptation to create a movie with a VERY personal note and different to the original. This is the real strength of the movie and at the same time its weakest point: The audience celebrating the fun-bloodbath of the first movie is probably expecting a pure fun-bloodbath for the second time and might be a little disappointed. Make no mistake: "Vampires:Los Muertos" IS a fun-bloodbath but it's just not ALL THE TIME this kind of movie. Just think of the massacre in the bar compared to the scene in which the vampiress tries to seduce Zoey in the ruins: the bar-massacre is what you expect from american popcorn-entertainment, the seducing-Zoey-in-the-ruins-scene is ALMOST european-like cinema (the movie is eager to tell us more about the relationship between Zoey and the vampiress, but refuses answers at the same time. Because it would had slow down the action? Showed the audience a vampiress with a human past, a now suffering creature and not only a beast which is just slaughtering anybody). And that's the point to me which decides whether the movie is accepted by the audience of the original movie or not. And also: Is the "From Dusk Till Dawn"-audience really going to like this? I'm not sure about that. Nevertheless Tommy Lee Wallace did really a great job, "Vampires:Los Muertos" is surprisingly good. But I also think to direct a sequel of a popcorn movie Wallace is sometimes almost too creative, too expressive. Like he's keeping himself from developing his talent in order to satisfy the expectations of audience. In my opinion, Wallace' talent fills the movie with life and is maybe sometimes sucking it out at the same time. "Vampires: Los Muertos" is almost too well done.
(I give it 7 of 10)
1 We all know that countless duds have graced the 80s slasher genre and often deserve nothing but our deepest disgust. Maybe that's a bit hastey but damn if "Slaughter High" wasn't terribly unoriginal, even for a slasher flick. Pretty much, the plot involves a kid who experienced a Carrie-like shower humiliation in high school and returns to the dilapidated building to seek out revenge on a group of former-bullies who all show up to reminisce. As you'd expect, they are killed off steadily by a masked madman on April 1st by means of electrocution, burning, hanging, and chemically altered beer. I've got a number of problems with the plot details and settings of this movie, but considering the ending, I feel the need to discard my complaints and just say that this is a complete waste of time. Ignore any thought of viewing this movie.
1 What a terrible movie. The acting was bad, the pacing was bad, the cinematography was bad, the directing was bad, the "special" effects were bad. You expect a certain degree of badness in a slasher, but even the killings were bad.

First of all, the past event that set up the motive for the slaughter went on for 15 or 20 minutes. I thought it would never end. They could have removed 80% of it and explained what happened well enough.

Then, the victims were invited to the "reunion" in an abandoned school which still had all the utilities turned on. One of the victims thought this was a little odd, but they dismissed it and decided to break in anyway.

Finally, the killings were so fake as to be virtually unwatchable.

There is no reason to watch this movie, unless you want to see some breasts, and not very good breasts at that. This movie makes Showgirls virtually indistinguishable from Citizen Kane.
diff --git a/nest_text_classification/images/model.jpg b/nest_text_classification/images/model.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4f63d8b55380f4210c2acf98251c8a4ce75a7efc
GIT binary patch
literal 56078
(56078 bytes of encoded image data omitted)

literal 0
HcmV?d00001

diff --git a/nest_text_classification/index.html b/nest_text_classification/index.html
new file mode 100644
index 0000000000..a3fa50b3af
--- /dev/null
+++ b/nest_text_classification/index.html
@@ -0,0 +1,237 @@
(237 lines of HTML omitted; only empty markup survived extraction)
+
+ + + + + + + diff --git a/nest_text_classification/infer.py b/nest_text_classification/infer.py new file mode 100644 index 0000000000..2d66b76aa8 --- /dev/null +++ b/nest_text_classification/infer.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import sys +import os +import gzip + +import paddle.v2 as paddle + +import reader +from network_conf import nest_net +from utils import logger + + +def infer(data_path, model_path, word_dict_path, batch_size, class_num): + def _infer_a_batch(inferer, test_batch, ids_2_word): + probs = inferer.infer(input=test_batch, field=["value"]) + assert len(probs) == len(test_batch) + for word_ids, prob in zip(test_batch, probs): + sent_ids = [] + for sent in word_ids[0]: + sent_ids.extend(sent) + word_text = " ".join([ids_2_word[id] for id in sent_ids]) + print("%s\t%s\t%s" % (prob.argmax(), + " ".join(["{:0.4f}".format(p) + for p in prob]), word_text)) + + logger.info("begin to predict...") + use_default_data = (data_path is None) + + if use_default_data: + word_dict = reader.imdb_word_dict() + word_reverse_dict = dict((value, key) + for key, value in word_dict.iteritems()) + test_reader = reader.imdb_test(word_dict) + class_num = 2 + else: + assert os.path.exists( + word_dict_path), "the word dictionary file does not exist" + + word_dict = reader.load_dict(word_dict_path) + word_reverse_dict = dict((value, key) + for key, value in word_dict.iteritems()) + + test_reader = reader.infer_reader(data_path, word_dict)() + + dict_dim = len(word_dict) + prob_layer = nest_net(dict_dim, class_num=class_num, is_infer=True) + + # initialize PaddlePaddle + paddle.init(use_gpu=True, trainer_count=4) + + # load the trained models + parameters = paddle.parameters.Parameters.from_tar( + gzip.open(model_path, "r")) + inferer = paddle.inference.Inference( + output_layer=prob_layer, parameters=parameters) + + test_batch = [] + for idx, item in enumerate(test_reader): + test_batch.append([item[0]]) + if len(test_batch) == batch_size: + _infer_a_batch(inferer, test_batch, word_reverse_dict) + test_batch = [] + + if len(test_batch): + _infer_a_batch(inferer, test_batch, word_reverse_dict) + test_batch = [] + + +if __name__ == "__main__": + model_path = "models/params_pass_00000.tar.gz" + assert os.path.exists(model_path), "the trained model does not exist." 
+ + infer_path = None + word_dict = None + + infer( + data_path=infer_path, + word_dict_path=word_dict, + model_path=model_path, + batch_size=10, + class_num=2) diff --git a/nest_text_classification/network_conf.py b/nest_text_classification/network_conf.py new file mode 100644 index 0000000000..2b86431c1b --- /dev/null +++ b/nest_text_classification/network_conf.py @@ -0,0 +1,45 @@ +import paddle.v2 as paddle + + +def cnn_cov_group(group_input, hidden_size): + conv3 = paddle.networks.sequence_conv_pool( + input=group_input, context_len=3, hidden_size=hidden_size) + conv4 = paddle.networks.sequence_conv_pool( + input=group_input, context_len=4, hidden_size=hidden_size) + output_group = paddle.layer.fc( + input=[conv3, conv4], + size=hidden_size, + param_attr=paddle.attr.ParamAttr(name='_cov_value_weight'), + bias_attr=paddle.attr.ParamAttr(name='_cov_value_bias'), + act=paddle.activation.Linear()) + return output_group + + +def nest_net(dict_dim, + emb_size=28, + hidden_size=128, + class_num=2, + is_infer=False): + + data = paddle.layer.data( + "word", paddle.data_type.integer_value_sub_sequence(dict_dim)) + + emb = paddle.layer.embedding(input=data, size=emb_size) + nest_group = paddle.layer.recurrent_group( + input=[paddle.layer.SubsequenceInput(emb), hidden_size], + step=cnn_cov_group) + avg_pool = paddle.layer.pooling( + input=nest_group, + pooling_type=paddle.pooling.Avg(), + agg_level=paddle.layer.AggregateLevel.TO_NO_SEQUENCE) + prob = paddle.layer.mixed( + size=class_num, + input=[paddle.layer.full_matrix_projection(input=avg_pool)], + act=paddle.activation.Softmax()) + if is_infer == False: + label = paddle.layer.data("label", + paddle.data_type.integer_value(class_num)) + cost = paddle.layer.classification_cost(input=prob, label=label) + return cost, prob, label + + return prob diff --git a/nest_text_classification/reader.py b/nest_text_classification/reader.py new file mode 100644 index 0000000000..e880f1a355 --- /dev/null +++ b/nest_text_classification/reader.py @@ -0,0 +1,252 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +IMDB dataset. + +This module downloads IMDB dataset from +http://ai.stanford.edu/%7Eamaas/data/sentiment/. This dataset contains a set +of 25,000 highly polar movie reviews for training, and 25,000 for testing. +Besides, this module also provides API for building dictionary. +""" +import collections +import tarfile +import Queue +import re +import string +import threading +import os + +import paddle.v2.dataset.common + +URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz' +MD5 = '7c2ac02c03563afcf9b574c7e56c153a' + + +def tokenize(pattern): + """ + Read files that match the given pattern. Tokenize and yield each file. + """ + with tarfile.open( + paddle.v2.dataset.common.download(URL, 'imdb', MD5)) as tarf: + tf = tarf.next() + while tf != None: + if bool(pattern.match(tf.name)): + # newline and punctuations removal and ad-hoc tokenization. + docs = tarf.extractfile(tf).read().rstrip("\n\r").lower().split( + '.') + doc_list = [] + for doc in docs: + doc = doc.strip() + if doc: + doc_without_punc = doc.translate( + None, string.punctuation).strip() + if doc_without_punc: + doc_list.append( + [word for word in doc_without_punc.split()]) + yield doc_list + tf = tarf.next() + + +def imdb_build_dict(pattern, cutoff): + """ + Build a word dictionary from the corpus. Keys of the dictionary are words, + and values are zero-based IDs of these words. 
+ """ + word_freq = collections.defaultdict(int) + for doc_list in tokenize(pattern): + for doc in doc_list: + for word in doc: + word_freq[word] += 1 + + word_freq[''] = cutoff + 1 + word_freq = filter(lambda x: x[1] > cutoff, word_freq.items()) + dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0])) + words, _ = list(zip(*dictionary)) + word_idx = dict(zip(words, xrange(len(words)))) + return word_idx + + +def reader_creator(pos_pattern, neg_pattern, word_idx, buffer_size): + UNK = word_idx[''] + + qs = [Queue.Queue(maxsize=buffer_size), Queue.Queue(maxsize=buffer_size)] + + def load(pattern, queue): + for doc_list in tokenize(pattern): + queue.put(doc_list) + queue.put(None) + + def reader(): + # Creates two threads that loads positive and negative samples + # into qs. + t0 = threading.Thread(target=load, args=(pos_pattern, qs[0], )) + t0.daemon = True + t0.start() + + t1 = threading.Thread(target=load, args=(neg_pattern, qs[1], )) + t1.daemon = True + t1.start() + + # Read alternatively from qs[0] and qs[1]. + i = 0 + doc_list = qs[i].get() + + while doc_list != None: + ids_list = [] + for doc in doc_list: + ids_list.append([word_idx.get(w, UNK) for w in doc]) + yield ids_list, i % 2 + i += 1 + doc_list = qs[i % 2].get() + + # If any queue is empty, reads from the other queue. + i += 1 + doc_list = qs[i % 2].get() + while doc_list != None: + ids_list = [] + for doc in doc_list: + ids_list.append([word_idx.get(w, UNK) for w in doc]) + yield ids_list, i % 2 + doc_list = qs[i % 2].get() + + return reader() + + +def imdb_train(word_idx): + """ + IMDB training set creator. + + It returns a reader creator, each sample in the reader is an zero-based ID + subsequence and label in [0, 1]. + + :param word_idx: word dictionary + :type word_idx: dict + :return: Training reader creator + :rtype: callable + """ + return reader_creator( + re.compile("aclImdb/train/pos/.*\.txt$"), + re.compile("aclImdb/train/neg/.*\.txt$"), word_idx, 1000) + + +def imdb_test(word_idx): + """ + IMDB test set creator. + + It returns a reader creator, each sample in the reader is an zero-based ID + subsequence and label in [0, 1]. + + :param word_idx: word dictionary + :type word_idx: dict + :return: Test reader creator + :rtype: callable + """ + return reader_creator( + re.compile("aclImdb/test/pos/.*\.txt$"), + re.compile("aclImdb/test/neg/.*\.txt$"), word_idx, 1000) + + +def imdb_word_dict(): + """ + Build a word dictionary from the corpus. 
+ + :return: Word dictionary + :rtype: dict + """ + return imdb_build_dict( + re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150) + + +def build_dict(data_dir, save_path, use_col=1, cutoff_fre=1): + values = collections.defaultdict(int) + + for file_name in os.listdir(data_dir): + file_path = os.path.join(data_dir, file_name) + if not os.path.isfile(file_path): + continue + with open(file_path, "r") as fdata: + for line in fdata: + line_splits = line.strip().split("\t") + if len(line_splits) < use_col: + continue + doc = line_splits[use_col] + for sent in doc.strip().split("."): + for w in sent.split(): + values[w] += 1 + + values[''] = cutoff_fre + with open(save_path, "w") as f: + for v, count in sorted( + values.iteritems(), key=lambda x: x[1], reverse=True): + if count < cutoff_fre: + break + f.write("%s\t%d\n" % (v, count)) + + +def load_dict(dict_path): + return dict((line.strip().split("\t")[0], idx) + for idx, line in enumerate(open(dict_path, "r").readlines())) + + +def train_reader(data_dir, word_dict): + """ + Reader interface for training data + + :param data_dir: data directory + :type data_dir: str + :param word_dict: path of word dictionary, + the dictionary must has a "UNK" in it. + :type word_dict: Python dict + """ + + def reader(): + UNK_ID = word_dict[''] + word_col = 1 + lbl_col = 0 + + for file_name in os.listdir(data_dir): + file_path = os.path.join(data_dir, file_name) + if not os.path.isfile(file_path): + continue + with open(file_path, "r") as f: + for line in f: + line_split = line.strip().split("\t") + doc = line_split[word_col] + doc_ids = [] + for sent in doc.strip().split("."): + sent_ids = [ + word_dict.get(w, UNK_ID) for w in sent.split() + ] + if sent_ids: + doc_ids.append(sent_ids) + + yield doc_ids, int(line_split[lbl_col]) + + return reader + + +def infer_reader(file_path, word_dict): + """ + Reader interface for prediction + + :param data_dir: data directory + :type data_dir: str + :param word_dict: path of word dictionary, + the dictionary must has a "UNK" in it. 
+ :type word_dict: Python dict + """ + + def reader(): + UNK_ID = word_dict[''] + + with open(file_path, "r") as f: + for doc in f: + doc_ids = [] + for sent in doc.strip().split("."): + sent_ids = [word_dict.get(w, UNK_ID) for w in sent.split()] + if sent_ids: + doc_ids.append(sent_ids) + + yield doc_ids, doc + + return reader diff --git a/nest_text_classification/train.py b/nest_text_classification/train.py new file mode 100644 index 0000000000..3704e2a987 --- /dev/null +++ b/nest_text_classification/train.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import os +import sys +import gzip + +import paddle.v2 as paddle + +import reader +from network_conf import nest_net +from utils import logger, parse_train_cmd + + +def train(train_data_dir=None, + test_data_dir=None, + word_dict_path=None, + model_save_dir="models", + batch_size=32, + num_passes=10): + """ + :params train_data_path: path of training data, if this parameter + is not specified, imdb dataset will be used to run this example + :type train_data_path: str + :params test_data_path: path of testing data, if this parameter + is not specified, imdb dataset will be used to run this example + :type test_data_path: str + :params word_dict_path: path of training data, if this parameter + is not specified, imdb dataset will be used to run this example + :type word_dict_path: str + :params model_save_dir: dir where models saved + :type num_pass: str + :params batch_size: train batch size + :type num_pass: int + :params num_pass: train pass number + :type num_pass: int + """ + if not os.path.exists(model_save_dir): + os.mkdir(model_save_dir) + + use_default_data = (train_data_dir is None) + + if use_default_data: + logger.info(("No training data are porivided, " + "use imdb to train the model.")) + logger.info("please wait to build the word dictionary ...") + + word_dict = reader.imdb_word_dict() + + train_reader = paddle.batch( + paddle.reader.shuffle( + lambda: reader.imdb_train(word_dict), buf_size=1000), + batch_size=100) + test_reader = paddle.batch( + lambda: reader.imdb_test(word_dict), batch_size=100) + class_num = 2 + else: + if word_dict_path is None or not os.path.exists(word_dict_path): + logger.info(("word dictionary is not given, the dictionary " + "is automatically built from the training data.")) + + # build the word dictionary to map the original string-typed + # words into integer-typed index + reader.build_dict( + data_dir=train_data_dir, + save_path=word_dict_path, + use_col=1, + cutoff_fre=0) + + word_dict = reader.load_dict(word_dict_path) + class_num = args.class_num + logger.info("class number is : %d." % class_num) + + train_reader = paddle.batch( + paddle.reader.shuffle( + reader.train_reader(train_data_dir, word_dict), buf_size=1000), + batch_size=batch_size) + + if test_data_dir is not None: + # here, because training and testing data share a same format, + # we still use the reader.train_reader to read the testing data. + test_reader = paddle.batch( + paddle.reader.shuffle( + reader.train_reader(test_data_dir, word_dict), + buf_size=1000), + batch_size=batch_size) + else: + test_reader = None + + dict_dim = len(word_dict) + emb_size = 28 + hidden_size = 128 + + logger.info("length of word dictionary is : %d." 
% (dict_dim)) + + paddle.init(use_gpu=True, trainer_count=4) + + # network config + cost, prob, label = nest_net( + dict_dim, emb_size, hidden_size, class_num, is_infer=False) + + # create parameters + parameters = paddle.parameters.create(cost) + + # create optimizer + adam_optimizer = paddle.optimizer.Adam( + learning_rate=1e-3, + regularization=paddle.optimizer.L2Regularization(rate=1e-3), + model_average=paddle.optimizer.ModelAverage(average_window=0.5)) + + # create trainer + trainer = paddle.trainer.SGD( + cost=cost, + extra_layers=paddle.evaluator.auc(input=prob, label=label), + parameters=parameters, + update_equation=adam_optimizer) + + # begin training network + feeding = {"word": 0, "label": 1} + + def _event_handler(event): + """ + Define end batch and end pass event handler + """ + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + logger.info("Pass %d, Batch %d, Cost %f, %s\n" % ( + event.pass_id, event.batch_id, event.cost, event.metrics)) + + if isinstance(event, paddle.event.EndPass): + if test_reader is not None: + result = trainer.test(reader=test_reader, feeding=feeding) + logger.info("Test at Pass %d, %s \n" % (event.pass_id, + result.metrics)) + with gzip.open( + os.path.join(model_save_dir, "params_pass_%05d.tar.gz" % + event.pass_id), "w") as f: + parameters.to_tar(f) + + trainer.train( + reader=train_reader, + event_handler=_event_handler, + feeding=feeding, + num_passes=num_passes) + + logger.info("Training has finished.") + + +def main(args): + train( + train_data_dir=args.train_data_dir, + test_data_dir=args.test_data_dir, + word_dict_path=args.word_dict, + batch_size=args.batch_size, + num_passes=args.num_passes, + model_save_dir=args.model_save_dir) + + +if __name__ == "__main__": + args = parse_train_cmd() + if args.train_data_dir is not None: + assert args.word_dict, ("the parameter train_data_dir, word_dict_path " + "should be set at the same time.") + main(args) diff --git a/nest_text_classification/utils.py b/nest_text_classification/utils.py new file mode 100644 index 0000000000..79ac2f9f97 --- /dev/null +++ b/nest_text_classification/utils.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import logging +import os +import argparse +from collections import defaultdict + +logger = logging.getLogger("paddle") +logger.setLevel(logging.INFO) + + +def parse_train_cmd(): + parser = argparse.ArgumentParser( + description="PaddlePaddle text classification demo") + parser.add_argument( + "--train_data_dir", + type=str, + required=False, + help=("path of training dataset (default: None). " + "if this parameter is not set, " + "imdb dataset will be used."), + default=None) + parser.add_argument( + "--test_data_dir", + type=str, + required=False, + help=("path of testing dataset (default: None). " + "if this parameter is not set, " + "imdb dataset will be used."), + default=None) + parser.add_argument( + "--word_dict", + type=str, + required=False, + help=("path of word dictionary (default: None)." + "if this parameter is not set, imdb dataset will be used." 
+ "if this parameter is set, but the file does not exist, " + "word dictionay will be built from " + "the training data automatically."), + default=None) + parser.add_argument( + "--class_num", + type=int, + required=False, + help=("class number."), + default=2) + parser.add_argument( + "--batch_size", + type=int, + default=32, + help="the number of training examples in one forward/backward pass") + parser.add_argument( + "--num_passes", type=int, default=10, help="number of passes to train") + parser.add_argument( + "--model_save_dir", + type=str, + required=False, + help=("path to save the trained models."), + default="models") + + return parser.parse_args() From 43098129fc63469460cfbbec51cdeb6548f1984f Mon Sep 17 00:00:00 2001 From: peterzhang2029 Date: Thu, 12 Oct 2017 16:49:03 +0800 Subject: [PATCH 2/8] update repo name and dir structure --- nested_sequence/README.md | 1 + .../text_classification}/README.md | 0 .../text_classification}/data/infer.txt | 0 .../text_classification}/data/test_data/test.txt | 0 .../text_classification}/data/train_data/train.txt | 0 .../text_classification}/images/model.jpg | Bin .../text_classification}/index.html | 0 .../text_classification}/infer.py | 0 .../text_classification}/network_conf.py | 0 .../text_classification}/reader.py | 0 .../text_classification}/train.py | 0 .../text_classification}/utils.py | 0 12 files changed, 1 insertion(+) create mode 100644 nested_sequence/README.md rename {nest_text_classification => nested_sequence/text_classification}/README.md (100%) rename {nest_text_classification => nested_sequence/text_classification}/data/infer.txt (100%) rename {nest_text_classification => nested_sequence/text_classification}/data/test_data/test.txt (100%) rename {nest_text_classification => nested_sequence/text_classification}/data/train_data/train.txt (100%) rename {nest_text_classification => nested_sequence/text_classification}/images/model.jpg (100%) rename {nest_text_classification => nested_sequence/text_classification}/index.html (100%) rename {nest_text_classification => nested_sequence/text_classification}/infer.py (100%) rename {nest_text_classification => nested_sequence/text_classification}/network_conf.py (100%) rename {nest_text_classification => nested_sequence/text_classification}/reader.py (100%) rename {nest_text_classification => nested_sequence/text_classification}/train.py (100%) rename {nest_text_classification => nested_sequence/text_classification}/utils.py (100%) diff --git a/nested_sequence/README.md b/nested_sequence/README.md new file mode 100644 index 0000000000..f6a09ed22d --- /dev/null +++ b/nested_sequence/README.md @@ -0,0 +1 @@ +[TBD] diff --git a/nest_text_classification/README.md b/nested_sequence/text_classification/README.md similarity index 100% rename from nest_text_classification/README.md rename to nested_sequence/text_classification/README.md diff --git a/nest_text_classification/data/infer.txt b/nested_sequence/text_classification/data/infer.txt similarity index 100% rename from nest_text_classification/data/infer.txt rename to nested_sequence/text_classification/data/infer.txt diff --git a/nest_text_classification/data/test_data/test.txt b/nested_sequence/text_classification/data/test_data/test.txt similarity index 100% rename from nest_text_classification/data/test_data/test.txt rename to nested_sequence/text_classification/data/test_data/test.txt diff --git a/nest_text_classification/data/train_data/train.txt b/nested_sequence/text_classification/data/train_data/train.txt similarity index 100% 
rename from nest_text_classification/data/train_data/train.txt
rename to nested_sequence/text_classification/data/train_data/train.txt
diff --git a/nest_text_classification/images/model.jpg b/nested_sequence/text_classification/images/model.jpg
similarity index 100%
rename from nest_text_classification/images/model.jpg
rename to nested_sequence/text_classification/images/model.jpg
diff --git a/nest_text_classification/index.html b/nested_sequence/text_classification/index.html
similarity index 100%
rename from nest_text_classification/index.html
rename to nested_sequence/text_classification/index.html
diff --git a/nest_text_classification/infer.py b/nested_sequence/text_classification/infer.py
similarity index 100%
rename from nest_text_classification/infer.py
rename to nested_sequence/text_classification/infer.py
diff --git a/nest_text_classification/network_conf.py b/nested_sequence/text_classification/network_conf.py
similarity index 100%
rename from nest_text_classification/network_conf.py
rename to nested_sequence/text_classification/network_conf.py
diff --git a/nest_text_classification/reader.py b/nested_sequence/text_classification/reader.py
similarity index 100%
rename from nest_text_classification/reader.py
rename to nested_sequence/text_classification/reader.py
diff --git a/nest_text_classification/train.py b/nested_sequence/text_classification/train.py
similarity index 100%
rename from nest_text_classification/train.py
rename to nested_sequence/text_classification/train.py
diff --git a/nest_text_classification/utils.py b/nested_sequence/text_classification/utils.py
similarity index 100%
rename from nest_text_classification/utils.py
rename to nested_sequence/text_classification/utils.py

From 9c955cc0aa83cba7dfa1a9611d064a22d48e6d24 Mon Sep 17 00:00:00 2001
From: peterzhang2029
Date: Fri, 13 Oct 2017 12:14:49 +0800
Subject: [PATCH 3/8] refine introduction in doc

---
 nested_sequence/text_classification/README.md |  23 +-
 .../text_classification/index.html            | 237 ------
 2 files changed, 14 insertions(+), 246 deletions(-)
 delete mode 100644 nested_sequence/text_classification/index.html

diff --git a/nested_sequence/text_classification/README.md b/nested_sequence/text_classification/README.md
index 0950564b57..aee0bf7148 100644
--- a/nested_sequence/text_classification/README.md
+++ b/nested_sequence/text_classification/README.md
@@ -1,29 +1,34 @@
-# Two-Level Sequence Text Classification
+# Text Classification Based on Nested Sequences
 ## Introduction
-Sequence data is one of the main input types in natural language processing: a sentence is a sequence of words, and several sentences in turn form a paragraph. A paragraph can therefore be seen as a nested, two-level sequence whose elements are themselves sequences.
+Sequences are one of the main input types in natural language processing: sentences are built from words, and multiple sentences in turn form a paragraph. A paragraph can therefore be seen as a nested sequence (also called a two-level sequence) whose elements are themselves sequences.

-Two-level sequences are a very flexible data layout supported by `PaddlePaddle`. They help us describe more complex language data such as paragraphs and multi-turn dialogues. On top of two-level sequence input we can design a hierarchical network that encodes the input separately at the word level and at the sentence level, and thus handle complex language-understanding tasks better.
+Two-level sequences are a very flexible data layout supported by PaddlePaddle. They help us describe more complex language data such as paragraphs and multi-turn dialogues. With two-level sequences as input, we can design a hierarchical network that encodes the input separately at the word level and at the sentence level, and thus handle complex language-understanding tasks better.

-This example demonstrates how to organize two-level sequence text data with `PaddlePaddle` and complete a text classification task.
+This example demonstrates how to organize long text input (typically paragraph- or document-level) as two-level sequences in PaddlePaddle and classify such long texts.
 ## The Model
-For text classification we treat a piece of text as an array of sentences, and each sentence as an array of words, which is exactly a two-level sequence input. Every sentence of the paragraph is encoded into a vector by a convolutional neural network, the sentence vectors are then encoded into one paragraph vector by a pooling layer, and this vector serves as the paragraph representation. For classification, the paragraph representation is fed into a classifier to produce the result.
+We treat a piece of text as a sequence of sentences, and each sentence as a sequence of words.
+
+We first encode every sentence in the paragraph with a convolutional neural network; then the sentence vectors are passed through a pooling layer to obtain one paragraph encoding; finally the paragraph encoding is fed into the classifier (a fully connected layer with softmax activation) to produce the final prediction.

 **The model structure is shown in the figure below**
<p align="center">
<img src="images/model.jpg"/><br/>
-Figure 1. The text classification model in this example
+Figure 1. The text classification model based on nested sequences
</p>
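The figure's hierarchy maps directly onto the input layout. As a minimal sketch of that layout (the toy dictionary below is hypothetical; real runs build the dictionary from the corpus), one sample for `paddle.data_type.integer_value_sub_sequence` is simply a list of sentences, each of which is a list of word ids:

```python
# Minimal sketch of the two-level (nested) sequence layout.
# The toy dictionary is hypothetical; real runs build it from the corpus.
word_dict = {"<unk>": 0, "this": 1, "movie": 2, "is": 3,
             "good": 4, "i": 5, "like": 6, "it": 7}

paragraph = "This movie is good. I like it."

# Split the paragraph into sentences, then map every word to its id.
doc_ids = []
for sent in paragraph.lower().strip(".").split("."):
    sent_ids = [word_dict.get(w, word_dict["<unk>"]) for w in sent.split()]
    if sent_ids:
        doc_ids.append(sent_ids)

# doc_ids == [[1, 2, 3, 4], [5, 6, 7]]: the outer list is the paragraph
# (a sequence of sentences), each inner list is a sentence (a sequence of
# word ids) -- exactly one sample of integer_value_sub_sequence.
print(doc_ids)
```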
The code that implements this network structure in PaddlePaddle is in `network_conf.py`.

-To process a two-level sequence, the two-level time-sequence data first has to be transformed into single-level time sequences, and each single-level sequence is then processed on its own. PaddlePaddle provides the `recurrent_group` interface for this transformation. In this example, each paragraph of the text is decomposed through recurrent_group, and every sentence obtained this way then goes through a CNN to learn its vector representation.
+To process a two-level time sequence, the data first has to be transformed into single-level time sequences, and each single-level sequence is then processed on its own. In PaddlePaddle, `recurrent_group` is the main tool for building hierarchical models over two-level sequences. Here we use two nested `recurrent_group`s: the outer one decomposes the paragraph into sentences, so its `step` function receives a sequence of sentences as input; the inner one decomposes each sentence into words, so its `step` function receives individual (non-sequence) words as input.
+
+At the word level, a CNN takes the word vectors as input and learns a representation of each sentence; at the paragraph level, the sentence representations are pooled into a paragraph representation.
+
 ``` python
 nest_group = paddle.layer.recurrent_group(input=[paddle.layer.SubsequenceInput(emb),
                                                  hidden_size],
                                           step=cnn_cov_group)
 ```
-When using the `recurrent_group` interface for the transformation, the input sequence must be passed in through the `input` argument. Since the transformation implemented here is `two-level time sequence => single-level time sequence`, the input data has to be marked as `SubsequenceInput`.
+

 The decomposed single-level sequences each go through a CNN that learns the corresponding vector representation. The CNN consists of the following parts:

@@ -77,7 +82,7 @@ python infer.py
 ### Training
 1. Data organization

-Assume training data in the following format: one sample per line, columns separated by `\t`; the first column is the class label and the second column is the input text. Two example lines:
+The input data format is as follows: one sample per line, columns separated by `\t`; the first column is the class label and the second column is the input text. Two example lines:

diff --git a/nested_sequence/text_classification/index.html b/nested_sequence/text_classification/index.html
deleted file mode 100644
index a3fa50b3af..0000000000
--- a/nested_sequence/text_classification/index.html
+++ /dev/null
@@ -1,237 +0,0 @@
-[237 deleted lines of the HTML page that rendered README.md; content omitted]
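To make the `label\ttext` format above concrete, here is a small self-contained sketch of how one such line is turned into the `(doc_ids, label)` pair that the readers yield; the sample line and the tiny dictionary are hypothetical stand-ins:

```python
# Hypothetical "label\ttext" line and toy dictionary, for illustration only.
word_dict = {"<unk>": 0, "what": 1, "a": 2, "terrible": 3,
             "movie": 4, "waste": 5, "of": 6, "time": 7}
UNK_ID = word_dict["<unk>"]

line = "0\tWhat a terrible movie. A waste of time."

label, text = line.strip().split("\t")
doc_ids = []
for sent in text.lower().split("."):
    sent_ids = [word_dict.get(w, UNK_ID) for w in sent.split()]
    if sent_ids:  # skip empty sentences produced by the trailing period
        doc_ids.append(sent_ids)

sample = (doc_ids, int(label))
# sample == ([[1, 2, 3, 4], [2, 5, 6, 7]], 0)
print(sample)
```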
From 28dd91639ba2750c85b0943c004174958e3d772d Mon Sep 17 00:00:00 2001
From: peterzhang2029
Date: Fri, 13 Oct 2017 18:09:34 +0800
Subject: [PATCH 4/8] replace argparse with click and update example data

---
 nested_sequence/text_classification/README.md |  66 +++--
 .../text_classification/data/infer.txt        |   8 +-
 .../data/test_data/test.txt                   |   8 +-
 .../data/train_data/train.txt                 |   8 +-
 .../text_classification/index.html            | 250 ++++++++++++++++++
 nested_sequence/text_classification/infer.py  |  51 ++--
 .../text_classification/network_conf.py       |   4 +
 nested_sequence/text_classification/reader.py |  33 ---
 nested_sequence/text_classification/train.py  |  83 ++++--
 nested_sequence/text_classification/utils.py  |  82 ++----
 10 files changed, 419 insertions(+), 174 deletions(-)
 create mode 100644 nested_sequence/text_classification/index.html

diff --git a/nested_sequence/text_classification/README.md b/nested_sequence/text_classification/README.md
index aee0bf7148..e066a48065 100644
--- a/nested_sequence/text_classification/README.md
+++ b/nested_sequence/text_classification/README.md
@@ -135,44 +135,52 @@
 The training script `train.py` accepts the following options:

 ```
---train_data_dir TRAIN_DATA_DIR
-                      path of training dataset (default: None). if this
-                      parameter is not set, imdb dataset will be used.
---test_data_dir TEST_DATA_DIR
-                      path of testing dataset (default: None). if this
-                      parameter is not set, imdb dataset will be used.
---word_dict WORD_DICT
-                      path of word dictionary (default: None).if this
-                      parameter is not set, imdb dataset will be used.if
-                      this parameter is set, but the file does not exist,
-                      word dictionay will be built from the training data
-                      automatically.
---class_num CLASS_NUM
-                      class number.
---batch_size BATCH_SIZE
-                      the number of training examples in one
-                      forward/backward pass
---num_passes NUM_PASSES
-                      number of passes to train
---model_save_dir MODEL_SAVE_DIR
-                      path to save the trained models.
+Options:
+  --train_data_dir TEXT  path of training dataset (default: None). if this
+                         parameter is not set, imdb dataset will be used.
+  --test_data_dir TEXT   path of testing dataset (default: None). if this
+                         parameter is not set, imdb dataset will be used.
+  --word_dict_path TEXT  path of word dictionary (default: None). if this
+                         parameter is not set, imdb dataset will be used. if
+                         this parameter is set, but the file does not exist,
+                         word dictionary will be built from the training data
+                         automatically.
+  --class_num INTEGER    class number (default: 2).
+  --batch_size INTEGER   the number of training examples in one batch
+                         (default: 32).
+  --num_passes INTEGER   number of passes to train (default: 10).
+  --model_save_dir TEXT  path to save the trained models (default: 'models').
+  --help                 Show this message and exit.
 ```

 Adjust the launch options of `train.py` to run this example directly.

 Using the sample data under the `data` directory as an example, run in the terminal:
 ```bash
-python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_data' --word_dict 'dict.txt'
+python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_data' --word_dict_path 'dict.txt'
 ```
 to train on the sample data.

 ### Prediction
-1. Modify the following variables in `infer.py` to specify the model and the test data to use.
+1. Specify the command-line options

-```python
-model_path = "models/params_pass_00000.tar.gz" # path of the trained model
-assert os.path.exists(model_path), "the trained model does not exist."
-infer_path = 'data/infer.txt'                  # directory of the test file
-word_dict = 'dict.txt'                         # path of the dictionary
+The prediction script `infer.py` accepts the following options:
+
+```
+Options:
+  --data_path TEXT       path of data for inference (default: None). if this
+                         parameter is not set, imdb test dataset will be used.
+  --model_path TEXT      path of saved model. (default:
+                         'models/params_pass_00000.tar.gz')
+  --word_dict_path TEXT  path of word dictionary (default: None). if this
+                         parameter is not set, imdb dataset will be used.
+  --class_num INTEGER    class number (default: 2).
+  --batch_size INTEGER   the number of examples in one batch (default: 32).
+  --help                 Show this message and exit.
+```
+
+2. Using the sample data under the `data` directory as an example, run in the terminal:
+```bash
+python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt'
 ```

-2. Run `python infer.py` in the terminal.
+to predict on the sample data.
diff --git a/nested_sequence/text_classification/data/infer.txt b/nested_sequence/text_classification/data/infer.txt
index 3d9681b21d..8309d5c026 100644
--- a/nested_sequence/text_classification/data/infer.txt
+++ b/nested_sequence/text_classification/data/infer.txt
@@ -1,4 +1,4 @@
-At this point it seems almost unnecessary to state that Jon Bon Jovi delivers a firm, strong, seamless performance as Derek Bliss. His capability as an actor has been previously established by his critical acclaim garnered in other films (The Leading Man, No Looking Back). But, in case anyone is still wondering, yes, Jon Bon Jovi can act. He can act well and that's come to be expected of him. It's easy to separate Derek from the guy who belts out hits on VH-1.

I generally would not watch a horror movie. I've come to expect them to focus on sensationalistic gore rather than dialogue and plot. What pleased me most about this film was that there really was a viable plot being moved along. The gore is not so much as to become the focus of the film and does not have a disturbingly realistic quality of films with higher technical effects budgets. So, gore fans might be disappointed, but story fans will not.

Unlike an action film like U-571 where the dialogue takes a back seat to the bombast, we get a chance to know "the good guys" and actually care what happens to them. A few scenes are left unexplained (like Derek's hallucinations) but you get the feeling certain aspects were as they were to lay the foundation for a sequel. Unfortunately, with the lack of interest shown by Hollywood in this film, that sequel will never happen. These few instances are forgiveable knowing that Vampires could have been a continuing series.

Is this the best film I've ever seen in my life? No. Is it a good way to spend about two hours being entertained? Yes. It won't leave the person who fears horror movies with insomnia and it won't leave the horror movie lover completely disappointed either. If you're somewhere in between the horror genre loather and the horror genre lover, this film is for you. It reaches a happy medium with the effects and story balancing each other.

-The original Vampires (1998) is one of my favorites. I was curious to see how a sequel would work considering they used none of the original characters. I was quite surprised at how this played out. As a rule, sequels are never as good as the original, with a few exceptions. Though this one was not a great movie, the writer did well in keeping the main themes & vampire lore from the first one in tact. Jon Bon Jovi was a drawback initially, but he proved to be a half-way decent Slayer. I doubt anyone could top James Wood's performance in the first one, though. unless you bring in Buffy!

All in all, this was a decent watch & I would watch it again.

I was left with two questions, though... what happened to Jack Crow & how did Derek Bliss come to be a slayer? Guess we'll just have to leave that to imagination. -The movie opens with a flashback to Doddsville County High School on April Fool's Day. A group of students play a prank on class nerd Marty. When they are punished for playing said prank, they follow up with a bigger prank which (par for the course in slasher films involving pranks on class nerds) goes ridiculously awry leaving Marty simultaneously burned by fire and disfigured by acid for the sake of being thorough. Fast forward five years, where we find members of the student body gathering at the now abandoned high school for their five year class reunion. We find out that it is no coincidence that everyone at the reunion belonged to the clique of pranksters from the flashback scene, as all of the attendees are being stalked and killed by a mysterious, jester mask-clad murderer in increasingly complicated and mind-numbingly ludicrous fashions. It doesn't take Sherlock Holmes to solve the mystery of the killer's identity, as it is revealed to be none other than a scarred Marty who has seemingly been using his nerd rage and high intellect to bend the laws of physics and engineering in order to rig the school for his revenge scenario. The film takes a turn for the bizarre as Marty finishes exacting his revenge on his former tormentors, only to be haunted by their ghosts. Marty is finally pushed fully over the edge and takes his own life. Finally, the film explodes in a crescendo of disjointed weirdness as the whole revenge scenario is revealed to be a dream in the first place as Marty wakes up in a hospital bed, breaks free of his restraints, stabs a nurse, and finally disfigures his own face.

The script is tired and suffers from a terminal case of horror movie logic. The only originality comes from the mind-numbingly convoluted ways that the victims are dispatched. The absurd it-was-all-a-dream ending feels tacked on. It's almost as if someone pointed out the disjointed nature of the film and the writer decided then and there that it was a dream.

Technically speaking, the film is atrocious. Some scenes were filmed so dark that I had to pause the film and play with the color on my television. The acting is sub-par, even for slasher films. I can't help but think that casting was a part of the problem as all of the actors look at least five years older than the characters they portray, which makes the flashback scene even more unintentionally laughable. Their lack of commitment to the movie is made obvious as half of them can't bother to keep their accents straight through the movie.

All of this being said, if you like bad horror movies, you might like this one, too. It isn't the worst film of the genre, but it's far from the best. -Robert Taylor definitely showed himself to be a fine dramatic actor in his role as a gun-slinging buffalo hunter in this 1956 western. It was one of the few times that Taylor would play a heavy in a film. Nonetheless, this picture was far from great as shortly after this, Taylor fled to television with the successful series The Detectives.

Stuart Granger hid his British accent and turned in a formidable performance as Taylor's partner.

Taylor is a bigot here and his hatred for the Indians really shows.

Another very good performance here was by veteran actor Lloyd Nolan as an aged, drinking old-timer who joined in the hunt for buffalo as well. In his early scenes, Nolan was really doing an excellent take-off of Walter Huston in his Oscar-winning role in The Treasure of the Sierre Madre in 1948. Note the appearance of Russ Tamblyn in the film. The following year Tamblyn and Nolan would join in the phenomenal Peyton Place.

The writing in the film is stiff at best. By the film's end, it's the elements of nature that did Taylor in. How about the elements of the writing here? \ No newline at end of file +I was overtaken by the emotion. Unforgettable rendering of a wartime story which is unknown to most people. The performances were faultless and outstanding. +The original Vampires (1998) is one of my favorites. I was curious to see how a sequel would work considering they used none of the original characters. I was quite surprised at how this played out. +Without question, the worst ELVIS film ever made. The movie portrays all Indians as drunk, stupid, and lazy. Watch ELVIS's skin change color throughout the film. +I thought this movie was hysterical. I have watched it many times and recommend it highly. Mel Brooks, was excellent. The cast was fantastic..I don't understand how this movie gets a 2 out of 5 rating. I loved it. \ No newline at end of file diff --git a/nested_sequence/text_classification/data/test_data/test.txt b/nested_sequence/text_classification/data/test_data/test.txt index 5660ee2f1a..b5e7d11aa5 100644 --- a/nested_sequence/text_classification/data/test_data/test.txt +++ b/nested_sequence/text_classification/data/test_data/test.txt @@ -1,4 +1,4 @@ -1 I liked the film. Some of the action scenes were very interesting, tense and well done. I especially liked the opening scene which had a semi truck in it. A very tense action scene that seemed well done.

Some of the transitional scenes were filmed in interesting ways such as time lapse photography, unusual colors, or interesting angles. Also the film is funny is several parts. I also liked how the evil guy was portrayed too. I'd give the film an 8 out of 10. -0 The plot for Descent, if it actually can be called a plot, has two noteworthy events. One near the beginning - one at the end. Together these events make up maybe 5% of the total movie time. Everything (and I mean _everything_) in between is basically the director's desperate effort to fill in the minutes. I like disturbing movies, I like dark movies and I don't get troubled by gritty scenes - but if you expect me to sit through 60 minutes of hazy/dark (literally) scenes with NO storyline you have another thing coming. Rosario Dawson, one of my favorite actresses is completely wasted here. And no, she doesn't get naked, not even in the NC-17 version, which I saw.

If you have a couple of hours to throw away and want to watch "Descent", take a nap instead - you'll probably have more interesting dreams. -0 This film lacked something I couldn't put my finger on at first: charisma on the part of the leading actress. This inevitably translated to lack of chemistry when she shared the screen with her leading man. Even the romantic scenes came across as being merely the actors at play. It could very well have been the director who miscalculated what he needed from the actors. I just don't know.

But could it have been the screenplay? Just exactly who was the chef in love with? He seemed more enamored of his culinary skills and restaurant, and ultimately of himself and his youthful exploits, than of anybody or anything else. He never convinced me he was in love with the princess.

I was disappointed in this movie. But, don't forget it was nominated for an Oscar, so judge for yourself. -0 I read the book a long time back and don't specifically remember the plot but do remember that I enjoyed it. Since I'm home sick on the couch it seemed like a good idea and Hey !! It is a Lifetime movie.

The movie is populated with grade B actors and actresses.

The female cast is right out of Desperate Housewives. I've never seen the show but there are lots of commercials for the show and I get the gist. Is there nothing original anymore? Sure, but not on Lifetime.

The male cast are all fairly effeminate looking and acting but the girls need to have husbands I suppose.

In one scene a female is struggling with a male, for her life, and what does she do??? Kicks him in the testicles. What else? Women love that but let me tell you girls something. It's not as easy as it's always made to look.

It wasn't all bad. I did get the chills a time or two so I have to credit someone with that. +1 I liked the film. Some of the action scenes were very interesting, tense and well done. I especially liked the opening scene which had a semi truck in it. Also the film is funny is several parts. I'd give the film an 8 out of 10. +0 The plot for Descent, if it actually can be called a plot, has two noteworthy events. One near the beginning - one at the end. Together these events make up maybe 5% of the total movie time. Everything (and I mean _everything_) in between is basically the director's desperate effort to fill in the minutes. +0 This film lacked something I couldn't put my finger on at first: charisma on the part of the leading actress. This inevitably translated to lack of chemistry when she shared the screen with her leading man. Even the romantic scenes came across as being merely the actors at play. +0 I read the book a long time back and don't specifically remember the plot but do remember that I enjoyed it. Since I'm home sick on the couch it seemed like a good idea and Hey !! It is a Lifetime movie.

The movie is populated with grade B actors and actresses.

The female cast is right out of Desperate Housewives. \ No newline at end of file diff --git a/nested_sequence/text_classification/data/train_data/train.txt b/nested_sequence/text_classification/data/train_data/train.txt index 6a335327d1..51c67c39fb 100644 --- a/nested_sequence/text_classification/data/train_data/train.txt +++ b/nested_sequence/text_classification/data/train_data/train.txt @@ -1,4 +1,4 @@ -0 I admit that I am a vampire addict: I have seen so many vampire movies I have lost count and this one is definitely in the top ten. I was very impressed by the original John Carpenter's Vampires and when I descovered there was a sequel I went straight out and bought it. This movie does not obey quite the same rules as the first, and it is not quite so dark, but it is close enough and I felt that it built nicely on the original.

Jon Bon Jovi was very good as Derek Bliss: his performance was likeable and yet hard enough for the viewer to believe that he might actually be able to survive in the world in which he lives. One of my favourite parts was just after he meets Zoey and wanders into the bathroom of the diner to check to see if she is more than she seems. His comments are beautifully irreverant and yet emminently practical which contrast well with the rest of the scene as it unfolds.

The other cast members were also well chosen and they knitted nicely to produce an entertaining and original film. It is not simply a rehash of the first movie and it has grown in a similar way to the way Fright Night II grew out of Fright Night. There are different elements which make it a fresh movie with a similar theme.

If you like vampire movies I would recommend this one. If you prefer your films less bloody then choose something else. -0 Almost too well done... "John Carpenter's Vampires" was entertaining, a solid piece of popcorn-entertainment with a budget small enough not to be overrun by special effects. And obviously aiming on the "From Dusk Till Dawn"-audience. "Vampires: Los Muertos" tries the same starting with a rock-star Jon Bon Jovi playing one of the main characters, but does that almost too well...: I haven't seen Jon Bon Jovi in any other movie, so I am not able to compare his acting in "Vampires: Los Muertos" to his other roles, but I was really suprised of his good performance. After the movie started he convinced me not expecting him to grab any guitar and playing "It' my life" or something, but kill vampires, showing no mercy and doing a job which has to be done. This means a lot, because a part of the audience (also me) was probably thinking: "...just because he's a rockstar...". Of course Bon Jovi is not James Woods but to be honest: It could have been much worse, and in my opinion Bon Jovi did a very good performance. The vampiress played by Arly Jover is not the leather dressed killer-machine of a vampire-leader we met in Part 1 (or in similar way in "Ghosts of Mars"). Jover plays the vampire very seductive and very sexy, moving as lithe as a cat, attacking as fast as a snake and dressed in thin, light almost transparent very erotic cloth. And even the optical effects supporting her kind of movement are very well made. It really takes some beating. But the director is in some parts of the film only just avoiding turning the movie from an action-horrorfilm into a sensitive horrormovie like Murnau's "Nosferatu". You can almost see the director's temptation to create a movie with a VERY personal note and different to the original. This is the real strength of the movie and at the same time its weakest point: The audience celebrating the fun-bloodbath of the first movie is probably expecting a pure fun-bloodbath for the second time and might be a little disappointed. Make no mistake: "Vampires:Los Muertos" IS a fun-bloodbath but it's just not ALL THE TIME this kind of movie. Just think of the massacre in the bar compared to the scene in which the vampiress tries to seduce Zoey in the ruins: the bar-massacre is what you expect from american popcorn-entertainment, the seducing-Zoey-in-the-ruins-scene is ALMOST european-like cinema (the movie is eager to tell us more about the relationship between Zoey and the vampiress, but refuses answers at the same time. Because it would had slow down the action? Showed the audience a vampiress with a human past, a now suffering creature and not only a beast which is just slaughtering anybody). And that's the point to me which decides whether the movie is accepted by the audience of the original movie or not. And also: Is the "From Dusk Till Dawn"-audience really going to like this? I'm not sure about that. Nevertheless Tommy Lee Wallace did really a great job, "Vampires:Los Muertos" is surprisingly good. But I also think to direct a sequel of a popcorn movie Wallace is sometimes almost too creative, too expressive. Like he's keeping himself from developing his talent in order to satisfy the expectations of audience. In my opinion, Wallace' talent fills the movie with life and is maybe sometimes sucking it out at the same time. "Vampires: Los Muertos" is almost too well done. 
(I give it 7 of 10) -1 We all know that countless duds have graced the 80s slasher genre and often deserve nothing but our deepest disgust. Maybe that's a bit hastey but damn if "Slaughter High" wasn't terribly unoriginal, even for a slasher flick. Pretty much, the plot involves a kid who experienced a Carrie-like shower humiliation in high school and returns to the dilapidated building to seek out revenge on a group of former-bullies who all show up to reminisce. As you'd expect, they are killed off steadily by a masked madman on April 1st by means of electrocution, burning, hanging, and chemically altered beer. I've got a number of problems with the plot details and settings of this movie, but considering the ending, I feel the need to discard my complaints and just say that this is a complete waste of time. Ignore any thought of viewing this movie. -1 What a terrible movie. The acting was bad, the pacing was bad, the cinematography was bad, the directing was bad, the "special" effects were bad. You expect a certain degree of badness in a slasher, but even the killings were bad.

First of all, the past event that set up the motive for the slaughter went on for 15 or 20 minutes. I thought it would never end. They could have removed 80% of it and explained what happened well enough.

Then, the victims were invited to the "reunion" in an abandoned school which still had all the utilities turned on. One of the victims thought this was a little odd, but they dismissed it and decided to break in anyway.

Finally, the killings were so fake as to be virtually unwatchable.

There is no reason to watch this movie, unless you want to see some breasts, and not very good breasts at that. This movie makes Showgirls virtually indistinguishable from Citizen Kane.
+0 It was a Sunday night and I was waiting for the advertised movie on TV. They said it was a comedy! The movie started, 10 minutes passed, after that 30 minutes and I didn't laugh not even once. The fact is that the movie ended and I didn't get even on echance to laugh.
+0 I saw this piece of garbage on AMC last night, and wonder how it could be considered in any way an American Movie Classic. It was awful in every way. How badly did Jack Lemmon, James Stewart and the rest of the cast need cash that they would even consider doing this movie?
+1 its not as good as the first movie,but its a good solid movie its has good car chase scenes,on the remake of this movie there a story for are hero to drive fast as his trying to rush to the side of his ailing wife,the ending is great just a good fair movie to watch in my opinion.
+1 Rosalind Russell executes a power-house performance as Rosie Lord, a very wealthy woman with greedy heirs. With an Auntie Mame-type character, this actress can never go wrong. Her very-real terror at being in an insane assylum is a wonderful piece of acting. Everyone should watch this.
\ No newline at end of file
diff --git a/nested_sequence/text_classification/index.html b/nested_sequence/text_classification/index.html
new file mode 100644
index 0000000000..e5fdfbb9eb
--- /dev/null
+++ b/nested_sequence/text_classification/index.html
@@ -0,0 +1,250 @@
+[250-line HTML page that renders README.md; content omitted]
+ + + + + + + diff --git a/nested_sequence/text_classification/infer.py b/nested_sequence/text_classification/infer.py index 2d66b76aa8..24a39f6908 100644 --- a/nested_sequence/text_classification/infer.py +++ b/nested_sequence/text_classification/infer.py @@ -1,16 +1,41 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- import sys import os import gzip +import click import paddle.v2 as paddle import reader from network_conf import nest_net -from utils import logger - - +from utils import logger, load_dict + + +@click.command('infer') +@click.option( + "--data_path", + default=None, + help=("path of data for inference (default: None). " + "if this parameter is not set, " + "imdb test dataset will be used.")) +@click.option( + "--model_path", + type=str, + default='models/params_pass_00000.tar.gz', + help=("path of saved model. " + "(default: 'models/params_pass_00000.tar.gz')")) +@click.option( + "--word_dict_path", + type=str, + default=None, + help=("path of word dictionary (default: None)." + "if this parameter is not set, imdb dataset will be used.")) +@click.option( + "--class_num", type=int, default=2, help="class number (default: 2).") +@click.option( + "--batch_size", + type=int, + default=32, + help="the number of examples in one batch (default: 32).") def infer(data_path, model_path, word_dict_path, batch_size, class_num): def _infer_a_batch(inferer, test_batch, ids_2_word): probs = inferer.infer(input=test_batch, field=["value"]) @@ -24,6 +49,7 @@ def _infer_a_batch(inferer, test_batch, ids_2_word): " ".join(["{:0.4f}".format(p) for p in prob]), word_text)) + assert os.path.exists(model_path), "the trained model does not exist." logger.info("begin to predict...") use_default_data = (data_path is None) @@ -37,7 +63,7 @@ def _infer_a_batch(inferer, test_batch, ids_2_word): assert os.path.exists( word_dict_path), "the word dictionary file does not exist" - word_dict = reader.load_dict(word_dict_path) + word_dict = load_dict(word_dict_path) word_reverse_dict = dict((value, key) for key, value in word_dict.iteritems()) @@ -68,15 +94,4 @@ def _infer_a_batch(inferer, test_batch, ids_2_word): if __name__ == "__main__": - model_path = "models/params_pass_00000.tar.gz" - assert os.path.exists(model_path), "the trained model does not exist." 
- - infer_path = None - word_dict = None - - infer( - data_path=infer_path, - word_dict_path=word_dict, - model_path=model_path, - batch_size=10, - class_num=2) + infer() diff --git a/nested_sequence/text_classification/network_conf.py b/nested_sequence/text_classification/network_conf.py index 2b86431c1b..0ecc3c2d12 100644 --- a/nested_sequence/text_classification/network_conf.py +++ b/nested_sequence/text_classification/network_conf.py @@ -6,12 +6,16 @@ def cnn_cov_group(group_input, hidden_size): input=group_input, context_len=3, hidden_size=hidden_size) conv4 = paddle.networks.sequence_conv_pool( input=group_input, context_len=4, hidden_size=hidden_size) + + #output_group = paddle.layer.concat(input=[conv3, conv4]) + output_group = paddle.layer.fc( input=[conv3, conv4], size=hidden_size, param_attr=paddle.attr.ParamAttr(name='_cov_value_weight'), bias_attr=paddle.attr.ParamAttr(name='_cov_value_bias'), act=paddle.activation.Linear()) + return output_group diff --git a/nested_sequence/text_classification/reader.py b/nested_sequence/text_classification/reader.py index e880f1a355..ddefbbdcd7 100644 --- a/nested_sequence/text_classification/reader.py +++ b/nested_sequence/text_classification/reader.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- """ IMDB dataset. @@ -157,37 +155,6 @@ def imdb_word_dict(): re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150) -def build_dict(data_dir, save_path, use_col=1, cutoff_fre=1): - values = collections.defaultdict(int) - - for file_name in os.listdir(data_dir): - file_path = os.path.join(data_dir, file_name) - if not os.path.isfile(file_path): - continue - with open(file_path, "r") as fdata: - for line in fdata: - line_splits = line.strip().split("\t") - if len(line_splits) < use_col: - continue - doc = line_splits[use_col] - for sent in doc.strip().split("."): - for w in sent.split(): - values[w] += 1 - - values[''] = cutoff_fre - with open(save_path, "w") as f: - for v, count in sorted( - values.iteritems(), key=lambda x: x[1], reverse=True): - if count < cutoff_fre: - break - f.write("%s\t%d\n" % (v, count)) - - -def load_dict(dict_path): - return dict((line.strip().split("\t")[0], idx) - for idx, line in enumerate(open(dict_path, "r").readlines())) - - def train_reader(data_dir, word_dict): """ Reader interface for training data diff --git a/nested_sequence/text_classification/train.py b/nested_sequence/text_classification/train.py index 3704e2a987..1742248caa 100644 --- a/nested_sequence/text_classification/train.py +++ b/nested_sequence/text_classification/train.py @@ -1,22 +1,57 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- import os import sys import gzip +import click import paddle.v2 as paddle import reader from network_conf import nest_net -from utils import logger, parse_train_cmd - - -def train(train_data_dir=None, - test_data_dir=None, - word_dict_path=None, - model_save_dir="models", - batch_size=32, - num_passes=10): +from utils import build_dict, load_dict, logger + + +@click.command('train') +@click.option( + "--train_data_dir", + default=None, + help=("path of training dataset (default: None). " + "if this parameter is not set, " + "imdb dataset will be used.")) +@click.option( + "--test_data_dir", + default=None, + help=("path of testing dataset (default: None). " + "if this parameter is not set, " + "imdb dataset will be used.")) +@click.option( + "--word_dict_path", + type=str, + default=None, + help=("path of word dictionary (default: None)." 
+ "if this parameter is not set, imdb dataset will be used." + "if this parameter is set, but the file does not exist, " + "word dictionay will be built from " + "the training data automatically.")) +@click.option( + "--class_num", type=int, default=2, help="class number (default: 2).") +@click.option( + "--batch_size", + type=int, + default=32, + help=("the number of training examples in one batch " + "(default: 32).")) +@click.option( + "--num_passes", + type=int, + default=10, + help="number of passes to train (default: 10).") +@click.option( + "--model_save_dir", + type=str, + default="models", + help="path to save the trained models (default: 'models').") +def train(train_data_dir, test_data_dir, word_dict_path, class_num, + model_save_dir, batch_size, num_passes): """ :params train_data_path: path of training data, if this parameter is not specified, imdb dataset will be used to run this example @@ -34,6 +69,10 @@ def train(train_data_dir=None, :params num_pass: train pass number :type num_pass: int """ + if train_data_dir is not None: + assert word_dict_path, ("the parameter train_data_dir, word_dict_path " + "should be set at the same time.") + if not os.path.exists(model_save_dir): os.mkdir(model_save_dir) @@ -60,14 +99,14 @@ def train(train_data_dir=None, # build the word dictionary to map the original string-typed # words into integer-typed index - reader.build_dict( + build_dict( data_dir=train_data_dir, save_path=word_dict_path, use_col=1, cutoff_fre=0) - word_dict = reader.load_dict(word_dict_path) - class_num = args.class_num + word_dict = load_dict(word_dict_path) + class_num = class_num logger.info("class number is : %d." % class_num) train_reader = paddle.batch( @@ -145,19 +184,5 @@ def _event_handler(event): logger.info("Training has finished.") -def main(args): - train( - train_data_dir=args.train_data_dir, - test_data_dir=args.test_data_dir, - word_dict_path=args.word_dict, - batch_size=args.batch_size, - num_passes=args.num_passes, - model_save_dir=args.model_save_dir) - - if __name__ == "__main__": - args = parse_train_cmd() - if args.train_data_dir is not None: - assert args.word_dict, ("the parameter train_data_dir, word_dict_path " - "should be set at the same time.") - main(args) + train() diff --git a/nested_sequence/text_classification/utils.py b/nested_sequence/text_classification/utils.py index 79ac2f9f97..0362e13782 100644 --- a/nested_sequence/text_classification/utils.py +++ b/nested_sequence/text_classification/utils.py @@ -1,61 +1,37 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -import logging import os -import argparse +import logging from collections import defaultdict logger = logging.getLogger("paddle") logger.setLevel(logging.INFO) -def parse_train_cmd(): - parser = argparse.ArgumentParser( - description="PaddlePaddle text classification demo") - parser.add_argument( - "--train_data_dir", - type=str, - required=False, - help=("path of training dataset (default: None). " - "if this parameter is not set, " - "imdb dataset will be used."), - default=None) - parser.add_argument( - "--test_data_dir", - type=str, - required=False, - help=("path of testing dataset (default: None). " - "if this parameter is not set, " - "imdb dataset will be used."), - default=None) - parser.add_argument( - "--word_dict", - type=str, - required=False, - help=("path of word dictionary (default: None)." - "if this parameter is not set, imdb dataset will be used." 
- "if this parameter is set, but the file does not exist, " - "word dictionay will be built from " - "the training data automatically."), - default=None) - parser.add_argument( - "--class_num", - type=int, - required=False, - help=("class number."), - default=2) - parser.add_argument( - "--batch_size", - type=int, - default=32, - help="the number of training examples in one forward/backward pass") - parser.add_argument( - "--num_passes", type=int, default=10, help="number of passes to train") - parser.add_argument( - "--model_save_dir", - type=str, - required=False, - help=("path to save the trained models."), - default="models") +def build_dict(data_dir, save_path, use_col=1, cutoff_fre=1): + values = defaultdict(int) + + for file_name in os.listdir(data_dir): + file_path = os.path.join(data_dir, file_name) + if not os.path.isfile(file_path): + continue + with open(file_path, "r") as fdata: + for line in fdata: + line_splits = line.strip().split("\t") + if len(line_splits) < use_col: + continue + doc = line_splits[use_col] + for sent in doc.strip().split("."): + for w in sent.split(): + values[w] += 1 + + values[''] = cutoff_fre + with open(save_path, "w") as f: + for v, count in sorted( + values.iteritems(), key=lambda x: x[1], reverse=True): + if count < cutoff_fre: + break + f.write("%s\t%d\n" % (v, count)) + - return parser.parse_args() +def load_dict(dict_path): + return dict((line.strip().split("\t")[0], idx) + for idx, line in enumerate(open(dict_path, "r").readlines())) From e89af968b069e0828847e5839f17d0ee607aa320 Mon Sep 17 00:00:00 2001 From: peterzhang2029 Date: Fri, 13 Oct 2017 18:17:53 +0800 Subject: [PATCH 5/8] add requirements.txt --- nested_sequence/text_classification/README.md | 5 ++++- nested_sequence/text_classification/index.html | 5 ++++- nested_sequence/text_classification/requirements.txt | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 nested_sequence/text_classification/requirements.txt diff --git a/nested_sequence/text_classification/README.md b/nested_sequence/text_classification/README.md index e066a48065..ca272a8097 100644 --- a/nested_sequence/text_classification/README.md +++ b/nested_sequence/text_classification/README.md @@ -60,7 +60,10 @@ prob = paddle.layer.mixed(size=class_num, input=[paddle.layer.full_matrix_projection(input=avg_pool)], act=paddle.activation.Softmax()) ``` - +## 安装依赖包 +```bash +pip install -r requirements.txt +``` ## 使用 PaddlePaddle 内置数据运行 diff --git a/nested_sequence/text_classification/index.html b/nested_sequence/text_classification/index.html index e5fdfbb9eb..de06050628 100644 --- a/nested_sequence/text_classification/index.html +++ b/nested_sequence/text_classification/index.html @@ -102,7 +102,10 @@ input=[paddle.layer.full_matrix_projection(input=avg_pool)], act=paddle.activation.Softmax()) ``` - +## 安装依赖包 +```bash +pip install -r requirements.txt +``` ## 使用 PaddlePaddle 内置数据运行 diff --git a/nested_sequence/text_classification/requirements.txt b/nested_sequence/text_classification/requirements.txt new file mode 100644 index 0000000000..dca9a90964 --- /dev/null +++ b/nested_sequence/text_classification/requirements.txt @@ -0,0 +1 @@ +click From 266b8eebfe45c4425bfa306cba57ad53227cb6eb Mon Sep 17 00:00:00 2001 From: peterzhang2029 Date: Fri, 13 Oct 2017 20:12:56 +0800 Subject: [PATCH 6/8] refine notation --- nested_sequence/text_classification/README.md | 35 ++++++++++--------- .../text_classification/index.html | 35 ++++++++++--------- nested_sequence/text_classification/infer.py | 24 ++++++------- 
.../text_classification/network_conf.py | 2 -- nested_sequence/text_classification/train.py | 32 ++++++++--------- 5 files changed, 62 insertions(+), 66 deletions(-) diff --git a/nested_sequence/text_classification/README.md b/nested_sequence/text_classification/README.md index ca272a8097..03e4f14c1e 100644 --- a/nested_sequence/text_classification/README.md +++ b/nested_sequence/text_classification/README.md @@ -76,7 +76,7 @@ python train.py ### 预测 训练结束后模型将存储在指定目录当中(默认models目录),在终端执行: ```bash -python infer.py +python infer.py --model_path 'models/params_pass_00000.tar.gz' ``` 默认情况下,预测脚本将加载训练一个pass的模型对 `imdb的测试集` 进行测试。 @@ -139,20 +139,21 @@ def train_reader(data_dir, word_dict): `train.py`训练脚本中包含以下参数: ``` Options: - --train_data_dir TEXT path of training dataset (default: None). if this + --train_data_dir TEXT The path of training dataset (default: None). If this parameter is not set, imdb dataset will be used. - --test_data_dir TEXT path of testing dataset (default: None). if this + --test_data_dir TEXT The path of testing dataset (default: None). If this parameter is not set, imdb dataset will be used. - --word_dict_path TEXT path of word dictionary (default: None).if this - parameter is not set, imdb dataset will be used.if + --word_dict_path TEXT The path of word dictionary (default: None). If this + parameter is not set, imdb dataset will be used. If this parameter is set, but the file does not exist, word dictionay will be built from the training data automatically. - --class_num INTEGER class number (default: 2). - --batch_size INTEGER the number of training examples in one batch + --class_num INTEGER The class number (default: 2). + --batch_size INTEGER The number of training examples in one batch (default: 32). - --num_passes INTEGER number of passes to train (default: 10). - --model_save_dir TEXT path to save the trained models (default: 'models'). + --num_passes INTEGER The number of passes to train (default: 10). + --model_save_dir TEXT The path to save the trained models (default: + 'models'). --help Show this message and exit. ``` @@ -170,20 +171,20 @@ python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_da ``` Options: - --data_path TEXT path of data for inference (default: None). if this - parameter is not set, imdb test dataset will be used. - --model_path TEXT path of saved model. (default: - 'models/params_pass_00000.tar.gz') - --word_dict_path TEXT path of word dictionary (default: None).if this + --data_path TEXT The path of data for inference (default: None). If + this parameter is not set, imdb test dataset will be + used. + --model_path TEXT The path of saved model. [required] + --word_dict_path TEXT The path of word dictionary (default: None). If this parameter is not set, imdb dataset will be used. - --class_num INTEGER class number (default: 2). - --batch_size INTEGER the number of examples in one batch (default: 32). + --class_num INTEGER The class number (default: 2). + --batch_size INTEGER The number of examples in one batch (default: 32). --help Show this message and exit. 
``` 2.以`data`目录下的示例数据为例,在终端执行: ```bash -python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt' +python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt' --model_path 'models/params_pass_00000.tar.gz' ``` 即可对样例数据进行预测。 diff --git a/nested_sequence/text_classification/index.html b/nested_sequence/text_classification/index.html index de06050628..30b020d25c 100644 --- a/nested_sequence/text_classification/index.html +++ b/nested_sequence/text_classification/index.html @@ -118,7 +118,7 @@ ### 预测 训练结束后模型将存储在指定目录当中(默认models目录),在终端执行: ```bash -python infer.py +python infer.py --model_path 'models/params_pass_00000.tar.gz' ``` 默认情况下,预测脚本将加载训练一个pass的模型对 `imdb的测试集` 进行测试。 @@ -181,20 +181,21 @@ `train.py`训练脚本中包含以下参数: ``` Options: - --train_data_dir TEXT path of training dataset (default: None). if this + --train_data_dir TEXT The path of training dataset (default: None). If this parameter is not set, imdb dataset will be used. - --test_data_dir TEXT path of testing dataset (default: None). if this + --test_data_dir TEXT The path of testing dataset (default: None). If this parameter is not set, imdb dataset will be used. - --word_dict_path TEXT path of word dictionary (default: None).if this - parameter is not set, imdb dataset will be used.if + --word_dict_path TEXT The path of word dictionary (default: None). If this + parameter is not set, imdb dataset will be used. If this parameter is set, but the file does not exist, word dictionay will be built from the training data automatically. - --class_num INTEGER class number (default: 2). - --batch_size INTEGER the number of training examples in one batch + --class_num INTEGER The class number (default: 2). + --batch_size INTEGER The number of training examples in one batch (default: 32). - --num_passes INTEGER number of passes to train (default: 10). - --model_save_dir TEXT path to save the trained models (default: 'models'). + --num_passes INTEGER The number of passes to train (default: 10). + --model_save_dir TEXT The path to save the trained models (default: + 'models'). --help Show this message and exit. ``` @@ -212,20 +213,20 @@ ``` Options: - --data_path TEXT path of data for inference (default: None). if this - parameter is not set, imdb test dataset will be used. - --model_path TEXT path of saved model. (default: - 'models/params_pass_00000.tar.gz') - --word_dict_path TEXT path of word dictionary (default: None).if this + --data_path TEXT The path of data for inference (default: None). If + this parameter is not set, imdb test dataset will be + used. + --model_path TEXT The path of saved model. [required] + --word_dict_path TEXT The path of word dictionary (default: None). If this parameter is not set, imdb dataset will be used. - --class_num INTEGER class number (default: 2). - --batch_size INTEGER the number of examples in one batch (default: 32). + --class_num INTEGER The class number (default: 2). + --batch_size INTEGER The number of examples in one batch (default: 32). --help Show this message and exit. 
``` 2.以`data`目录下的示例数据为例,在终端执行: ```bash -python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt' +python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt' --model_path 'models/params_pass_00000.tar.gz' ``` 即可对样例数据进行预测。 diff --git a/nested_sequence/text_classification/infer.py b/nested_sequence/text_classification/infer.py index 24a39f6908..b3940e8379 100644 --- a/nested_sequence/text_classification/infer.py +++ b/nested_sequence/text_classification/infer.py @@ -14,28 +14,24 @@ @click.option( "--data_path", default=None, - help=("path of data for inference (default: None). " - "if this parameter is not set, " + help=("The path of data for inference (default: None). " + "If this parameter is not set, " "imdb test dataset will be used.")) @click.option( - "--model_path", - type=str, - default='models/params_pass_00000.tar.gz', - help=("path of saved model. " - "(default: 'models/params_pass_00000.tar.gz')")) + "--model_path", type=str, required=True, help="The path of saved model.") @click.option( "--word_dict_path", type=str, default=None, - help=("path of word dictionary (default: None)." - "if this parameter is not set, imdb dataset will be used.")) + help=("The path of word dictionary (default: None). " + "If this parameter is not set, imdb dataset will be used.")) @click.option( - "--class_num", type=int, default=2, help="class number (default: 2).") + "--class_num", type=int, default=2, help="The class number (default: 2).") @click.option( "--batch_size", type=int, default=32, - help="the number of examples in one batch (default: 32).") + help="The number of examples in one batch (default: 32).") def infer(data_path, model_path, word_dict_path, batch_size, class_num): def _infer_a_batch(inferer, test_batch, ids_2_word): probs = inferer.infer(input=test_batch, field=["value"]) @@ -49,8 +45,8 @@ def _infer_a_batch(inferer, test_batch, ids_2_word): " ".join(["{:0.4f}".format(p) for p in prob]), word_text)) - assert os.path.exists(model_path), "the trained model does not exist." - logger.info("begin to predict...") + assert os.path.exists(model_path), "The trained model does not exist." + logger.info("Begin to predict...") use_default_data = (data_path is None) if use_default_data: @@ -61,7 +57,7 @@ def _infer_a_batch(inferer, test_batch, ids_2_word): class_num = 2 else: assert os.path.exists( - word_dict_path), "the word dictionary file does not exist" + word_dict_path), "The word dictionary file does not exist" word_dict = load_dict(word_dict_path) word_reverse_dict = dict((value, key) diff --git a/nested_sequence/text_classification/network_conf.py b/nested_sequence/text_classification/network_conf.py index 0ecc3c2d12..a86964ebcf 100644 --- a/nested_sequence/text_classification/network_conf.py +++ b/nested_sequence/text_classification/network_conf.py @@ -7,8 +7,6 @@ def cnn_cov_group(group_input, hidden_size): conv4 = paddle.networks.sequence_conv_pool( input=group_input, context_len=4, hidden_size=hidden_size) - #output_group = paddle.layer.concat(input=[conv3, conv4]) - output_group = paddle.layer.fc( input=[conv3, conv4], size=hidden_size, diff --git a/nested_sequence/text_classification/train.py b/nested_sequence/text_classification/train.py index 1742248caa..c6371da842 100644 --- a/nested_sequence/text_classification/train.py +++ b/nested_sequence/text_classification/train.py @@ -14,42 +14,42 @@ @click.option( "--train_data_dir", default=None, - help=("path of training dataset (default: None). 
" - "if this parameter is not set, " + help=("The path of training dataset (default: None). " + "If this parameter is not set, " "imdb dataset will be used.")) @click.option( "--test_data_dir", default=None, - help=("path of testing dataset (default: None). " - "if this parameter is not set, " + help=("The path of testing dataset (default: None). " + "If this parameter is not set, " "imdb dataset will be used.")) @click.option( "--word_dict_path", type=str, default=None, - help=("path of word dictionary (default: None)." - "if this parameter is not set, imdb dataset will be used." - "if this parameter is set, but the file does not exist, " + help=("The path of word dictionary (default: None). " + "If this parameter is not set, imdb dataset will be used. " + "If this parameter is set, but the file does not exist, " "word dictionay will be built from " "the training data automatically.")) @click.option( - "--class_num", type=int, default=2, help="class number (default: 2).") + "--class_num", type=int, default=2, help="The class number (default: 2).") @click.option( "--batch_size", type=int, default=32, - help=("the number of training examples in one batch " + help=("The number of training examples in one batch " "(default: 32).")) @click.option( "--num_passes", type=int, default=10, - help="number of passes to train (default: 10).") + help="The number of passes to train (default: 10).") @click.option( "--model_save_dir", type=str, default="models", - help="path to save the trained models (default: 'models').") + help="The path to save the trained models (default: 'models').") def train(train_data_dir, test_data_dir, word_dict_path, class_num, model_save_dir, batch_size, num_passes): """ @@ -70,7 +70,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num, :type num_pass: int """ if train_data_dir is not None: - assert word_dict_path, ("the parameter train_data_dir, word_dict_path " + assert word_dict_path, ("The parameter train_data_dir, word_dict_path " "should be set at the same time.") if not os.path.exists(model_save_dir): @@ -81,7 +81,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num, if use_default_data: logger.info(("No training data are porivided, " "use imdb to train the model.")) - logger.info("please wait to build the word dictionary ...") + logger.info("Please wait to build the word dictionary ...") word_dict = reader.imdb_word_dict() @@ -94,7 +94,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num, class_num = 2 else: if word_dict_path is None or not os.path.exists(word_dict_path): - logger.info(("word dictionary is not given, the dictionary " + logger.info(("Word dictionary is not given, the dictionary " "is automatically built from the training data.")) # build the word dictionary to map the original string-typed @@ -107,7 +107,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num, word_dict = load_dict(word_dict_path) class_num = class_num - logger.info("class number is : %d." % class_num) + logger.info("Class number is : %d." % class_num) train_reader = paddle.batch( paddle.reader.shuffle( @@ -129,7 +129,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num, emb_size = 28 hidden_size = 128 - logger.info("length of word dictionary is : %d." % (dict_dim)) + logger.info("Length of word dictionary is : %d." 
% (dict_dim)) paddle.init(use_gpu=True, trainer_count=4) From 0096515a061e2181b69fd6e8b24fd699f1a8d790 Mon Sep 17 00:00:00 2001 From: peterzhang2029 Date: Sun, 15 Oct 2017 18:13:39 +0800 Subject: [PATCH 7/8] add config.py --- nested_sequence/text_classification/README.md | 108 ++++++++++++------ nested_sequence/text_classification/config.py | 39 +++++++ .../data/test_data/test.txt | 8 +- .../data/train_data/train.txt | 8 +- .../text_classification/index.html | 108 ++++++++++++------ nested_sequence/text_classification/infer.py | 37 +++--- .../text_classification/network_conf.py | 33 ++++-- nested_sequence/text_classification/reader.py | 6 +- nested_sequence/text_classification/train.py | 107 +++++++++-------- nested_sequence/text_classification/utils.py | 27 ++++- 10 files changed, 331 insertions(+), 150 deletions(-) create mode 100644 nested_sequence/text_classification/config.py diff --git a/nested_sequence/text_classification/README.md b/nested_sequence/text_classification/README.md index 03e4f14c1e..ce019662bf 100644 --- a/nested_sequence/text_classification/README.md +++ b/nested_sequence/text_classification/README.md @@ -39,16 +39,25 @@ nest_group = paddle.layer.recurrent_group(input=[paddle.layer.SubsequenceInput(e CNN网络具体代码实现如下: ```python def cnn_cov_group(group_input, hidden_size): + """ + Covolution group definition + :param group_input: The input of this layer. + :type group_input: LayerOutput + :params hidden_size: Size of FC layer. + :type hidden_size: int + """ conv3 = paddle.networks.sequence_conv_pool( input=group_input, context_len=3, hidden_size=hidden_size) conv4 = paddle.networks.sequence_conv_pool( input=group_input, context_len=4, hidden_size=hidden_size) - output_group = paddle.layer.fc(input=[conv3, conv4], + + linear_proj = paddle.layer.fc(input=[conv3, conv4], size=hidden_size, param_attr=paddle.attr.ParamAttr(name='_cov_value_weight'), bias_attr=paddle.attr.ParamAttr(name='_cov_value_bias'), act=paddle.activation.Linear()) - return output_group + + return linear_proj ``` PaddlePaddle 中已经封装好的带有池化的文本序列卷积模块:`paddle.networks.sequence_conv_pool`,可直接调用。 @@ -65,6 +74,33 @@ prob = paddle.layer.mixed(size=class_num, pip install -r requirements.txt ``` +## 指定训练配置参数 + +`config.py`脚本中包含训练配置和模型配置的参数设置, 示例代码如下: +``` +class TrainerConfig(object): + + # whether to use GPU for training + use_gpu = False + # the number of threads used in one machine + trainer_count = 1 + + # train batch size + batch_size = 32 + + ... + + +class ModelConfig(object): + + # embedding vector dimension + emb_size = 28 + + ... +``` +用户可以对具体参数进行设置实现训练, 例如通过设置 `use_gpu` 参数来指定是否使用 GPU + 进行训练。 + ## 使用 PaddlePaddle 内置数据运行 ### 训练 @@ -88,15 +124,15 @@ python infer.py --model_path 'models/params_pass_00000.tar.gz' 输入数据格式如下:每一行为一条样本,以 `\t` 分隔,第一列是类别标签,第二列是输入文本的内容。以下是两条示例数据: ``` - 1 This movie is very good. The actor is so handsome. - 0 What a terrible movie. I waste so much time. + positive This movie is very good. The actor is so handsome. + negative What a terrible movie. I waste so much time. ``` 2.编写数据读取接口 自定义数据读取接口只需编写一个 Python 生成器实现**从原始输入文本中解析一条训练样本**的逻辑。以下代码片段实现了读取原始数据返回类型为: `paddle.data_type.integer_value_sub_sequence` 和 `paddle.data_type.integer_value` ```python -def train_reader(data_dir, word_dict): +def train_reader(data_dir, word_dict, label_dict): """ Reader interface for training data @@ -105,6 +141,8 @@ def train_reader(data_dir, word_dict): :param word_dict: path of word dictionary, the dictionary must has a "UNK" in it. 
:type word_dict: Python dict + :param label_dict: path of label dictionary. + :type label_dict: Python dict """ def reader(): @@ -128,38 +166,41 @@ def train_reader(data_dir, word_dict): if sent_ids: doc_ids.append(sent_ids) - yield doc_ids, int(line_split[lbl_col]) + yield doc_ids, label_dict[line_split[lbl_col]] return reader ``` 需要注意的是, 本例中以英文句号`'.'`作为分隔符, 将一段文本分隔为一定数量的句子, 且每个句子表示为对应词表的索引数组(`sent_ids`)。 由于当前样本的表示(`doc_ids`)中包含了该段文本的所有句子, 因此,它的类型为:`paddle.data_type.integer_value_sub_sequence`。 + 3.指定命令行参数进行训练 `train.py`训练脚本中包含以下参数: ``` Options: - --train_data_dir TEXT The path of training dataset (default: None). If this - parameter is not set, imdb dataset will be used. - --test_data_dir TEXT The path of testing dataset (default: None). If this - parameter is not set, imdb dataset will be used. - --word_dict_path TEXT The path of word dictionary (default: None). If this - parameter is not set, imdb dataset will be used. If - this parameter is set, but the file does not exist, - word dictionay will be built from the training data - automatically. - --class_num INTEGER The class number (default: 2). - --batch_size INTEGER The number of training examples in one batch - (default: 32). - --num_passes INTEGER The number of passes to train (default: 10). - --model_save_dir TEXT The path to save the trained models (default: - 'models'). - --help Show this message and exit. + --train_data_dir TEXT The path of training dataset (default: None). If + this parameter is not set, imdb dataset will be + used. + --test_data_dir TEXT The path of testing dataset (default: None). If this + parameter is not set, imdb dataset will be used. + --word_dict_path TEXT The path of word dictionary (default: None). If this + parameter is not set, imdb dataset will be used. If + this parameter is set, but the file does not exist, + word dictionay will be built from the training data + automatically. + --label_dict_path TEXT The path of label dictionary (default: None).If this + parameter is not set, imdb dataset will be used. If + this parameter is set, but the file does not exist, + label dictionay will be built from the training data + automatically. + --model_save_dir TEXT The path to save the trained models (default: + 'models'). + --help Show this message and exit. ``` 修改`train.py`脚本中的启动参数,可以直接运行本例。 以`data`目录下的示例数据为例,在终端执行: ```bash -python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_data' --word_dict_path 'dict.txt' +python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_data' --word_dict_path 'word_dict.txt' --label_dict_path 'label_dict.txt' ``` 即可对样例数据进行训练。 @@ -171,20 +212,21 @@ python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_da ``` Options: - --data_path TEXT The path of data for inference (default: None). If - this parameter is not set, imdb test dataset will be - used. - --model_path TEXT The path of saved model. [required] - --word_dict_path TEXT The path of word dictionary (default: None). If this - parameter is not set, imdb dataset will be used. - --class_num INTEGER The class number (default: 2). - --batch_size INTEGER The number of examples in one batch (default: 32). - --help Show this message and exit. + --data_path TEXT The path of data for inference (default: None). If + this parameter is not set, imdb test dataset will be + used. + --model_path TEXT The path of saved model. [required] + --word_dict_path TEXT The path of word dictionary (default: None). If this + parameter is not set, imdb dataset will be used. 
+ --label_dict_path TEXT The path of label dictionary (default: None).If this + parameter is not set, imdb dataset will be used. + --batch_size INTEGER The number of examples in one batch (default: 32). + --help Show this message and exit. ``` 2.以`data`目录下的示例数据为例,在终端执行: ```bash -python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt' --model_path 'models/params_pass_00000.tar.gz' +python infer.py --data_path 'data/infer.txt' --word_dict_path 'word_dict.txt' --label_dict_path 'label_dict.txt' --model_path 'models/params_pass_00000.tar.gz' ``` 即可对样例数据进行预测。 diff --git a/nested_sequence/text_classification/config.py b/nested_sequence/text_classification/config.py new file mode 100644 index 0000000000..4461626f34 --- /dev/null +++ b/nested_sequence/text_classification/config.py @@ -0,0 +1,39 @@ +__all__ = ["TrainerConfig", "ModelConfig"] + + +class TrainerConfig(object): + + # whether to use GPU for training + use_gpu = False + # the number of threads used in one machine + trainer_count = 1 + + # train batch size + batch_size = 32 + + # number of pass during training + num_passes = 10 + + # learning rate for optimizer + learning_rate = 1e-3 + + # learning rate for L2Regularization + l2_learning_rate = 1e-3 + + # average_window for ModelAverage + average_window = 0.5 + + # buffer size for shuffling + buf_size = 1000 + + # log progress every log_period batches + log_period = 100 + + +class ModelConfig(object): + + # embedding vector dimension + emb_size = 28 + + # size of sentence vector representation and fc layer in cnn + hidden_size = 128 diff --git a/nested_sequence/text_classification/data/test_data/test.txt b/nested_sequence/text_classification/data/test_data/test.txt index b5e7d11aa5..d162dbbeba 100644 --- a/nested_sequence/text_classification/data/test_data/test.txt +++ b/nested_sequence/text_classification/data/test_data/test.txt @@ -1,4 +1,4 @@ -1 I liked the film. Some of the action scenes were very interesting, tense and well done. I especially liked the opening scene which had a semi truck in it. Also the film is funny is several parts. I'd give the film an 8 out of 10. -0 The plot for Descent, if it actually can be called a plot, has two noteworthy events. One near the beginning - one at the end. Together these events make up maybe 5% of the total movie time. Everything (and I mean _everything_) in between is basically the director's desperate effort to fill in the minutes. -0 This film lacked something I couldn't put my finger on at first: charisma on the part of the leading actress. This inevitably translated to lack of chemistry when she shared the screen with her leading man. Even the romantic scenes came across as being merely the actors at play. -0 I read the book a long time back and don't specifically remember the plot but do remember that I enjoyed it. Since I'm home sick on the couch it seemed like a good idea and Hey !! It is a Lifetime movie.

The movie is populated with grade B actors and actresses.

The female cast is right out of Desperate Housewives. \ No newline at end of file +positive I liked the film. Some of the action scenes were very interesting, tense and well done. I especially liked the opening scene which had a semi truck in it. Also the film is funny is several parts. I'd give the film an 8 out of 10. +negative The plot for Descent, if it actually can be called a plot, has two noteworthy events. One near the beginning - one at the end. Together these events make up maybe 5% of the total movie time. Everything (and I mean _everything_) in between is basically the director's desperate effort to fill in the minutes. +negative This film lacked something I couldn't put my finger on at first: charisma on the part of the leading actress. This inevitably translated to lack of chemistry when she shared the screen with her leading man. Even the romantic scenes came across as being merely the actors at play. +negative I read the book a long time back and don't specifically remember the plot but do remember that I enjoyed it. Since I'm home sick on the couch it seemed like a good idea and Hey !! It is a Lifetime movie.

The movie is populated with grade B actors and actresses.

The female cast is right out of Desperate Housewives. \ No newline at end of file diff --git a/nested_sequence/text_classification/data/train_data/train.txt b/nested_sequence/text_classification/data/train_data/train.txt index 51c67c39fb..4f392593bf 100644 --- a/nested_sequence/text_classification/data/train_data/train.txt +++ b/nested_sequence/text_classification/data/train_data/train.txt @@ -1,4 +1,4 @@ -0 It was a Sunday night and I was waiting for the advertised movie on TV. They said it was a comedy! The movie started, 10 minutes passed, after that 30 minutes and I didn't laugh not even once. The fact is that the movie ended and I didn't get even on echance to laugh. -0 I saw this piece of garbage on AMC last night, and wonder how it could be considered in any way an American Movie Classic. It was awful in every way. How badly did Jack Lemmon, James Stewart and the rest of the cast need cash that they would even consider doing this movie? -1 its not as good as the first movie,but its a good solid movie its has good car chase scenes,on the remake of this movie there a story for are hero to drive fast as his trying to rush to the side of his ailing wife,the ending is great just a good fair movie to watch in my opinion. -1 Rosalind Russell executes a power-house performance as Rosie Lord, a very wealthy woman with greedy heirs. With an Auntie Mame-type character, this actress can never go wrong. Her very-real terror at being in an insane assylum is a wonderful piece of acting. Everyone should watch this. \ No newline at end of file +negative It was a Sunday night and I was waiting for the advertised movie on TV. They said it was a comedy! The movie started, 10 minutes passed, after that 30 minutes and I didn't laugh not even once. The fact is that the movie ended and I didn't get even on echance to laugh. +negative I saw this piece of garbage on AMC last night, and wonder how it could be considered in any way an American Movie Classic. It was awful in every way. How badly did Jack Lemmon, James Stewart and the rest of the cast need cash that they would even consider doing this movie? +positive its not as good as the first movie,but its a good solid movie its has good car chase scenes,on the remake of this movie there a story for are hero to drive fast as his trying to rush to the side of his ailing wife,the ending is great just a good fair movie to watch in my opinion. +positive Rosalind Russell executes a power-house performance as Rosie Lord, a very wealthy woman with greedy heirs. With an Auntie Mame-type character, this actress can never go wrong. Her very-real terror at being in an insane assylum is a wonderful piece of acting. Everyone should watch this. \ No newline at end of file diff --git a/nested_sequence/text_classification/index.html b/nested_sequence/text_classification/index.html index 30b020d25c..2b2c9dee95 100644 --- a/nested_sequence/text_classification/index.html +++ b/nested_sequence/text_classification/index.html @@ -81,16 +81,25 @@ CNN网络具体代码实现如下: ```python def cnn_cov_group(group_input, hidden_size): + """ + Covolution group definition + :param group_input: The input of this layer. + :type group_input: LayerOutput + :params hidden_size: Size of FC layer. 
+ :type hidden_size: int + """ conv3 = paddle.networks.sequence_conv_pool( input=group_input, context_len=3, hidden_size=hidden_size) conv4 = paddle.networks.sequence_conv_pool( input=group_input, context_len=4, hidden_size=hidden_size) - output_group = paddle.layer.fc(input=[conv3, conv4], + + linear_proj = paddle.layer.fc(input=[conv3, conv4], size=hidden_size, param_attr=paddle.attr.ParamAttr(name='_cov_value_weight'), bias_attr=paddle.attr.ParamAttr(name='_cov_value_bias'), act=paddle.activation.Linear()) - return output_group + + return linear_proj ``` PaddlePaddle 中已经封装好的带有池化的文本序列卷积模块:`paddle.networks.sequence_conv_pool`,可直接调用。 @@ -107,6 +116,33 @@ pip install -r requirements.txt ``` +## 指定训练配置参数 + +`config.py`脚本中包含训练配置和模型配置的参数设置, 示例代码如下: +``` +class TrainerConfig(object): + + # whether to use GPU for training + use_gpu = False + # the number of threads used in one machine + trainer_count = 1 + + # train batch size + batch_size = 32 + + ... + + +class ModelConfig(object): + + # embedding vector dimension + emb_size = 28 + + ... +``` +用户可以对具体参数进行设置实现训练, 例如通过设置 `use_gpu` 参数来指定是否使用 GPU + 进行训练。 + ## 使用 PaddlePaddle 内置数据运行 ### 训练 @@ -130,15 +166,15 @@ 输入数据格式如下:每一行为一条样本,以 `\t` 分隔,第一列是类别标签,第二列是输入文本的内容。以下是两条示例数据: ``` - 1 This movie is very good. The actor is so handsome. - 0 What a terrible movie. I waste so much time. + positive This movie is very good. The actor is so handsome. + negative What a terrible movie. I waste so much time. ``` 2.编写数据读取接口 自定义数据读取接口只需编写一个 Python 生成器实现**从原始输入文本中解析一条训练样本**的逻辑。以下代码片段实现了读取原始数据返回类型为: `paddle.data_type.integer_value_sub_sequence` 和 `paddle.data_type.integer_value` ```python -def train_reader(data_dir, word_dict): +def train_reader(data_dir, word_dict, label_dict): """ Reader interface for training data @@ -147,6 +183,8 @@ :param word_dict: path of word dictionary, the dictionary must has a "UNK" in it. :type word_dict: Python dict + :param label_dict: path of label dictionary. + :type label_dict: Python dict """ def reader(): @@ -170,38 +208,41 @@ if sent_ids: doc_ids.append(sent_ids) - yield doc_ids, int(line_split[lbl_col]) + yield doc_ids, label_dict[line_split[lbl_col]] return reader ``` 需要注意的是, 本例中以英文句号`'.'`作为分隔符, 将一段文本分隔为一定数量的句子, 且每个句子表示为对应词表的索引数组(`sent_ids`)。 由于当前样本的表示(`doc_ids`)中包含了该段文本的所有句子, 因此,它的类型为:`paddle.data_type.integer_value_sub_sequence`。 + 3.指定命令行参数进行训练 `train.py`训练脚本中包含以下参数: ``` Options: - --train_data_dir TEXT The path of training dataset (default: None). If this - parameter is not set, imdb dataset will be used. - --test_data_dir TEXT The path of testing dataset (default: None). If this - parameter is not set, imdb dataset will be used. - --word_dict_path TEXT The path of word dictionary (default: None). If this - parameter is not set, imdb dataset will be used. If - this parameter is set, but the file does not exist, - word dictionay will be built from the training data - automatically. - --class_num INTEGER The class number (default: 2). - --batch_size INTEGER The number of training examples in one batch - (default: 32). - --num_passes INTEGER The number of passes to train (default: 10). - --model_save_dir TEXT The path to save the trained models (default: - 'models'). - --help Show this message and exit. + --train_data_dir TEXT The path of training dataset (default: None). If + this parameter is not set, imdb dataset will be + used. + --test_data_dir TEXT The path of testing dataset (default: None). If this + parameter is not set, imdb dataset will be used. + --word_dict_path TEXT The path of word dictionary (default: None). 
If this + parameter is not set, imdb dataset will be used. If + this parameter is set, but the file does not exist, + word dictionay will be built from the training data + automatically. + --label_dict_path TEXT The path of label dictionary (default: None).If this + parameter is not set, imdb dataset will be used. If + this parameter is set, but the file does not exist, + label dictionay will be built from the training data + automatically. + --model_save_dir TEXT The path to save the trained models (default: + 'models'). + --help Show this message and exit. ``` 修改`train.py`脚本中的启动参数,可以直接运行本例。 以`data`目录下的示例数据为例,在终端执行: ```bash -python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_data' --word_dict_path 'dict.txt' +python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_data' --word_dict_path 'word_dict.txt' --label_dict_path 'label_dict.txt' ``` 即可对样例数据进行训练。 @@ -213,20 +254,21 @@ ``` Options: - --data_path TEXT The path of data for inference (default: None). If - this parameter is not set, imdb test dataset will be - used. - --model_path TEXT The path of saved model. [required] - --word_dict_path TEXT The path of word dictionary (default: None). If this - parameter is not set, imdb dataset will be used. - --class_num INTEGER The class number (default: 2). - --batch_size INTEGER The number of examples in one batch (default: 32). - --help Show this message and exit. + --data_path TEXT The path of data for inference (default: None). If + this parameter is not set, imdb test dataset will be + used. + --model_path TEXT The path of saved model. [required] + --word_dict_path TEXT The path of word dictionary (default: None). If this + parameter is not set, imdb dataset will be used. + --label_dict_path TEXT The path of label dictionary (default: None).If this + parameter is not set, imdb dataset will be used. + --batch_size INTEGER The number of examples in one batch (default: 32). + --help Show this message and exit. ``` 2.以`data`目录下的示例数据为例,在终端执行: ```bash -python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt' --model_path 'models/params_pass_00000.tar.gz' +python infer.py --data_path 'data/infer.txt' --word_dict_path 'word_dict.txt' --label_dict_path 'label_dict.txt' --model_path 'models/params_pass_00000.tar.gz' ``` 即可对样例数据进行预测。 diff --git a/nested_sequence/text_classification/infer.py b/nested_sequence/text_classification/infer.py index b3940e8379..00204c9697 100644 --- a/nested_sequence/text_classification/infer.py +++ b/nested_sequence/text_classification/infer.py @@ -6,8 +6,8 @@ import paddle.v2 as paddle import reader -from network_conf import nest_net -from utils import logger, load_dict +from network_conf import nested_net +from utils import logger, load_dict, load_reverse_dict @click.command('infer') @@ -26,14 +26,18 @@ help=("The path of word dictionary (default: None). " "If this parameter is not set, imdb dataset will be used.")) @click.option( - "--class_num", type=int, default=2, help="The class number (default: 2).") + "--label_dict_path", + type=str, + default=None, + help=("The path of label dictionary (default: None)." + "If this parameter is not set, imdb dataset will be used. 
")) @click.option( "--batch_size", type=int, default=32, help="The number of examples in one batch (default: 32).") -def infer(data_path, model_path, word_dict_path, batch_size, class_num): - def _infer_a_batch(inferer, test_batch, ids_2_word): +def infer(data_path, model_path, word_dict_path, batch_size, label_dict_path): + def _infer_a_batch(inferer, test_batch, ids_2_word, ids_2_label): probs = inferer.infer(input=test_batch, field=["value"]) assert len(probs) == len(test_batch) for word_ids, prob in zip(test_batch, probs): @@ -41,7 +45,7 @@ def _infer_a_batch(inferer, test_batch, ids_2_word): for sent in word_ids[0]: sent_ids.extend(sent) word_text = " ".join([ids_2_word[id] for id in sent_ids]) - print("%s\t%s\t%s" % (prob.argmax(), + print("%s\t%s\t%s" % (ids_2_label[prob.argmax()], " ".join(["{:0.4f}".format(p) for p in prob]), word_text)) @@ -53,25 +57,30 @@ def _infer_a_batch(inferer, test_batch, ids_2_word): word_dict = reader.imdb_word_dict() word_reverse_dict = dict((value, key) for key, value in word_dict.iteritems()) + + label_reverse_dict = {0: "positive", 1: "negative"} test_reader = reader.imdb_test(word_dict) class_num = 2 else: assert os.path.exists( word_dict_path), "The word dictionary file does not exist" + assert os.path.exists( + label_dict_path), "The label dictionary file does not exist" word_dict = load_dict(word_dict_path) word_reverse_dict = dict((value, key) for key, value in word_dict.iteritems()) - + label_reverse_dict = load_reverse_dict(label_dict_path) + class_num = len(label_reverse_dict) test_reader = reader.infer_reader(data_path, word_dict)() dict_dim = len(word_dict) - prob_layer = nest_net(dict_dim, class_num=class_num, is_infer=True) + prob_layer = nested_net(dict_dim, class_num, is_infer=True) - # initialize PaddlePaddle - paddle.init(use_gpu=True, trainer_count=4) + # initialize PaddlePaddle. + paddle.init(use_gpu=False, trainer_count=1) - # load the trained models + # load the trained models. parameters = paddle.parameters.Parameters.from_tar( gzip.open(model_path, "r")) inferer = paddle.inference.Inference( @@ -81,11 +90,13 @@ def _infer_a_batch(inferer, test_batch, ids_2_word): for idx, item in enumerate(test_reader): test_batch.append([item[0]]) if len(test_batch) == batch_size: - _infer_a_batch(inferer, test_batch, word_reverse_dict) + _infer_a_batch(inferer, test_batch, word_reverse_dict, + label_reverse_dict) test_batch = [] if len(test_batch): - _infer_a_batch(inferer, test_batch, word_reverse_dict) + _infer_a_batch(inferer, test_batch, word_reverse_dict, + label_reverse_dict) test_batch = [] diff --git a/nested_sequence/text_classification/network_conf.py b/nested_sequence/text_classification/network_conf.py index a86964ebcf..bee2c083d1 100644 --- a/nested_sequence/text_classification/network_conf.py +++ b/nested_sequence/text_classification/network_conf.py @@ -1,34 +1,47 @@ import paddle.v2 as paddle +from config import ModelConfig as conf def cnn_cov_group(group_input, hidden_size): + """ + Covolution group definition + :param group_input: The input of this layer. + :type group_input: LayerOutput + :params hidden_size: Size of FC layer. 
+ :type hidden_size: int + """ conv3 = paddle.networks.sequence_conv_pool( input=group_input, context_len=3, hidden_size=hidden_size) conv4 = paddle.networks.sequence_conv_pool( input=group_input, context_len=4, hidden_size=hidden_size) - output_group = paddle.layer.fc( + linear_proj = paddle.layer.fc( input=[conv3, conv4], size=hidden_size, param_attr=paddle.attr.ParamAttr(name='_cov_value_weight'), bias_attr=paddle.attr.ParamAttr(name='_cov_value_bias'), act=paddle.activation.Linear()) - return output_group + return linear_proj -def nest_net(dict_dim, - emb_size=28, - hidden_size=128, - class_num=2, - is_infer=False): - +def nested_net(dict_dim, class_num, is_infer=False): + """ + Nested network definition. + :param dict_dim: Size of word dictionary. + :type dict_dim: int + :params class_num: Number of instance class. + :type class_num: int + :params is_infer: The boolean parameter + indicating inferring or training. + :type is_infer: bool + """ data = paddle.layer.data( "word", paddle.data_type.integer_value_sub_sequence(dict_dim)) - emb = paddle.layer.embedding(input=data, size=emb_size) + emb = paddle.layer.embedding(input=data, size=conf.emb_size) nest_group = paddle.layer.recurrent_group( - input=[paddle.layer.SubsequenceInput(emb), hidden_size], + input=[paddle.layer.SubsequenceInput(emb), conf.hidden_size], step=cnn_cov_group) avg_pool = paddle.layer.pooling( input=nest_group, diff --git a/nested_sequence/text_classification/reader.py b/nested_sequence/text_classification/reader.py index ddefbbdcd7..5202942caf 100644 --- a/nested_sequence/text_classification/reader.py +++ b/nested_sequence/text_classification/reader.py @@ -155,7 +155,7 @@ def imdb_word_dict(): re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150) -def train_reader(data_dir, word_dict): +def train_reader(data_dir, word_dict, label_dict): """ Reader interface for training data @@ -164,6 +164,8 @@ def train_reader(data_dir, word_dict): :param word_dict: path of word dictionary, the dictionary must has a "UNK" in it. :type word_dict: Python dict + :param label_dict: path of label dictionary. + :type label_dict: Python dict """ def reader(): @@ -187,7 +189,7 @@ def reader(): if sent_ids: doc_ids.append(sent_ids) - yield doc_ids, int(line_split[lbl_col]) + yield doc_ids, label_dict[line_split[lbl_col]] return reader diff --git a/nested_sequence/text_classification/train.py b/nested_sequence/text_classification/train.py index c6371da842..863a0b47da 100644 --- a/nested_sequence/text_classification/train.py +++ b/nested_sequence/text_classification/train.py @@ -6,8 +6,9 @@ import paddle.v2 as paddle import reader -from network_conf import nest_net -from utils import build_dict, load_dict, logger +from network_conf import nested_net +from utils import build_word_dict, build_label_dict, load_dict, logger +from config import TrainerConfig as conf @click.command('train') @@ -33,25 +34,21 @@ "word dictionay will be built from " "the training data automatically.")) @click.option( - "--class_num", type=int, default=2, help="The class number (default: 2).") -@click.option( - "--batch_size", - type=int, - default=32, - help=("The number of training examples in one batch " - "(default: 32).")) -@click.option( - "--num_passes", - type=int, - default=10, - help="The number of passes to train (default: 10).") + "--label_dict_path", + type=str, + default=None, + help=("The path of label dictionary (default: None)." + "If this parameter is not set, imdb dataset will be used. 
" + "If this parameter is set, but the file does not exist, " + "label dictionay will be built from " + "the training data automatically.")) @click.option( "--model_save_dir", type=str, default="models", help="The path to save the trained models (default: 'models').") -def train(train_data_dir, test_data_dir, word_dict_path, class_num, - model_save_dir, batch_size, num_passes): +def train(train_data_dir, test_data_dir, word_dict_path, label_dict_path, + model_save_dir): """ :params train_data_path: path of training data, if this parameter is not specified, imdb dataset will be used to run this example @@ -59,19 +56,19 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num, :params test_data_path: path of testing data, if this parameter is not specified, imdb dataset will be used to run this example :type test_data_path: str - :params word_dict_path: path of training data, if this parameter + :params word_dict_path: path of word dictionary, if this parameter is not specified, imdb dataset will be used to run this example :type word_dict_path: str + :params label_dict_path: path of label dictionary, if this parameter + is not specified, imdb dataset will be used to run this example + :type label_dict_path: str :params model_save_dir: dir where models saved - :type num_pass: str - :params batch_size: train batch size - :type num_pass: int - :params num_pass: train pass number - :type num_pass: int + :type model_save_dir: str """ if train_data_dir is not None: - assert word_dict_path, ("The parameter train_data_dir, word_dict_path " - "should be set at the same time.") + assert word_dict_path and label_dict_path, ( + "The parameter train_data_dir, word_dict_path, label_dict_path " + "should be set at the same time.") if not os.path.exists(model_save_dir): os.mkdir(model_save_dir) @@ -84,7 +81,6 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num, logger.info("Please wait to build the word dictionary ...") word_dict = reader.imdb_word_dict() - train_reader = paddle.batch( paddle.reader.shuffle( lambda: reader.imdb_train(word_dict), buf_size=1000), @@ -99,69 +95,79 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num, # build the word dictionary to map the original string-typed # words into integer-typed index - build_dict( + build_word_dict( data_dir=train_data_dir, save_path=word_dict_path, use_col=1, cutoff_fre=0) + if not os.path.exists(label_dict_path): + logger.info(("Label dictionary is not given, the dictionary " + "is automatically built from the training data.")) + # build the label dictionary to map the original string-typed + # label into integer-typed index + build_label_dict( + data_dir=train_data_dir, save_path=label_dict_path, use_col=0) + word_dict = load_dict(word_dict_path) - class_num = class_num + label_dict = load_dict(label_dict_path) + + class_num = len(label_dict) logger.info("Class number is : %d." % class_num) train_reader = paddle.batch( paddle.reader.shuffle( - reader.train_reader(train_data_dir, word_dict), buf_size=1000), - batch_size=batch_size) + reader.train_reader(train_data_dir, word_dict, label_dict), + buf_size=conf.buf_size), + batch_size=conf.batch_size) if test_data_dir is not None: # here, because training and testing data share a same format, # we still use the reader.train_reader to read the testing data. 
test_reader = paddle.batch( paddle.reader.shuffle( - reader.train_reader(test_data_dir, word_dict), - buf_size=1000), - batch_size=batch_size) + reader.train_reader(test_data_dir, word_dict, label_dict), + buf_size=conf.buf_size), + batch_size=conf.batch_size) else: test_reader = None dict_dim = len(word_dict) - emb_size = 28 - hidden_size = 128 logger.info("Length of word dictionary is : %d." % (dict_dim)) - paddle.init(use_gpu=True, trainer_count=4) - - # network config - cost, prob, label = nest_net( - dict_dim, emb_size, hidden_size, class_num, is_infer=False) - - # create parameters - parameters = paddle.parameters.create(cost) + paddle.init(use_gpu=conf.use_gpu, trainer_count=conf.trainer_count) # create optimizer adam_optimizer = paddle.optimizer.Adam( - learning_rate=1e-3, - regularization=paddle.optimizer.L2Regularization(rate=1e-3), - model_average=paddle.optimizer.ModelAverage(average_window=0.5)) + learning_rate=conf.learning_rate, + regularization=paddle.optimizer.L2Regularization( + rate=conf.l2_learning_rate), + model_average=paddle.optimizer.ModelAverage( + average_window=conf.average_window)) - # create trainer + # define network topology. + cost, prob, label = nested_net(dict_dim, class_num, is_infer=False) + + # create all the trainable parameters. + parameters = paddle.parameters.create(cost) + + # create the trainer instance. trainer = paddle.trainer.SGD( cost=cost, extra_layers=paddle.evaluator.auc(input=prob, label=label), parameters=parameters, update_equation=adam_optimizer) - # begin training network + # feeding dictionary feeding = {"word": 0, "label": 1} def _event_handler(event): """ - Define end batch and end pass event handler + Define the end batch and the end pass event handler. """ if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: + if event.batch_id % conf.log_period == 0: logger.info("Pass %d, Batch %d, Cost %f, %s\n" % ( event.pass_id, event.batch_id, event.cost, event.metrics)) @@ -175,11 +181,12 @@ def _event_handler(event): event.pass_id), "w") as f: parameters.to_tar(f) + # begin training network trainer.train( reader=train_reader, event_handler=_event_handler, feeding=feeding, - num_passes=num_passes) + num_passes=conf.num_passes) logger.info("Training has finished.") diff --git a/nested_sequence/text_classification/utils.py b/nested_sequence/text_classification/utils.py index 0362e13782..b83fef177b 100644 --- a/nested_sequence/text_classification/utils.py +++ b/nested_sequence/text_classification/utils.py @@ -6,7 +6,7 @@ logger.setLevel(logging.INFO) -def build_dict(data_dir, save_path, use_col=1, cutoff_fre=1): +def build_word_dict(data_dir, save_path, use_col=1, cutoff_fre=1): values = defaultdict(int) for file_name in os.listdir(data_dir): @@ -32,6 +32,31 @@ def build_dict(data_dir, save_path, use_col=1, cutoff_fre=1): f.write("%s\t%d\n" % (v, count)) +def build_label_dict(data_dir, save_path, use_col=0): + values = defaultdict(int) + + for file_name in os.listdir(data_dir): + file_path = os.path.join(data_dir, file_name) + if not os.path.isfile(file_path): + continue + with open(file_path, "r") as fdata: + for line in fdata: + line_splits = line.strip().split("\t") + if len(line_splits) < use_col: + continue + values[line_splits[use_col]] += 1 + + with open(save_path, "w") as f: + for v, count in sorted( + values.iteritems(), key=lambda x: x[1], reverse=True): + f.write("%s\t%d\n" % (v, count)) + + def load_dict(dict_path): return dict((line.strip().split("\t")[0], idx) for idx, line in enumerate(open(dict_path, 
"r").readlines())) + + +def load_reverse_dict(dict_path): + return dict((idx, line.strip().split("\t")[0]) + for idx, line in enumerate(open(dict_path, "r").readlines())) From 13cd4dc0a13959b0e433cffb83a7312e8f66b2ec Mon Sep 17 00:00:00 2001 From: peterzhang2029 Date: Sun, 15 Oct 2017 21:00:21 +0800 Subject: [PATCH 8/8] refine docstring and notation --- nested_sequence/text_classification/README.md | 19 +++++++---- nested_sequence/text_classification/config.py | 29 +++++++++------- .../text_classification/index.html | 19 +++++++---- nested_sequence/text_classification/infer.py | 4 ++- .../text_classification/network_conf.py | 6 ++-- nested_sequence/text_classification/train.py | 10 +++--- nested_sequence/text_classification/utils.py | 33 +++++++++++++++++++ 7 files changed, 88 insertions(+), 32 deletions(-) diff --git a/nested_sequence/text_classification/README.md b/nested_sequence/text_classification/README.md index ce019662bf..dbc1b4a5d3 100644 --- a/nested_sequence/text_classification/README.md +++ b/nested_sequence/text_classification/README.md @@ -76,8 +76,8 @@ pip install -r requirements.txt ## 指定训练配置参数 -`config.py`脚本中包含训练配置和模型配置的参数设置, 示例代码如下: -``` +通过 `config.py` 脚本修改训练和模型配置参数,脚本中有对可配置参数的详细解释,示例如下: +```python class TrainerConfig(object): # whether to use GPU for training @@ -98,8 +98,7 @@ class ModelConfig(object): ... ``` -用户可以对具体参数进行设置实现训练, 例如通过设置 `use_gpu` 参数来指定是否使用 GPU - 进行训练。 +修改 `config.py` 对参数进行调整。例如,通过修改 `use_gpu` 参数来指定是否使用 GPU 进行训练。 ## 使用 PaddlePaddle 内置数据运行 @@ -200,7 +199,11 @@ Options: 修改`train.py`脚本中的启动参数,可以直接运行本例。 以`data`目录下的示例数据为例,在终端执行: ```bash -python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_data' --word_dict_path 'word_dict.txt' --label_dict_path 'label_dict.txt' +python train.py \ + --train_data_dir 'data/train_data' \ + --test_data_dir 'data/test_data' \ + --word_dict_path 'word_dict.txt' \ + --label_dict_path 'label_dict.txt' ``` 即可对样例数据进行训练。 @@ -226,7 +229,11 @@ Options: 2.以`data`目录下的示例数据为例,在终端执行: ```bash -python infer.py --data_path 'data/infer.txt' --word_dict_path 'word_dict.txt' --label_dict_path 'label_dict.txt' --model_path 'models/params_pass_00000.tar.gz' +python infer.py \ + --data_path 'data/infer.txt' \ + --word_dict_path 'word_dict.txt' \ + --label_dict_path 'label_dict.txt' \ + --model_path 'models/params_pass_00000.tar.gz' ``` 即可对样例数据进行预测。 diff --git a/nested_sequence/text_classification/config.py b/nested_sequence/text_classification/config.py index 4461626f34..1a6e4681b1 100644 --- a/nested_sequence/text_classification/config.py +++ b/nested_sequence/text_classification/config.py @@ -3,37 +3,44 @@ class TrainerConfig(object): - # whether to use GPU for training + # Whether to use GPU in training or not. use_gpu = False - # the number of threads used in one machine + # The number of computing threads. trainer_count = 1 - # train batch size + # The training batch size. batch_size = 32 - # number of pass during training + # The epoch number. num_passes = 10 - # learning rate for optimizer + # The global learning rate. learning_rate = 1e-3 - # learning rate for L2Regularization + # The decay rate for L2Regularization l2_learning_rate = 1e-3 - # average_window for ModelAverage + # This parameter is used for the averaged SGD. + # About the average_window * (number of the processed batch) parameters + # are used for average. + # To be accurate, between average_window *(number of the processed batch) + # and 2 * average_window * (number of the processed batch) parameters + # are used for average. 
diff --git a/nested_sequence/text_classification/index.html b/nested_sequence/text_classification/index.html
index 2b2c9dee95..005de9249a 100644
--- a/nested_sequence/text_classification/index.html
+++ b/nested_sequence/text_classification/index.html
@@ -118,8 +118,8 @@
 
 ## Specify the training configuration parameters
 
-The `config.py` script contains the parameter settings for the training and model configuration. Sample code:
-```
+Modify the training and model configuration parameters through the `config.py` script, which contains detailed explanations of every configurable parameter. For example:
+```python
 class TrainerConfig(object):
 
     # whether to use GPU for training
@@ -140,8 +140,7 @@
     ...
 ```
 
-Users can set the individual parameters to configure training, for example, setting the `use_gpu`
- parameter specifies whether to train on GPU.
+Adjust the parameters by editing `config.py`. For example, change the `use_gpu` parameter to specify whether to use a GPU for training.
 
 ## Run with the PaddlePaddle built-in data
 
@@ -242,7 +241,11 @@
 Modify the launch arguments in the `train.py` script to run this example directly. Taking the sample data under the `data` directory as an example, execute in the terminal:
 ```bash
-python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_data' --word_dict_path 'word_dict.txt' --label_dict_path 'label_dict.txt'
+python train.py \
+  --train_data_dir 'data/train_data' \
+  --test_data_dir 'data/test_data' \
+  --word_dict_path 'word_dict.txt' \
+  --label_dict_path 'label_dict.txt'
 ```
 This trains the model on the sample data.
 
@@ -268,7 +271,11 @@
 2. Taking the sample data under the `data` directory as an example, execute in the terminal:
 ```bash
-python infer.py --data_path 'data/infer.txt' --word_dict_path 'word_dict.txt' --label_dict_path 'label_dict.txt' --model_path 'models/params_pass_00000.tar.gz'
+python infer.py \
+  --data_path 'data/infer.txt' \
+  --word_dict_path 'word_dict.txt' \
+  --label_dict_path 'label_dict.txt' \
+  --model_path 'models/params_pass_00000.tar.gz'
 ```
 This runs prediction on the sample data.
 
diff --git a/nested_sequence/text_classification/infer.py b/nested_sequence/text_classification/infer.py
index 00204c9697..461eba4935 100644
--- a/nested_sequence/text_classification/infer.py
+++ b/nested_sequence/text_classification/infer.py
@@ -58,6 +58,7 @@ def _infer_a_batch(inferer, test_batch, ids_2_word, ids_2_label):
         word_reverse_dict = dict((value, key)
                                  for key, value in word_dict.iteritems())
 
+        # The reversed label dictionary of the imdb dataset.
         label_reverse_dict = {0: "positive", 1: "negative"}
         test_reader = reader.imdb_test(word_dict)
         class_num = 2
@@ -75,11 +76,12 @@ def _infer_a_batch(inferer, test_batch, ids_2_word, ids_2_label):
         test_reader = reader.infer_reader(data_path, word_dict)()
 
     dict_dim = len(word_dict)
-    prob_layer = nested_net(dict_dim, class_num, is_infer=True)
 
     # initialize PaddlePaddle.
     paddle.init(use_gpu=False, trainer_count=1)
 
+    prob_layer = nested_net(dict_dim, class_num, is_infer=True)
+
     # load the trained model.
     parameters = paddle.parameters.Parameters.from_tar(
         gzip.open(model_path, "r"))
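The reordering in the `infer.py` hunk above matters: `paddle.init` must run before any layer is constructed. A minimal sketch of the resulting inference flow, with illustrative paths and assuming the v2 `paddle.inference.Inference` helper:

```python
# Sketch of the inference flow after this patch. nested_net comes from
# network_conf.py in this series; the dict/model paths are illustrative.
import gzip
import paddle.v2 as paddle
from network_conf import nested_net
from utils import load_dict

word_dict = load_dict("word_dict.txt")

# 1. Initialize PaddlePaddle first.
paddle.init(use_gpu=False, trainer_count=1)

# 2. Only then define the network topology for inference.
prob_layer = nested_net(len(word_dict), class_num=2, is_infer=True)

# 3. Load the trained parameters and build the inferer.
parameters = paddle.parameters.Parameters.from_tar(
    gzip.open("models/params_pass_00000.tar.gz", "r"))
inferer = paddle.inference.Inference(
    output_layer=prob_layer, parameters=parameters)
```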
diff --git a/nested_sequence/text_classification/network_conf.py b/nested_sequence/text_classification/network_conf.py
index bee2c083d1..b4c4066909 100644
--- a/nested_sequence/text_classification/network_conf.py
+++ b/nested_sequence/text_classification/network_conf.py
@@ -4,10 +4,10 @@
 def cnn_cov_group(group_input, hidden_size):
     """
-    Covolution group definition
+    Convolution group definition.
 
     :param group_input: The input of this layer.
     :type group_input: LayerOutput
-    :params hidden_size: Size of FC layer.
+    :param hidden_size: The size of the fully connected layer.
     :type hidden_size: int
     """
     conv3 = paddle.networks.sequence_conv_pool(
@@ -32,7 +32,7 @@ def nested_net(dict_dim, class_num, is_infer=False):
     :type dict_dim: int
     :param class_num: The number of instance classes.
     :type class_num: int
-    :params is_infer: The boolean parameter
+    :param is_infer: Whether the network is built for inference or training.
     :type is_infer: bool
     """
 
diff --git a/nested_sequence/text_classification/train.py b/nested_sequence/text_classification/train.py
index 863a0b47da..a0da1ad0c5 100644
--- a/nested_sequence/text_classification/train.py
+++ b/nested_sequence/text_classification/train.py
@@ -37,7 +37,7 @@
     "--label_dict_path",
     type=str,
    default=None,
-    help=("The path of label dictionary (default: None)."
+    help=("The path of label dictionary (default: None). "
          "If this parameter is not set, imdb dataset will be used. "
          "If this parameter is set, but the file does not exist, "
          "label dictionary will be built from "
@@ -50,16 +50,16 @@
 def train(train_data_dir, test_data_dir, word_dict_path, label_dict_path,
           model_save_dir):
     """
-    :params train_data_path: path of training data, if this parameter
+    :param train_data_dir: The path of training data. If this parameter
         is not specified, imdb dataset will be used to run this example
     :type train_data_dir: str
-    :params test_data_path: path of testing data, if this parameter
+    :param test_data_dir: The path of testing data. If this parameter
         is not specified, imdb dataset will be used to run this example
     :type test_data_dir: str
-    :params word_dict_path: path of word dictionary, if this parameter
+    :param word_dict_path: The path of word dictionary. If this parameter
         is not specified, imdb dataset will be used to run this example
     :type word_dict_path: str
-    :params label_dict_path: path of label dictionary, if this parameter
+    :param label_dict_path: The path of label dictionary. If this parameter
         is not specified, imdb dataset will be used to run this example
     :type label_dict_path: str
     :param model_save_dir: The directory where the trained models are saved.
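For reference, a short sketch of the two `nested_net` call patterns this series relies on; the dictionary and class sizes are illustrative:

```python
import paddle.v2 as paddle
from network_conf import nested_net

paddle.init(use_gpu=False, trainer_count=1)  # must precede layer construction

dict_dim, class_num = 10000, 2  # illustrative sizes

# In train.py: is_infer=False yields the cost to optimize plus the
# probability and label layers that feed the AUC evaluator.
cost, prob, label = nested_net(dict_dim, class_num, is_infer=False)

# In infer.py (run as a separate process): is_infer=True yields only
# the softmax output layer.
# prob_layer = nested_net(dict_dim, class_num, is_infer=True)
```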
+    :type use_col: int
+    """
     values = defaultdict(int)
 
     for file_name in os.listdir(data_dir):
@@ -53,10 +74,22 @@ def build_label_dict(data_dir, save_path, use_col=0):
 
 def load_dict(dict_path):
+    """
+    Load the word dictionary from the given path.
+    :param dict_path: The path of the word dictionary.
+    :type dict_path: str
+    """
     return dict((line.strip().split("\t")[0], idx)
                 for idx, line in enumerate(open(dict_path, "r").readlines()))
 
 
 def load_reverse_dict(dict_path):
+    """
+    Load the reversed word dictionary from the given path.
+    The index of each word is the key of the returned dictionary
+    and the corresponding word is the value.
+    :param dict_path: The path of the word dictionary.
+    :type dict_path: str
+    """
     return dict((idx, line.strip().split("\t")[0])
                 for idx, line in enumerate(open(dict_path, "r").readlines()))
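Finally, a hypothetical sketch of decoding predictions with a reversed dictionary, following the `label_reverse_dict` convention used in `infer.py`; the probability values are made up for illustration:

```python
import numpy as np

# Softmax output of the inferer for two samples over two classes.
probs = np.array([[0.9, 0.1],
                  [0.2, 0.8]])

# The reversed label dictionary of the imdb dataset (see infer.py).
label_reverse_dict = {0: "positive", 1: "negative"}

for prob in probs:
    lab = prob.argmax()  # the class with the highest probability
    print("%s\t%f" % (label_reverse_dict[lab], prob[lab]))
```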